In [13]:
import pandas as pd
import numpy as np
from database import engine
from ingredient_map import create_ingredient_map
from quant_preprocess import query_data
from quant_preprocess import cols_to_lower
from quant_preprocess import get_cols_list
from quant_preprocess import query_and_preprocess_data
from quant_preprocess import shape_data_long
from quant_preprocess import merge_long
from quant_preprocess import pivot_wide
from quant_preprocess import recode_ingredients
import os


INPUT_PATH = os.path.join("..//data", "ingredient_prices_clean.csv")

df_drinks = query_and_preprocess_data()
df_prices = pd.read_csv(INPUT_PATH, header=None)

In [2]:
def create_dummies(df):
    
    headers = df.columns[1:]
    d = {'strdrink': df['strdrink'].values.tolist()}
    for e in range(0, len(headers)):
        d[headers[e]] = (df[str(headers[e])] > 0).astype(int).values.tolist()
    return pd.DataFrame.from_dict(d)

In [3]:
def summmary_of_oz(df):
   
    return df.describe().transpose().sort_values('mean',
                                ascending = False).head(10)

In [4]:
def summary_of_usage():
    
    df = query_and_preprocess_data()
    df = create_dummies(df)
    dum_df = create_dummies(df)
    headers = dum_df.columns[1:].values.tolist()
    data = dum_df.describe().transpose().sort_values('mean', 
                                    ascending=False).head(10)
        
    return data
    

In [90]:
def get_amount_table():
    df = query_data()
    ingred_cols = get_cols_list(df, "stringredient")
    measure_cols = get_cols_list(df, "strmeasure")
    df[ingred_cols] = cols_to_lower(df, ingred_cols)
    ingredient_long = shape_data_long(
            df, ingred_cols, "stringredient", "", "ingredient"
        )
    measure_long = shape_data_long(df, measure_cols, "strmeasure", "_clean", "amount")
    combined_long = merge_long(ingredient_long, measure_long)
    ingredient_dict = create_ingredient_map()
    recoded_long = recode_ingredients(combined_long, ingredient_dict)
    combined_wide = pivot_wide(recoded_long)
    return combined_wide


In [333]:
r = get_amount_table()
r = r.sum(axis=1)

  r.sum(axis=1)


0       2.5
1      17.5
2       8.5
3       5.0
4       2.0
       ... 
623     9.0
624     7.0
625     8.0
626     1.5
627     6.0
Length: 628, dtype: float64

In [179]:
def prices_list():
    
    df_prices = pd.read_csv(INPUT_PATH, header=None)
    df_prices = df_prices.sort_values(0, ascending=True)
    df_prices = df_prices.transpose()
    prices = df_prices.loc[1].values.tolist()
    return prices

In [192]:
def combine_prices_ingredients():
    ingredients = get_amount_table()
    ingredients = ingredients.transpose()
    ingredients =  ingredients.drop(labels="strdrink",axis=0)
    prices = prices_list()
    ingredients["prices"] = prices
    return ingredients

In [244]:
def get_ingredient_cost():
    df = get_amount_table()
    drinks = df["strdrink"].values.tolist()
    df = combine_prices_ingredients()
    for i in range(0,627):
        df[i] = df[i]*df["prices"]
    df= df.transpose()
    df = df.drop(labels = "prices", axis = 0)
    df["strdrink"] = drinks
    df = df.transpose()
    
    return df

In [344]:
def drink_cost():
    data = get_ingredient_cost()
    amounts = get_amount_table()
    data.columns = data.loc['strdrink']
    drinks = data.columns.values.tolist()
    data = data.drop("strdrink", axis=0)
    df_cost = data.transpose()
    cost = df_cost.sum(axis=1).values.tolist()
    amounts = amounts.sum(axis=1).values.tolist()
    d = {
        "strdrink": drinks,
        "cost": cost,
        "total oz": amounts
    }
    df = pd.DataFrame(d)
    
    return df

In [346]:
df = drink_cost()

  amounts = amounts.sum(axis=1).values.tolist()


In [347]:
df

Unnamed: 0,strdrink,cost,total oz
0,1-900-FUK-MEUP,1.400374,2.5
1,110 in the shade,2.589531,17.5
2,151 Florida Bushwacker,3.997031,8.5
3,155 Belmont,1.994557,5.0
4,24k nightmare,1.097500,2.0
...,...,...,...
623,Zizi Coin-coin,5.553424,9.0
624,Zoksel,1.568750,7.0
625,Zombie,5.520212,8.0
626,Zorbatini,0.736056,1.5


In [349]:
df["cost"].corr(df["total oz"])

0.9717407567440367