In [37]:
import pandas as pd
import numpy as np
from database import engine
from ingredient_map import create_ingredient_map
from quant_preprocess import query_data
from quant_preprocess import cols_to_lower
from quant_preprocess import get_cols_list
from quant_preprocess import query_and_preprocess_data
from quant_preprocess import shape_data_long
from quant_preprocess import merge_long
from quant_preprocess import pivot_wide
from quant_preprocess import recode_ingredients
import os


# CSV of ingredient prices, read by prices_list(). The relative path is
# resolved against the kernel's current working directory.
# NOTE(review): the recorded outputs of prices_list() and drink_cost() show
# FileNotFoundError for this path - confirm where the notebook is meant to run.
INPUT_PATH = os.path.join("..//data", "ingredient_prices_clean.csv")

# Preprocessed drinks table returned by the project's quant_preprocess helper.
# NOTE(review): not referenced by any cell visible here - confirm it is needed.
df_drinks = query_and_preprocess_data()


In [22]:
def create_dummies(df):
    """Turn a wide amounts table into 0/1 "ingredient is used" indicators.

    The first column of ``df`` is skipped by position and the 'strdrink'
    column is copied through unchanged. Every other column becomes 1 where
    its value is greater than zero and 0 otherwise.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'strdrink' column and comparable-to-zero value columns.

    Returns
    -------
    pandas.DataFrame
        New frame built from plain lists, so it carries a fresh default index.
    """
    indicator_columns = {"strdrink": df["strdrink"].values.tolist()}
    for column in df.columns[1:]:
        # Column is looked up by the string form of its label.
        is_used = df[str(column)] > 0
        indicator_columns[column] = is_used.astype(int).values.tolist()
    return pd.DataFrame.from_dict(indicator_columns)

In [23]:
def summmary_of_oz(df):
    """Return describe() statistics for the ten columns with the largest mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are summarised with ``DataFrame.describe``.

    Returns
    -------
    pandas.DataFrame
        One row per described column (at most ten), sorted by the 'mean'
        statistic in descending order.
    """
    stats_by_column = df.describe().T
    ranked = stats_by_column.sort_values(by="mean", ascending=False)
    return ranked.iloc[:10]

In [24]:
def summary_of_usage():
    """Summarise how often each ingredient column is used across drinks.

    Loads the preprocessed drinks table, converts it to 0/1 usage flags with
    create_dummies, and returns describe() statistics for the ten columns
    with the highest mean. Because the flags are 0/1, the mean of a column is
    the share of rows in which that column is greater than zero.

    Returns
    -------
    pandas.DataFrame
        At most ten rows, sorted by 'mean' in descending order.
    """
    # create_dummies is idempotent on its own output, so one pass is enough.
    usage_flags = create_dummies(query_and_preprocess_data())
    usage_stats = usage_flags.describe().transpose()
    return usage_stats.sort_values('mean', ascending=False).head(10)
    

In [25]:
# Display the usage summary table (top ten columns by mean of the 0/1 flags).
summary_of_usage()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
fruit juice,628.0,0.394904,0.48922,0.0,0.0,0.0,1.0,1.0
liqueur,628.0,0.35828,0.479877,0.0,0.0,0.0,1.0,1.0
fruit,628.0,0.321656,0.467484,0.0,0.0,0.0,1.0,1.0
sugar,628.0,0.316879,0.465631,0.0,0.0,0.0,1.0,1.0
rum,628.0,0.18949,0.39221,0.0,0.0,0.0,0.0,1.0
gin,628.0,0.176752,0.381762,0.0,0.0,0.0,0.0,1.0
vodka,628.0,0.167197,0.37345,0.0,0.0,0.0,0.0,1.0
spice,628.0,0.152866,0.360145,0.0,0.0,0.0,0.0,1.0
milk,628.0,0.146497,0.353885,0.0,0.0,0.0,0.0,1.0
soda,628.0,0.130573,0.337202,0.0,0.0,0.0,0.0,1.0


In [26]:
def get_amount_table():
    """Build the wide table of ingredient amounts per drink.

    Pipeline, all delegated to project helpers: query the raw data, lower-case
    the 'stringredient' columns, reshape ingredient and measure columns to
    long form, merge them, recode ingredient names through the ingredient
    map, and pivot back to wide form.

    Returns
    -------
    Whatever pivot_wide returns. The recorded notebook output shows a frame
    with a 'strdrink' column followed by one numeric column per ingredient.
    """
    raw = query_data()
    ingredient_columns = get_cols_list(raw, "stringredient")
    amount_columns = get_cols_list(raw, "strmeasure")
    raw[ingredient_columns] = cols_to_lower(raw, ingredient_columns)

    # Long-form views of the ingredient names and of the measures.
    long_ingredients = shape_data_long(
        raw, ingredient_columns, "stringredient", "", "ingredient"
    )
    long_amounts = shape_data_long(
        raw, amount_columns, "strmeasure", "_clean", "amount"
    )
    long_combined = merge_long(long_ingredients, long_amounts)

    recoded = recode_ingredients(long_combined, create_ingredient_map())
    return pivot_wide(recoded)


In [30]:
# Column totals of the amount table. numeric_only=True keeps the text column
# 'strdrink' out of the reduction; summing it would concatenate drink names.
r = get_amount_table().sum(axis=0, numeric_only=True)

In [31]:
# Display the wide amount table; the recorded output is the truncated repr.
get_amount_table()

ingredient,strdrink,absinthe,advocaat,aperitif,aquavit,baileys,beer,bitters,bourbon,brandy,...,sugard,sweet and sour,tequila,tonic water,vermouth,vodka,water,whiskey,wine,zima
0,1-900-FUK-MEUP,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
1,110 in the shade,0.0,0.0,0.00,0.0,0.0,16.0,0.0,0.0,0.0,...,0.0,0.0,1.5,0.0,0.0,0.00,0.0,0.0,0.0,0.0
2,151 Florida Bushwacker,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
3,155 Belmont,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.00,0.0,0.0,0.0,0.0
4,24k nightmare,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
623,Zizi Coin-coin,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
624,Zoksel,0.0,0.0,0.00,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
625,Zombie,0.0,0.0,0.00,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
626,Zorbatini,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.25,0.0,0.0,0.0,0.0


In [38]:
def prices_list(path=None):
    """Return the second CSV column, ordered by the first CSV column.

    The file is read without a header row, its rows are sorted ascending by
    column 0, and the values of column 1 are returned in that order.
    Presumably column 0 holds ingredient names and column 1 their prices -
    verify against the data file.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to the module-level INPUT_PATH, so existing
        zero-argument calls behave as before.

    Returns
    -------
    list
        Values of column 1 after sorting by column 0.

    Raises
    ------
    FileNotFoundError
        If the file does not exist (propagated from pandas.read_csv).
    """
    if path is None:
        # Resolved at call time so a later change to INPUT_PATH is honoured.
        path = INPUT_PATH
    df_prices = pd.read_csv(path, header=None)
    # Select column 1 directly; no transpose is needed to read one column.
    return df_prices.sort_values(0, ascending=True)[1].tolist()

In [39]:
# Display the price list read from INPUT_PATH.
prices_list()

FileNotFoundError: [Errno 2] No such file or directory: '..//data/ingredient_prices_clean.csv'

In [34]:
def combine_prices_ingredients():
    """Return the amount table transposed, with a 'prices' column attached.

    The amount table is flipped so that its original columns become rows, the
    'strdrink' row is dropped, and the list from prices_list() is assigned as
    a new 'prices' column. The assignment is positional: the list must have
    one entry per remaining row, in the same order.
    """
    by_ingredient = get_amount_table().transpose().drop(labels="strdrink", axis=0)
    by_ingredient["prices"] = prices_list()
    return by_ingredient

In [12]:
def get_ingredient_cost():
    """Return each amount multiplied by its row's price, per drink column.

    Takes the frame from combine_prices_ingredients(), multiplies every
    non-'prices' column by the 'prices' column row by row, and appends a
    final 'strdrink' row holding the drink names.

    Returns
    -------
    pandas.DataFrame
        Same rows as combine_prices_ingredients() plus a trailing 'strdrink'
        row; one column per drink, with no 'prices' column.

    Notes
    -----
    Drink names come from a separate get_amount_table() call and are matched
    to columns by position, so both calls must return rows in the same order.
    """
    drinks = get_amount_table()["strdrink"].values.tolist()
    priced = combine_prices_ingredients()

    # Multiply whatever drink columns exist instead of assuming exactly 627.
    costs = priced.drop(columns="prices").mul(priced["prices"], axis=0)

    # Attach names as a column of the drink-major view, then flip back so the
    # names end up as the last row.
    costs = costs.transpose()
    costs["strdrink"] = drinks
    return costs.transpose()

In [13]:
def drink_cost():
    """Return one row per drink with its summed cost and summed amount.

    Returns
    -------
    pandas.DataFrame
        Columns 'strdrink' (drink name), 'cost' (sum of the per-ingredient
        costs from get_ingredient_cost) and 'total oz' (sum of the numeric
        columns of get_amount_table; presumably ounces - verify units).

    Notes
    -----
    Costs and amounts come from separate calls and are paired by position, so
    both tables must list drinks in the same order.
    """
    cost_table = get_ingredient_cost()
    amounts = get_amount_table()

    drinks = cost_table.loc["strdrink"].values.tolist()
    per_drink_costs = cost_table.drop("strdrink", axis=0).transpose()
    cost = per_drink_costs.sum(axis=1).values.tolist()

    # 'strdrink' is text; exclude it explicitly rather than relying on pandas
    # silently dropping non-numeric columns, which newer versions no longer do.
    total_oz = amounts.sum(axis=1, numeric_only=True).values.tolist()

    return pd.DataFrame(
        {
            "strdrink": drinks,
            "cost": cost,
            "total oz": total_oz,
        }
    )

In [15]:
# Per-drink cost table. The cells below read `df`, so they need this cell to
# have run successfully first.
df = drink_cost()

FileNotFoundError: [Errno 2] No such file or directory: '..//data/ingredient_prices_clean.csv'

In [16]:
# Display the per-drink cost table; requires `df` from the drink_cost() cell.
df

NameError: name 'df' is not defined

In [17]:
# Correlation between the 'cost' and 'total oz' columns of `df`.
df["cost"].corr(df["total oz"])

NameError: name 'df' is not defined