In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
path = os.path.join("df_main.csv")
df = pd.read_csv(path)
df.columns

Index(['Unnamed: 0', 'category', 'beverage', 'food', 'rating', 'recipeName',
       'nutrition', 'ingred', 'ingr_count'],
      dtype='object')

In [3]:
wine = df[["category", "beverage", "food", "nutrition"]]
wine

Unnamed: 0,category,beverage,food,nutrition
0,White Wine,Sancerre,Chevre Cheese,"[260.2, 32.0, 10.0, 17.0, 20.0, 68.0, 2.0]"
1,White Wine,Sancerre,Goat Cheese,"[269.3, 25.0, 7.0, 15.0, 14.0, 52.0, 8.0]"
2,White Wine,"Gewürztraminer, Alsace",Muenster Cheese,"[414.4, 36.0, 6.0, 26.0, 63.0, 55.0, 4.0]"
3,Dessert Wine,Sauternes,Roquefort Cheese,"[226.2, 8.0, 162.0, 13.0, 9.0, 16.0, 14.0]"
4,Dessert Wine,Banyuls,Dark Chocolate,"[80.9, 1.0, 67.0, 0.0, 2.0, 2.0, 6.0]"
...,...,...,...,...
7780,White Wine,Grenache Blanc,Roasted Peppers,"[769.6, 49.0, 17.0, 56.0, 56.0, 69.0, 30.0]"
7781,White Wine,Grenache Blanc,Marjoram,"[64.1, 0.0, 3.0, 0.0, 5.0, 0.0, 4.0]"
7782,Red Wine,Beaujolais,Seafood Macaroni and Cheese,
7783,Red Wine,Cabernet Sauvignon,Dark Chocolate,"[80.9, 1.0, 67.0, 0.0, 2.0, 2.0, 6.0]"


In [4]:
df.columns

Index(['Unnamed: 0', 'category', 'beverage', 'food', 'rating', 'recipeName',
       'nutrition', 'ingred', 'ingr_count'],
      dtype='object')

In [5]:
# Dropping the extra column
df = df.drop(columns='Unnamed: 0')

In [6]:
df.columns

Index(['category', 'beverage', 'food', 'rating', 'recipeName', 'nutrition',
       'ingred', 'ingr_count'],
      dtype='object')

In [7]:
df = df.dropna()

In [8]:
"""This takes the ingred columns and reformats them so they can be read as lists.
    They were originally stored in the dataframe as strings, so trying to iterate through them
    printed out each individual character instead of the list element.
"""
df['ingred'] = df['ingred'].apply(lambda x: x.strip('][').split(', '))

In [9]:
# Might not be necessary, but I just wanted to see how many ingredients there were

ingredients = []
for ing in df['ingred']:
    for item in ing:
        if item not in set(ingredients):
            ingredients.append(item)

In [10]:
len(ingredients)

1882

## Breaking up the nutrition columns

In [11]:
# Same as with the ingred column. Doing this just makes it iterable as a list.
df['nutrition'] = df['nutrition'].apply(lambda x: x.strip('][').split(', '))

In [12]:
"""This dict initialization could probably be automated, but I just wrote it out :/
    The for loop runs a .apply method on the nutrition column that takes every
    value at a particular index and appends it to its corresponding number in the
    nutrition dictionary. I wasn't sure what the columns should be called, so for now,
    they're just numbers.
"""
nutrition = {
    '1': [],
    '2': [],
    '3': [],
    '4': [],
    '5': [],
    '6': [],
    '7': []
}

for i in range(1, len(nutrition.keys()) + 1):
    df['nutrition'].apply(lambda x: nutrition[str(i)].append(x[i - 1]))

In [13]:
nutrition['1']

['260.2',
 '269.3',
 '414.4',
 '226.2',
 '80.9',
 '954.7',
 '954.7',
 '1536.7',
 '365.5',
 '221.9',
 '221.9',
 '405.1',
 '243.8',
 '458.8',
 '661.9',
 '661.9',
 '241.4',
 '696.8',
 '696.8',
 '339.3',
 '339.3',
 '800.2',
 '138.9',
 '138.9',
 '669.6',
 '26.4',
 '26.4',
 '445.1',
 '445.1',
 '398.4',
 '188.0',
 '63.3',
 '116.5',
 '1144.3',
 '1144.3',
 '1144.3',
 '78.6',
 '78.6',
 '78.6',
 '78.6',
 '78.6',
 '78.6',
 '18.4',
 '338.9',
 '338.9',
 '338.9',
 '338.9',
 '926.0',
 '57.5',
 '57.5',
 '57.5',
 '57.5',
 '153.2',
 '180.5',
 '180.5',
 '566.9',
 '566.9',
 '260.8',
 '242.0',
 '1678.6',
 '501.8',
 '260.8',
 '260.8',
 '1336.4',
 '1336.4',
 '604.9',
 '318.1',
 '140.5',
 '617.7',
 '617.7',
 '1684.2',
 '1684.2',
 '173.1',
 '434.2',
 '639.3',
 '337.4',
 '337.4',
 '337.4',
 '449.3',
 '164.5',
 '159.1',
 '232.5',
 '763.1',
 '763.1',
 '105.1',
 '260.2',
 '260.2',
 '260.2',
 '346.6',
 '938.5',
 '938.5',
 '938.5',
 '938.5',
 '212.6',
 '212.6',
 '212.6',
 '212.6',
 '82.6',
 '82.6',
 '260.2',
 '366.0'

In [14]:
df2 = df.nutrition.apply(pd.Series)
df2.columns = ['team1', 'team2']

ValueError: Length mismatch: Expected axis has 7 elements, new values have 2 elements

In [None]:
df = df.drop(columns='Unnamed: 0')

In [None]:
df.shape

In [None]:
df.dropna(subset=['nutrition']).shape

In [None]:
df.dropna().shape

In [None]:
for colu

In [None]:
wine = wine[wine.nutrition != ""]
wine

In [None]:
wine[["calories", "total fat", "sugar", "sodium", "protein", "saturated fat", "carbohydrates"]] = pd.DataFrame(wine.nutrition.values.tolist(), index = wine.index)
wine