In [48]:
import pandas as pd
import numpy as np
import os 
import matplotlib.pyplot as plt
import seaborn as sns

In [49]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [65]:
# Load the raw table from userdata.csv (path is relative to the notebook's
# working directory). Every later cell encodes, aggregates or queries `df`.
df = pd.read_csv('userdata.csv')

In [51]:
# Text (object-dtype) columns must be one-hot encoded before they can be used as
# numeric features. `Name` is excluded by name rather than by position: it is
# the label used to report the ranking later, not a feature, and skipping
# "whichever object column comes first" would break silently if the CSV column
# order changed.
to_encode = [
    column
    for column in df.select_dtypes(include=['object']).columns
    if column != 'Name'
]
print(to_encode)

['Dining_Hall', 'Time', 'Station_Name']


In [52]:
# One-hot encode every categorical column listed in `to_encode` with a single
# call instead of one copy-pasted get_dummies/join/drop stanza per column.
# With `columns=`, get_dummies removes the source columns and appends their
# indicator columns after the remaining ones, in the order listed.
# `prefix_sep='__'` keeps the existing double-underscore column names
# (e.g. 'Dining_Hall__Marciano', 'Time__Lunch', 'Station_Name__Grill').
df = pd.get_dummies(df, columns=to_encode, prefix_sep='__')


In [53]:
# Collapse the four nutrition columns into a single `Nutrient` feature.
# NOTE(review): the columns are summed as-is; in the sample rows Calories is by
# far the largest term, so it dominates the total. Confirm that is intended.
Nutrient = df['Calories'] + df['saturated_fat'] + df['protein'] + df['carbohydrates']

# Attach the new column directly (aligned on df's own index) and drop the four
# source columns in one step. An index-on-index merge would do the same for a
# unique index but would multiply rows if index labels were ever duplicated.
df = df.assign(Nutrient=Nutrient).drop(
    columns=['Calories', 'saturated_fat', 'protein', 'carbohydrates']
)
# `Name` is deliberately kept: later cells read df.Name to label the scores.


In [54]:
# Check the column dtypes after encoding; in the saved output `Name` is the
# only object-typed column left among those listed.
df.dtypes

Name                               object
is_vegetarian                       int64
is_vegan                            int64
is_sargent_choice                   int64
is_glutenfree                       int64
is_halal                            int64
is_sustainable                      int64
Dining_Hall__Marciano               uint8
Dining_Hall__Warren                 uint8
Dining_Hall__West                   uint8
Time__Breakfast                     uint8
Time__Dinner                        uint8
Time__Lunch                         uint8
Station_Name__Asian Fusion          uint8
Station_Name__Bakery                uint8
Station_Name__Brick Oven            uint8
Station_Name__Cop2                  uint8
Station_Name__Deli                  uint8
Station_Name__Exhibition Saute      uint8
Station_Name__Gluten Free           uint8
Station_Name__Grill                 uint8
Station_Name__Home Zone             uint8
Station_Name__International         uint8
Station_Name__Mediterranean       

In [55]:
# Positional split: the first 20% of rows go to df_train, the rest to df_test.
# Column 0 is left out of both frames (it is `Name` in the dtypes listing), so
# only the remaining feature columns are compared.
split_at = int(0.2 * len(df))
df_train = df.iloc[:split_at, 1:]
df_test = df.iloc[split_at:, 1:]

In [56]:
from sklearn.metrics.pairwise import cosine_similarity


In [57]:
# Pairwise cosine similarity: one row per df_train item, one column per
# df_test item.
similarity = cosine_similarity(df_train, df_test)

# Aggregate score per df_train item = its similarities to every df_test item
# added together. The built-in sum is used on each row so the additions happen
# in plain left-to-right order.
similarities = [sum(row_scores) for row_scores in similarity]
similarities

[75.97841237249217,
 75.98745424074218,
 75.99044467197929,
 75.98773189310694,
 2.9384375625140735,
 75.98222831558985,
 75.9816247913705,
 75.97879191394259,
 75.9806902431433,
 75.9795833873188,
 75.95427412419434,
 75.97568877339482,
 75.98888949689267,
 2.5130013462363494,
 75.97881000384022,
 75.97784145764368,
 75.9802224589712,
 75.97733269720496,
 75.9725298727644]

In [58]:
# Map each name to its aggregate similarity score.
# The pairing is positional (zip stops once `similarities` is exhausted), so it
# does not depend on df carrying a default 0..n-1 index the way a label lookup
# such as Name[i] does.
# NOTE(review): names are not unique among these rows (19 scores collapse to 17
# keys in the saved output); a later duplicate overwrites the earlier score.
# Confirm that last-wins is acceptable.
Name = df.Name
Rank = dict(zip(Name, similarities))
Rank

{'Chocolate Chip Coffee Cake': 75.97841237249217,
 'Hash Brown Potatoes': 75.98745424074218,
 'Chicken Noodle Soup': 75.9725298727644,
 'Brown Rice': 75.98773189310694,
 'Create Your Own Sandwich Station': 2.5130013462363494,
 'Cosmic Brownie': 75.98222831558985,
 'BBQ Chicken and Carmelized Onion Pizza': 75.9816247913705,
 'Local Homestyle French Fries': 75.97879191394259,
 'Grilled Chicken Breast': 75.9806902431433,
 'Sesame Honey Salmon': 75.9795833873188,
 'Hard Boiled Egg': 75.95427412419434,
 'Chocolate Chip Blondie': 75.97568877339482,
 'Chicken & Asiago Sub': 75.98888949689267,
 'Chicken & Veggie Stir Fry': 75.97881000384022,
 'Mandarin Fried Rice with Tempeh': 75.97784145764368,
 'Crunchy Onion Rings': 75.9802224589712,
 'BBQ Onion & Grilled Portobello Sandwich': 75.97733269720496}

In [59]:
# Re-key the scores in ascending order of score (lowest aggregate similarity
# first). sorted() is stable, so names with equal scores keep their order
# from Rank.
Rank_sorted = {dish: Rank[dish] for dish in sorted(Rank, key=Rank.get)}
Rank_sorted

{'Create Your Own Sandwich Station': 2.5130013462363494,
 'Hard Boiled Egg': 75.95427412419434,
 'Chicken Noodle Soup': 75.9725298727644,
 'Chocolate Chip Blondie': 75.97568877339482,
 'BBQ Onion & Grilled Portobello Sandwich': 75.97733269720496,
 'Mandarin Fried Rice with Tempeh': 75.97784145764368,
 'Chocolate Chip Coffee Cake': 75.97841237249217,
 'Local Homestyle French Fries': 75.97879191394259,
 'Chicken & Veggie Stir Fry': 75.97881000384022,
 'Sesame Honey Salmon': 75.9795833873188,
 'Crunchy Onion Rings': 75.9802224589712,
 'Grilled Chicken Breast': 75.9806902431433,
 'BBQ Chicken and Carmelized Onion Pizza': 75.9816247913705,
 'Cosmic Brownie': 75.98222831558985,
 'Hash Brown Potatoes': 75.98745424074218,
 'Brown Rice': 75.98773189310694,
 'Chicken & Asiago Sub': 75.98888949689267}

In [84]:
# Show the full row(s) for one dish, selected by exact name match.
# NOTE(review): the saved output lists the raw, un-encoded columns, so this
# cell appears to have been run on a freshly loaded df; confirm when
# re-running the notebook top to bottom.
df[df['Name'] == 'Chicken & Veggie Stir Fry']

Unnamed: 0,Name,Dining_Hall,Time,Station_Name,is_vegetarian,is_vegan,is_sargent_choice,is_glutenfree,is_halal,is_sustainable,Calories,saturated_fat,protein,carbohydrates
14,Chicken & Veggie Stir Fry,Warren,Lunch,Gluten Free,0,0,0,1,1,1,85,1,6,2


In [45]:
# Preview the first five rows of df.
# NOTE(review): the saved output shows the raw columns as loaded from the CSV,
# i.e. it predates the encoding cells; confirm where this cell belongs.
df.head()

Unnamed: 0,Name,Dining_Hall,Time,Station_Name,is_vegetarian,is_vegan,is_sargent_choice,is_glutenfree,is_halal,is_sustainable,Calories,saturated_fat,protein,carbohydrates
0,Chocolate Chip Coffee Cake,Marciano,Breakfast,Bakery,1,0,0,0,1,1,190,3,2,30
1,Hash Brown Potatoes,Marciano,Lunch,Grill,0,1,0,0,0,1,150,1,2,20
2,Chicken Noodle Soup,Marciano,Lunch,Home Zone,0,0,0,0,0,1,90,0,6,11
3,Brown Rice,Marciano,Lunch,Gluten Free,0,1,0,1,1,1,110,0,2,23
4,Create Your Own Sandwich Station,Marciano,Lunch,Deli,0,0,0,0,0,0,0,0,0,0
