In [9]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [10]:
# Loading the data:

# Read the DLG menu export; cells containing the literal text 'null' are parsed as missing
dlg_food_items = pd.read_csv(
    filepath_or_buffer='Dining Hall Food Items - DLG.csv',
    na_values=['null'],
)

# Preview the first five rows
print(dlg_food_items.head(n=5))

                            Food Item Serving Location
0                 Arroz con Leche (v)     Grill (Cafe)
1            Sausage & Cheese Biscuit     Grill (Cafe)
2        Cage Free Scrambled Eggs (v)     Grill (Cafe)
3  Cage Free Scrambled Egg Whites (v)     Grill (Cafe)
4                   Hash Browns (vgn)     Grill (Cafe)


In [11]:
# Cleaning and modifying the data:

# Set seed for reproducibility
# This seeds NumPy's global RNG, which diversified_rating() draws from through
# np.random.normal, so the generated ratings depend on this value and on row order.
np.random.seed(42)

# Define a simple heuristic-based tagging system
def tag_item(item):
    """Assign a coarse category tag to a food item name using keyword rules.

    The name is lower-cased and checked against each rule in order; the first
    rule with a keyword that occurs anywhere in the name wins. Matching is by
    plain substring, so short keywords also match inside longer words
    (e.g. "bar" matches "barbecue").

    Parameters
    ----------
    item : str or missing value
        Food item name. Non-string values (NaN/None from a blank or 'null'
        CSV cell) are tagged "other" instead of raising.

    Returns
    -------
    str
        One of "dessert", "main_fast", "main_entree", "soup", "side",
        "breakfast", "condiment_or_bread", or "other".
    """
    # Missing names have no .lower(); treat them as untagged rather than crashing.
    if not isinstance(item, str):
        return "other"

    name = item.lower()

    # Order matters: earlier rules take precedence over later ones.
    keyword_rules = (
        ("dessert", ("cake", "pie", "cobbler", "cookie", "brownie", "bar", "bun", "muffin", "scone")),
        ("main_fast", ("pizza", "burger", "burrito", "taco", "wrap", "sub", "sandwich", "quesadilla", "enchilada")),
        ("main_entree", ("stir fry", "pasta", "ravioli", "lasagna", "penne")),
        ("soup", ("soup", "chowder", "stew")),
        ("side", ("rice", "potato", "beans", "vegetable", "corn", "salad", "spinach", "greens")),
        # "french toast" is not listed separately: "toast" already matches it.
        ("breakfast", ("oatmeal", "pancake", "waffle", "biscuit", "toast", "cereal")),
        ("condiment_or_bread", ("sauce", "salsa", "relish", "bread", "roll", "naan", "tortilla")),
    )
    for tag, keywords in keyword_rules:
        if any(keyword in name for keyword in keywords):
            return tag
    return "other"

# Rating estimator
def estimate_rating(tag):
    """Return the baseline (mean) rating associated with a food tag.

    Tags that have no entry in the table fall back to 4.1, the same value
    used for "other".
    """
    default_rating = 4.1
    baseline_by_tag = dict(
        dessert=4.6,
        main_fast=4.3,
        main_entree=4.4,
        soup=4.2,
        side=4.0,
        breakfast=4.3,
        condiment_or_bread=3.9,
        other=4.1,
    )
    if tag in baseline_by_tag:
        return baseline_by_tag[tag]
    return default_rating

# Diversified rating generator
# Per-tag standard deviation of the synthetic rating noise
tag_std_dev = dict(
    dessert=0.8,
    main_fast=1,
    main_entree=0.8,
    soup=0.8,
    side=1,
    breakfast=0.8,
    condiment_or_bread=1.2,
    other=0.8,
)

def diversified_rating(row):
    """Draw a synthetic rating for one row, based on its "Tag".

    A value is sampled from a normal distribution centred on
    estimate_rating(tag) with the tag's standard deviation (0.2 when the tag
    has no entry in tag_std_dev), rounded to two decimals, and limited to the
    range 1.0 to 5.0. The draw comes from NumPy's global RNG.
    """
    tag = row["Tag"]
    center = estimate_rating(tag)
    spread = tag_std_dev.get(tag, 0.2)
    sampled = round(np.random.normal(loc=center, scale=spread), 2)
    capped = min(5.0, sampled)
    return max(1.0, capped)

# Apply tagging and generate ratings
# "Tag" has to be assigned first: diversified_rating reads row["Tag"].
dlg_food_items["Tag"] = dlg_food_items["Food Item"].apply(tag_item)
# Ratings are synthetic (one np.random.normal draw per row), so they depend on
# the seed set above and on the row order of the frame.
dlg_food_items["Diversified Rating"] = dlg_food_items.apply(diversified_rating, axis=1)

# Look at the first few rows
print(dlg_food_items.head())

                            Food Item Serving Location        Tag  \
0                 Arroz con Leche (v)     Grill (Cafe)      other   
1            Sausage & Cheese Biscuit     Grill (Cafe)  breakfast   
2        Cage Free Scrambled Eggs (v)     Grill (Cafe)      other   
3  Cage Free Scrambled Egg Whites (v)     Grill (Cafe)      other   
4                   Hash Browns (vgn)     Grill (Cafe)      other   

   Diversified Rating  
0                4.50  
1                4.19  
2                4.62  
3                5.00  
4                3.91  


In [12]:
# Setting up the recommender system:

# Step 1: Describe the encoding - both categorical columns are one-hot encoded
preprocessor = ColumnTransformer(
    transformers=[
        (step_name, OneHotEncoder(sparse_output=False), [column])
        for step_name, column in (('tag', 'Tag'), ('location', 'Serving Location'))
    ],
    remainder='passthrough',  # leave the remaining column (Diversified Rating) unchanged
)

# Step 2: Encode the three feature columns into a numeric matrix
feature_matrix = preprocessor.fit_transform(
    dlg_food_items.loc[:, ['Diversified Rating', 'Tag', 'Serving Location']]
)

# Step 3: Pairwise cosine similarity between every pair of items, computed once
similarity_matrix = cosine_similarity(feature_matrix)

# Step 4: For each food item, find the top 4 recommendations
def dlg_generate_all_recommendations(food_items=None, similarities=None, top_n=4):
    """Build a table with the ``top_n`` most similar dishes for every food item.

    Parameters
    ----------
    food_items : pandas.DataFrame, optional
        Frame with 'Food Item' and 'Serving Location' columns. Defaults to the
        notebook-level ``dlg_food_items``.
    similarities : array-like of shape (n_items, n_items), optional
        Pairwise similarity scores whose row/column order matches the rows of
        ``food_items``. Defaults to the notebook-level ``similarity_matrix``.
        The matrix is never modified.
    top_n : int, default 4
        Number of recommendations per item.

    Returns
    -------
    pandas.DataFrame
        One row per food item with columns 'Food Item', then 'Rec k' and
        'Rec k Location' for k = 1..top_n. Slots that cannot be filled
        (fewer than ``top_n`` other items) hold the string "None".

    Raises
    ------
    ValueError
        If ``top_n`` is negative or the similarity matrix does not have one
        row and one column per food item.
    """
    if food_items is None:
        food_items = dlg_food_items
    if similarities is None:
        similarities = similarity_matrix
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    similarities = np.asarray(similarities)
    n_items = len(food_items)
    # The notebook reuses the name `similarity_matrix` for every dining hall;
    # fail loudly if a matrix built for another table is still in scope.
    if similarities.ndim != 2 or similarities.shape != (n_items, n_items):
        raise ValueError(
            f"similarity matrix has shape {similarities.shape}, "
            f"expected ({n_items}, {n_items}) to match the food items table"
        )

    names = food_items['Food Item'].tolist()
    locations = food_items['Serving Location'].tolist()

    records = []
    for idx, food_name in enumerate(names):
        # Work on a copy: indexing a row returns a view, and writing -1 into
        # it would overwrite the diagonal of the caller's matrix.
        scores = similarities[idx].copy()
        scores[idx] = -1

        # Most similar first; drop the item itself explicitly so it can never
        # be recommended to itself, even when there are few other items.
        ranked = scores.argsort()[::-1]
        ranked = ranked[ranked != idx][:top_n]

        record = [food_name]
        for rec_idx in ranked:
            record.extend((names[rec_idx], locations[rec_idx]))
        # Pad unfilled slots so every record has the same width.
        record.extend(["None"] * (2 * (top_n - len(ranked))))
        records.append(tuple(record))

    columns = ['Food Item']
    for rank in range(1, top_n + 1):
        columns.extend([f'Rec {rank}', f'Rec {rank} Location'])

    return pd.DataFrame(records, columns=columns)

# Step 5: Build the recommendation table and preview its first five rows
dlg_all_recs = dlg_generate_all_recommendations()
print(dlg_all_recs.head(n=5))


                            Food Item                           Rec 1  \
0                 Arroz con Leche (v)           Sausage Patty Chicken   
1            Sausage & Cheese Biscuit                      Waffle (v)   
2        Cage Free Scrambled Eggs (v)                  Kosher Hot Dog   
3  Cage Free Scrambled Egg Whites (v)  Krinkle Cut French Fries (vgn)   
4                   Hash Browns (vgn)    Grilled Cheddar on Wheat (v)   

  Rec 1 Location                                     Rec 2 Rec 2 Location  \
0   Grill (Cafe)           Grilled Cheese on Sourdough (v)   Grill (Cafe)   
1   Grill (Cafe)                Egg and Cheese Biscuit (v)   Grill (Cafe)   
2   Grill (Cafe)                     Sausage Patty Chicken   Grill (Cafe)   
3   Grill (Cafe)  Mexican Style Cage Free Scrambled Egg(v)   Grill (Cafe)   
4   Grill (Cafe)            Grilled Ham & Cheddar on Wheat   Grill (Cafe)   

                                Rec 3 Rec 3 Location  \
0        Cage Free Scrambled Eggs (v)   Gr

In [13]:
# Saving to CSV (the row index is not written):

dlg_all_recs.to_csv(path_or_buf="de-la-guerra.csv", index=False)

In [14]:
# Loading the data:

# Read the Carrillo menu export; cells containing the literal text 'null' are parsed as missing
carrillo_food_items = pd.read_csv(
    filepath_or_buffer='Dining Hall Food Items - Carrillo.csv',
    na_values=['null'],
)

# Preview the first five rows
print(carrillo_food_items.head(n=5))

                      Food Item Serving Location
0                 Oatmeal (vgn)     Grill (Cafe)
1                         Bacon     Grill (Cafe)
2   Apple Crepe w/Caramel Sauce     Grill (Cafe)
3             Hash Browns (vgn)     Grill (Cafe)
4  Cage Free Eggs Scrambled (v)     Grill (Cafe)


In [15]:
# Cleaning and modifying the data:

# Set seed for reproducibility
# Re-seeding NumPy's global RNG here makes this hall's ratings independent of
# how many draws earlier cells consumed.
# NOTE(review): every hall re-seeds with 42, so row k of each hall gets the same
# underlying normal draw - confirm that is intended.
np.random.seed(42)

# Apply tagging and generate ratings
# "Tag" has to be assigned first: diversified_rating reads row["Tag"].
carrillo_food_items["Tag"] = carrillo_food_items["Food Item"].apply(tag_item)
# Ratings are synthetic (one np.random.normal draw per row, in row order).
carrillo_food_items["Diversified Rating"] = carrillo_food_items.apply(diversified_rating, axis=1)

# Look at the first few rows
print(carrillo_food_items.head())

                      Food Item Serving Location                 Tag  \
0                 Oatmeal (vgn)     Grill (Cafe)           breakfast   
1                         Bacon     Grill (Cafe)               other   
2   Apple Crepe w/Caramel Sauce     Grill (Cafe)  condiment_or_bread   
3             Hash Browns (vgn)     Grill (Cafe)               other   
4  Cage Free Eggs Scrambled (v)     Grill (Cafe)               other   

   Diversified Rating  
0                4.70  
1                3.99  
2                4.68  
3                5.00  
4                3.91  


In [16]:
# Setting up the recommender system:

# Step 1: Describe the encoding - both categorical columns are one-hot encoded
preprocessor = ColumnTransformer(
    transformers=[
        (step_name, OneHotEncoder(sparse_output=False), [column])
        for step_name, column in (('tag', 'Tag'), ('location', 'Serving Location'))
    ],
    remainder='passthrough',  # leave the remaining column (Diversified Rating) unchanged
)

# Step 2: Encode the three feature columns into a numeric matrix
feature_matrix = preprocessor.fit_transform(
    carrillo_food_items.loc[:, ['Diversified Rating', 'Tag', 'Serving Location']]
)

# Step 3: Pairwise cosine similarity between every pair of items, computed once
similarity_matrix = cosine_similarity(feature_matrix)

# Step 4: For each food item, find the top 4 recommendations
def carrillo_generate_all_recommendations(food_items=None, similarities=None, top_n=4):
    """Build a table with the ``top_n`` most similar dishes for every food item.

    Parameters
    ----------
    food_items : pandas.DataFrame, optional
        Frame with 'Food Item' and 'Serving Location' columns. Defaults to the
        notebook-level ``carrillo_food_items``.
    similarities : array-like of shape (n_items, n_items), optional
        Pairwise similarity scores whose row/column order matches the rows of
        ``food_items``. Defaults to the notebook-level ``similarity_matrix``.
        The matrix is never modified.
    top_n : int, default 4
        Number of recommendations per item.

    Returns
    -------
    pandas.DataFrame
        One row per food item with columns 'Food Item', then 'Rec k' and
        'Rec k Location' for k = 1..top_n. Slots that cannot be filled
        (fewer than ``top_n`` other items) hold the string "None".

    Raises
    ------
    ValueError
        If ``top_n`` is negative or the similarity matrix does not have one
        row and one column per food item.
    """
    if food_items is None:
        food_items = carrillo_food_items
    if similarities is None:
        similarities = similarity_matrix
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    similarities = np.asarray(similarities)
    n_items = len(food_items)
    # The notebook reuses the name `similarity_matrix` for every dining hall;
    # fail loudly if a matrix built for another table is still in scope.
    if similarities.ndim != 2 or similarities.shape != (n_items, n_items):
        raise ValueError(
            f"similarity matrix has shape {similarities.shape}, "
            f"expected ({n_items}, {n_items}) to match the food items table"
        )

    names = food_items['Food Item'].tolist()
    locations = food_items['Serving Location'].tolist()

    records = []
    for idx, food_name in enumerate(names):
        # Work on a copy: indexing a row returns a view, and writing -1 into
        # it would overwrite the diagonal of the caller's matrix.
        scores = similarities[idx].copy()
        scores[idx] = -1

        # Most similar first; drop the item itself explicitly so it can never
        # be recommended to itself, even when there are few other items.
        ranked = scores.argsort()[::-1]
        ranked = ranked[ranked != idx][:top_n]

        record = [food_name]
        for rec_idx in ranked:
            record.extend((names[rec_idx], locations[rec_idx]))
        # Pad unfilled slots so every record has the same width.
        record.extend(["None"] * (2 * (top_n - len(ranked))))
        records.append(tuple(record))

    columns = ['Food Item']
    for rank in range(1, top_n + 1):
        columns.extend([f'Rec {rank}', f'Rec {rank} Location'])

    return pd.DataFrame(records, columns=columns)

# Step 5: Build the recommendation table and preview its first five rows
carrillo_all_recs = carrillo_generate_all_recommendations()
print(carrillo_all_recs.head(n=5))

                      Food Item                                 Rec 1  \
0                 Oatmeal (vgn)              Sausage Biscuits & Gravy   
1                         Bacon     Grilled Tuna & Swiss on Sourdough   
2   Apple Crepe w/Caramel Sauce  Falafel on Pita with Raita Sauce (v)   
3             Hash Browns (vgn)          Grilled Ham with Honey Glaze   
4  Cage Free Eggs Scrambled (v)    Cage Free Scrambled Egg Whites (v)   

  Rec 1 Location                                 Rec 2 Rec 2 Location  \
0   Grill (Cafe)        Fluffy Whole Wheat Waffles (v)   Grill (Cafe)   
1   Grill (Cafe)               Machaca de Jalisco Eggs   Grill (Cafe)   
2   Grill (Cafe)             Chicharron en Salsa Verde   Grill (Cafe)   
3   Grill (Cafe)  Shrimp Sopes w/Roasted Poblano Crema   Grill (Cafe)   
4   Grill (Cafe)                           Grits (vgn)   Grill (Cafe)   

                                 Rec 3 Rec 3 Location  \
0         Whole Wheat French Toast (v)   Grill (Cafe)   
1       

In [17]:
# Saving to CSV (the row index is not written):

carrillo_all_recs.to_csv(path_or_buf="carrillo.csv", index=False)

In [18]:
# Loading the data:

# Read the Portola menu export; cells containing the literal text 'null' are parsed as missing
portola_food_items = pd.read_csv(
    filepath_or_buffer='Dining Hall Food Items - Portola.csv',
    na_values=['null'],
)

# Preview the first five rows
print(portola_food_items.head(n=5))

                      Food Item Serving Location
0  Whole Plain Greek Yogurt (v)  Greens & Grains
1         Strawberry Yogurt (v)  Greens & Grains
2              Watermelon (vgn)  Greens & Grains
3       Grapefruit Halves (vgn)  Greens & Grains
4              Cantaloupe (vgn)  Greens & Grains


In [19]:
# Cleaning and modifying the data:

# Set seed for reproducibility
# Re-seeding NumPy's global RNG here makes this hall's ratings independent of
# how many draws earlier cells consumed.
# NOTE(review): every hall re-seeds with 42, so row k of each hall gets the same
# underlying normal draw - confirm that is intended.
np.random.seed(42)

# Apply tagging and generate ratings
# "Tag" has to be assigned first: diversified_rating reads row["Tag"].
portola_food_items["Tag"] = portola_food_items["Food Item"].apply(tag_item)
# Ratings are synthetic (one np.random.normal draw per row, in row order).
portola_food_items["Diversified Rating"] = portola_food_items.apply(diversified_rating, axis=1)

# Look at the first few rows
print(portola_food_items.head())

                      Food Item Serving Location    Tag  Diversified Rating
0  Whole Plain Greek Yogurt (v)  Greens & Grains  other                4.50
1         Strawberry Yogurt (v)  Greens & Grains  other                3.99
2              Watermelon (vgn)  Greens & Grains  other                4.62
3       Grapefruit Halves (vgn)  Greens & Grains  other                5.00
4              Cantaloupe (vgn)  Greens & Grains  other                3.91


In [20]:
# Setting up the recommender system:

# Step 1: Describe the encoding - both categorical columns are one-hot encoded
preprocessor = ColumnTransformer(
    transformers=[
        (step_name, OneHotEncoder(sparse_output=False), [column])
        for step_name, column in (('tag', 'Tag'), ('location', 'Serving Location'))
    ],
    remainder='passthrough',  # leave the remaining column (Diversified Rating) unchanged
)

# Step 2: Encode the three feature columns into a numeric matrix
feature_matrix = preprocessor.fit_transform(
    portola_food_items.loc[:, ['Diversified Rating', 'Tag', 'Serving Location']]
)

# Step 3: Pairwise cosine similarity between every pair of items, computed once
similarity_matrix = cosine_similarity(feature_matrix)

# Step 4: For each food item, find the top 4 recommendations
def portola_generate_all_recommendations(food_items=None, similarities=None, top_n=4):
    """Build a table with the ``top_n`` most similar dishes for every food item.

    Parameters
    ----------
    food_items : pandas.DataFrame, optional
        Frame with 'Food Item' and 'Serving Location' columns. Defaults to the
        notebook-level ``portola_food_items``.
    similarities : array-like of shape (n_items, n_items), optional
        Pairwise similarity scores whose row/column order matches the rows of
        ``food_items``. Defaults to the notebook-level ``similarity_matrix``.
        The matrix is never modified.
    top_n : int, default 4
        Number of recommendations per item.

    Returns
    -------
    pandas.DataFrame
        One row per food item with columns 'Food Item', then 'Rec k' and
        'Rec k Location' for k = 1..top_n. Slots that cannot be filled
        (fewer than ``top_n`` other items) hold the string "None".

    Raises
    ------
    ValueError
        If ``top_n`` is negative or the similarity matrix does not have one
        row and one column per food item.
    """
    if food_items is None:
        food_items = portola_food_items
    if similarities is None:
        similarities = similarity_matrix
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    similarities = np.asarray(similarities)
    n_items = len(food_items)
    # The notebook reuses the name `similarity_matrix` for every dining hall;
    # fail loudly if a matrix built for another table is still in scope.
    if similarities.ndim != 2 or similarities.shape != (n_items, n_items):
        raise ValueError(
            f"similarity matrix has shape {similarities.shape}, "
            f"expected ({n_items}, {n_items}) to match the food items table"
        )

    names = food_items['Food Item'].tolist()
    locations = food_items['Serving Location'].tolist()

    records = []
    for idx, food_name in enumerate(names):
        # Work on a copy: indexing a row returns a view, and writing -1 into
        # it would overwrite the diagonal of the caller's matrix.
        scores = similarities[idx].copy()
        scores[idx] = -1

        # Most similar first; drop the item itself explicitly so it can never
        # be recommended to itself, even when there are few other items.
        ranked = scores.argsort()[::-1]
        ranked = ranked[ranked != idx][:top_n]

        record = [food_name]
        for rec_idx in ranked:
            record.extend((names[rec_idx], locations[rec_idx]))
        # Pad unfilled slots so every record has the same width.
        record.extend(["None"] * (2 * (top_n - len(ranked))))
        records.append(tuple(record))

    columns = ['Food Item']
    for rank in range(1, top_n + 1):
        columns.extend([f'Rec {rank}', f'Rec {rank} Location'])

    return pd.DataFrame(records, columns=columns)

# Step 5: Build the recommendation table and preview its first five rows
portola_all_recs = portola_generate_all_recommendations()
print(portola_all_recs.head(n=5))

                      Food Item                     Rec 1   Rec 1 Location  \
0  Whole Plain Greek Yogurt (v)       Sliced Jack Cheese   Greens & Grains   
1         Strawberry Yogurt (v)   Sliced Swiss Cheese (v)  Greens & Grains   
2              Watermelon (vgn)  Sliced Provolone Cheese   Greens & Grains   
3       Grapefruit Halves (vgn)    Sliced Cheddar Cheese   Greens & Grains   
4              Cantaloupe (vgn)    Italian Tomato Panini   Greens & Grains   

                         Rec 2   Rec 2 Location  \
0         Raspberry Yogurt (v)  Greens & Grains   
1         Blueberry Yogurt (v)  Greens & Grains   
2                 Jack Cheese   Greens & Grains   
3  Granola Cantaloupe Parfait   Greens & Grains   
4          Sliced Genoa Salami  Greens & Grains   

                            Rec 3   Rec 3 Location  \
0       Organic Fresh Strawberry   Greens & Grains   
1    Honeydew & Berry Parfait (v)  Greens & Grains   
2              Grapefruit Halves   Greens & Grains   
3  Grano

In [21]:
# Saving to CSV (the row index is not written):

portola_all_recs.to_csv(path_or_buf="portola.csv", index=False)