Collaborative Filtering (SVD):

In [45]:
# Transform the order data from JSON into a flat CSV file


import json
import pandas as pd

# Load the raw orders. JSON text is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default encoding.
with open('orders1.json', 'r', encoding='utf-8') as f:
    data = json.load(f)


def _order_rows(user_id, order):
    """Yield one flat record per pizza, beverage and extra in a single order.

    Args:
        user_id: Key under which the order is stored in the loaded JSON object.
        order: Dict with "customer_name", "phone_number" and an
            "order_details" dict that holds "order_time" plus the optional
            "pizzas", "beverages" and "extras" lists.

    Yields:
        dict: The customer/order fields followed by the item-specific fields.
        Key order is kept stable because it determines the CSV column order.

    Raises:
        KeyError: If a required customer or order field is missing.
    """
    shared = {
        "user_id": user_id,
        "customer_name": order["customer_name"],
        "phone_number": order["phone_number"],
        "order_time": order["order_details"]["order_time"],
    }
    details = order["order_details"]

    # `or []` covers both a missing section and one that is null in the JSON.
    for pizza in details.get("pizzas") or []:
        yield {
            **shared,
            "item_type": "pizza",
            "size": pizza.get("size", "N/A"),
            "toppings": pizza.get("toppings", "N/A"),
            "quantity": pizza.get("quantity", 0),
        }

    for beverage in details.get("beverages") or []:
        yield {
            **shared,
            "item_type": "beverage",
            "item": beverage.get("item", "N/A"),
            "quantity": beverage.get("quantity", 0),
        }

    # Extras are stored as bare values, so each one counts as quantity 1.
    for extra in details.get("extras") or []:
        yield {
            **shared,
            "item_type": "extra",
            "item": extra,
            "quantity": 1,
        }


# Flatten every order into one record per ordered item.
order_list = [
    row
    for user_id, order in data.items()
    for row in _order_rows(user_id, order)
]

# Build the DataFrame once from the full list of records.
df = pd.DataFrame(order_list)

# Persist the flattened orders without the positional index.
df.to_csv("orders.csv", index=False)

df.head(3)

Unnamed: 0,user_id,customer_name,phone_number,order_time,item_type,size,toppings,quantity,item
0,3,soulemane,3475445527,2024-12-14 12:14:45.633819,pizza,Small,Cheese,1,
1,4,soulemane,3475445527,2024-12-14 12:15:33.477505,pizza,Medium,Cheese,1,
2,5,soulemane,3475445527,2024-12-14 12:23:11.361059,beverage,,,2,Coke


Recommender Algorithm

In [49]:
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy
import random
import joblib

# Build the menu lookup table
def create_menu():
    """Return the menu as a DataFrame with columns ``Item_ID`` and ``Item_Name``.

    IDs are assigned sequentially, starting at 1, in the order the names are
    listed below.
    """
    dishes = ["Pizza Margherita", "Spaghetti Carbonara", "Lasagna", "Penne Arrabbiata", "Cannoli"]
    drinks = ["Coke", "Pepsi", "Lemonade", "Water", "Tea"]
    names = dishes + drinks
    item_ids = range(1, len(names) + 1)
    return pd.DataFrame({'Item_ID': item_ids, 'Item_Name': names})

# Function to create a synthetic order history
def create_orders(num_orders=30000, seed=None):
    """Generate a random order history for the recommender.

    Every column is drawn independently: ``item_type``, ``size`` and
    ``toppings`` are NOT derived from ``item_id``, so a row may pair a
    beverage id with a pizza size. All rows share one placeholder timestamp.

    Args:
        num_orders: Number of rows to generate.
        seed: Optional seed for a private random generator, making the output
            reproducible. When ``None`` (the default) the module-level
            ``random`` generator is used, exactly as before.

    Returns:
        pandas.DataFrame with columns ``user_id``, ``item_id``, ``quantity``,
        ``item_type``, ``size``, ``toppings`` and ``order_time``.
    """
    rng = random if seed is None else random.Random(seed)

    user_pool = [1, 2, 3, 4, 5]
    item_pool = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    # Repeated entries (including None) act as sampling weights.
    item_types = ['pizza', 'pasta', 'pizza', 'pasta', 'dessert', 'beverage', 'beverage', 'beverage', 'beverage', 'beverage']
    sizes = ['Small', 'Medium', 'Large', None, None, None, None, None, None, None]
    toppings = ['Cheese', 'Tomato', 'Bacon', 'Pepperoni', None, None, None, None, None, None]

    # Columns are drawn one after another in this fixed order so that a
    # globally seeded `random` keeps producing the same frame as before.
    user_ids = [rng.choice(user_pool) for _ in range(num_orders)]
    item_ids = [rng.choice(item_pool) for _ in range(num_orders)]
    quantities = [rng.randint(1, 5) for _ in range(num_orders)]  # inclusive 1..5
    item_type = [rng.choice(item_types) for _ in range(num_orders)]
    size = [rng.choice(sizes) for _ in range(num_orders)]
    topping = [rng.choice(toppings) for _ in range(num_orders)]

    return pd.DataFrame({
        'user_id': user_ids,
        'item_id': item_ids,
        'quantity': quantities,
        'item_type': item_type,
        'size': size,
        'toppings': topping,
        'order_time': pd.to_datetime(["2024-12-14 12:00:00"] * num_orders)  # Placeholder time
    })

# Prepare data for the Surprise model
def prepare_data(df_orders):
    """Convert the order history into a Surprise dataset, using quantity as the rating.

    The caller's DataFrame is left untouched: the capped ratings are written
    to a new frame, so re-running a cell that calls this is idempotent.

    Args:
        df_orders: DataFrame with ``user_id``, ``item_id`` and ``quantity``
            columns.

    Returns:
        The object returned by ``Dataset.load_from_df`` for the
        (user_id, item_id, quantity) triples with a 1-5 rating scale.
    """
    # Cap quantities at the top of the rating scale. Values below 1 are passed
    # through unchanged, as before.
    ratings = (
        df_orders[['user_id', 'item_id', 'quantity']]
        .assign(quantity=lambda frame: frame['quantity'].clip(upper=5))
    )
    reader = Reader(rating_scale=(1, 5))  # Rating scale (1-5)
    return Dataset.load_from_df(ratings, reader)

# Fit an SVD model on a random train/test split of the data
def train_model(data, test_size=0.2):
    """Split ``data``, fit a default ``SVD`` on the training part and return both.

    Args:
        data: Dataset accepted by ``train_test_split``.
        test_size: Passed through to ``train_test_split`` as ``test_size``.

    Returns:
        tuple: ``(fitted_model, testset)`` where ``testset`` is the held-out
        part of the split that the model was not fitted on.
    """
    train_part, held_out = train_test_split(data, test_size=test_size)
    svd = SVD()
    svd.fit(train_part)
    return svd, held_out

# Persist a trained model to disk
def save_model(model, filename):
    """Serialise ``model`` to ``filename`` with ``joblib.dump``."""
    joblib.dump(value=model, filename=filename)

# Restore a previously saved model from disk
def load_model(filename):
    """Return the object stored in ``filename`` via ``joblib.load``.

    ``FileNotFoundError`` from a missing file propagates to the caller.
    """
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code; only load model files from a trusted source.
    restored = joblib.load(filename)
    return restored

# Generate recommendations for a user by ranking every menu item
def get_recommendations(user_id, model, df_menu, top_n=5):
    """Return the ``top_n`` menu items with the highest predicted rating.

    Only ``user_id`` and each item's id are passed to the model; item type,
    size and toppings are not used here.

    Args:
        user_id: User identifier passed to ``model.predict``.
        model: Object whose ``predict(user_id, item_id)`` returns a result
            with an ``est`` attribute.
        df_menu: DataFrame with ``Item_ID`` and ``Item_Name`` columns.
        top_n: Maximum number of recommendations to return.

    Returns:
        list[tuple]: ``(item_name, estimated_rating)`` pairs, best first. Ties
        keep menu order. Empty when the menu is empty.
    """
    item_ids = df_menu['Item_ID'].tolist()
    item_names = df_menu['Item_Name'].tolist()

    # Walk ids and names together instead of re-filtering the menu per item.
    scored = [
        (name, model.predict(user_id, item_id).est)
        for item_id, name in zip(item_ids, item_names)
    ]

    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:top_n]

# Main function to execute the entire workflow
def main():
    """Build the data, obtain a model, report its RMSE and print recommendations.

    A model cached in ``svd_model.pkl`` is reused when present; otherwise a
    new one is trained and saved. A freshly trained model is scored on the
    held-out part of the same split it was trained on, so no test row was
    seen during fitting.
    """
    df_menu = create_menu()
    df_orders = create_orders()
    data = prepare_data(df_orders)

    model_filename = 'svd_model.pkl'
    target_user = 1

    # Check if the model already exists and load it, otherwise train and save it
    try:
        model = load_model(model_filename)
    except FileNotFoundError:
        print("Model not found, training a new model...")
        # Keep the test set from the training split: evaluating on any other
        # split would score the model on rows it was fitted on.
        model, testset = train_model(data)
        save_model(model, model_filename)
        print("Model saved to file.")
    else:
        print("Model loaded from file.")
        # The cached model's own held-out split is not stored, so it is
        # scored on a fresh split of the data generated in this run.
        _, testset = train_test_split(data, test_size=0.2)

    # Evaluate model accuracy
    print("Evaluating model accuracy...")
    predictions = model.test(testset)
    # verbose=False: accuracy.rmse prints the score itself by default, which
    # duplicated the formatted line below.
    rmse = accuracy.rmse(predictions, verbose=False)
    print(f"RMSE: {rmse:.4f}")

    # Generate recommendations for the target user
    recommendations = get_recommendations(user_id=target_user, model=model, df_menu=df_menu, top_n=5)

    print(f"Recommendations for User {target_user}:")
    for item_name, rating in recommendations:
        print(f"{item_name} with predicted rating: {rating:.2f}")

# Run the workflow when this code is executed directly (a script run or a
# notebook cell, where __name__ is "__main__"), but not when it is imported.
if __name__ == "__main__":
    main()


Model loaded from file.
Evaluating model accuracy...
RMSE: 1.4130
RMSE: 1.4130
Recommendations for User 1:
Water with predicted rating: 3.27
Pepsi with predicted rating: 3.19
Lemonade with predicted rating: 3.15
Cannoli with predicted rating: 3.12
Lasagna with predicted rating: 3.11
