In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pickle as pkl


from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Collaborative Filtering

## Singular Value Decomposition (SVD)


In [2]:
# Read the training interactions and tidy them in one pass:
#   1. discard the raw (unmapped) id columns,
#   2. promote the mapped ids `u` / `i` to `user_id` / `recipe_id`,
#   3. move those two id columns to the front, keeping the rest in order,
#   4. parse `date` into a proper datetime column.
df_interactions_train = (
    pd.read_csv("data/interactions_train.csv")
    .drop(columns=["user_id", "recipe_id"])
    .rename(columns={"u": "user_id", "i": "recipe_id"})
    .pipe(
        lambda frame: frame[
            ["user_id", "recipe_id"]
            + [col for col in frame.columns if col not in ("user_id", "recipe_id")]
        ]
    )
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

In [3]:
# One seed shared by the train/test split and the SVD initialisation, so the
# reported RMSE is reproducible on a fresh kernel (SVD starts from random
# factors and is otherwise non-deterministic between runs).
RANDOM_STATE = 42

# Wrap the (user, recipe, rating) triples in a Surprise dataset.
# NOTE(review): the reader declares a 1-5 rating scale — confirm the `rating`
# column contains no values outside that range (e.g. 0).
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_interactions_train[['user_id', 'recipe_id', 'rating']], reader)

# Hold out 20% of the interactions for evaluation.
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)

# Fit a matrix-factorisation model (library-default hyperparameters) on the training split.
svd = SVD(random_state=RANDOM_STATE)
svd.fit(trainset)

# Score the held-out interactions; `rmse` prints the metric and returns it,
# so leaving it as the last expression also displays the full-precision value.
predictions = svd.test(testset)
rmse(predictions)

RMSE: 0.9204


0.9204051235024998

# Content-Based Filtering

## Cosine Similarity
