In [2]:

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from surprise import Dataset, Reader, KNNBasic, accuracy
from surprise.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import precision_score, recall_score

# Load your dataset with simulated ratings
df = pd.read_csv('/kaggle/input/bakery-with-simulatedrating/Bakery_with_SimulatedRating.csv')

label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df.DateTime)
df['DateTime'] = encoded_labels

label_encoder = LabelEncoder()
encoded_labels1 = label_encoder.fit_transform(df.Daypart)
df['Daypart'] = encoded_labels1

label_encoder = LabelEncoder()
encoded_labels2 = label_encoder.fit_transform(df.DayType)
df['DayType'] = encoded_labels2

# Collaborative Filtering (using Surprise)
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['TransactionNo', 'Items', 'SimulatedRating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

cf_model = KNNBasic()
cf_model.fit(trainset)
cf_predictions = cf_model.test(testset)

# Calculate MAE and RMSE for Collaborative Filtering
cf_true_ratings = [testset[i][2] for i in range(len(testset))]
cf_pred_ratings = [cf_predictions[i].est for i in range(len(cf_predictions))]

cf_mae = mean_absolute_error(cf_true_ratings, cf_pred_ratings)
cf_rmse = np.sqrt(mean_squared_error(cf_true_ratings, cf_pred_ratings))

print(f'Collaborative Filtering MAE: {cf_mae}')
print(f'Collaborative Filtering RMSE: {cf_rmse}')


# Content-Based Filtering (using scikit-learn)
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Items'])

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get content-based recommendations for a given item
def get_content_based_recommendations(item_index):
    sim_scores = list(enumerate(cosine_sim[item_index]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:6]  # Top 5 similar items (excluding itself)
    item_indices = [i[0] for i in sim_scores]
    return df['Items'].iloc[item_indices]

# Hybrid Recommendation System
def hybrid_recommendation(transaction_no):
    # Assuming 'transaction_no' is a unique identifier for a transaction
    # Get collaborative filtering recommendations for the given transaction
    # f_top_n = get_cf_recommendations(transaction_no)

    # Check if the transaction number exists in the dataset
    if transaction_no not in df['TransactionNo'].values:
        return "Transaction not found."

    # Find the item associated with the transaction (you may need to adapt this)
    item_index = df[df['TransactionNo'] == transaction_no].index[0]
    
    # Check if item_index is within bounds
    if item_index >= len(df):
        return "Item not found."

    # Get content-based recommendations for the item
    content_based_recommendations = get_content_based_recommendations(item_index)

    # Combine content-based recommendations with top-N CF recommendations
    # For simplicity, let's use the top-N items from CF for the hybrid recommendations
    cf_top_n = cf_model.get_neighbors(item_index, k=5)  # Get top 5 similar items using CF

    # Combine collaborative filtering and content-based recommendations
    hybrid_recommendations = list(set(content_based_recommendations).union(set(cf_top_n)))
    
    # Evaluate precision and recall
    true_items = df[df['TransactionNo'] == transaction_no]['Items'].tolist()
    true_items_set = set(true_items)
    recommended_items_set = set(hybrid_recommendations)

    precision = len(true_items_set.intersection(recommended_items_set)) / len(recommended_items_set)
    recall = len(true_items_set.intersection(recommended_items_set)) / len(true_items_set)

    #return precision, recall
    return hybrid_recommendations

# Example usage
transaction_no = 56  # Replace with the transaction number you want to get recommendations for
recommendations = hybrid_recommendation(transaction_no)
print(recommendations)

#precision, recall = hybrid_recommendation(transaction_no)
#print(f'Hybrid Recommendation Precision: {precision}')
#print(f'Hybrid Recommendation Recall: {recall}')


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Computing the msd similarity matrix...
Done computing similarity matrix.
Collaborative Filtering MAE: 0.35278335479468054
Collaborative Filtering RMSE: 0.783090321598133
[0, 2, 4, 5, 6, 'Coffee']
