<a href="https://colab.research.google.com/github/Nekokan1500/Machine-Learning/blob/main/Example_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U scikit-surprise==1.1.0

In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from surprise.dataset import Dataset
from surprise import Reader
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise import AlgoBase

In [5]:
# Fetch OpenML dataset 1220 and reduce it to a (user, item, rating) table.
data = fetch_openml(data_id=1220)

# Keep only the two id columns as integers, then attach the target as the rating.
df = pd.DataFrame(data['data'], columns=data['feature_names'])[['user_id', 'ad_id']].astype(int)
df['user_rating'] = pd.Series(data['target']).astype(int)

# Only the last expression of a cell is rendered, so the mean must be printed
# explicitly; as a bare expression it would be computed and then discarded.
print('Mean user_rating: {:.3f}'.format(df['user_rating'].mean()))
df.head(4)

Unnamed: 0,user_id,ad_id,user_rating
0,0,8343295,0
1,562934,20017077,1
2,11621116,21348354,0
3,8778348,20366086,0


In [12]:
# Two-row preview: one row per (user_id, ad_id) pair, keeping the maximum
# rating of each pair, with the group keys restored as ordinary columns.
(
    df.head(2)
    .groupby(by=['user_id', 'ad_id'])
    .max()
    .reset_index()
)

Unnamed: 0,user_id,ad_id,user_rating
0,0,8343295,0
1,562934,20017077,1


In [6]:
# Illustration of the user-item rating matrix, built from the first ten rows:
# each row of the matrix represents a user, each column represents an item
# (ad), and each cell holds the rating given by that user to that item.
# Pairs with no rating are filled with 0.
#
# `pivot` is called with keyword arguments because positional arguments are
# rejected (keyword-only) from pandas 2.0 onwards.
(
    df.head(10)
    .groupby(['user_id', 'ad_id'])
    .max()
    .reset_index()
    .pivot(index='user_id', columns='ad_id', values='user_rating')
    .fillna(0)
    .astype(int)
)

ad_id,6803526,8343295,9027213,20017077,20366086,20886690,21186478,21348354,21367376,21811752
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0,0,0,0,0,0,0,0,0,0
562934,0,0,0,1,0,0,0,0,0,0
579253,0,0,0,0,0,0,0,0,0,0
2886008,0,0,0,0,0,0,1,0,0,0
5277279,0,0,0,0,0,0,0,0,0,0
7589739,0,0,0,0,0,0,0,0,0,0
8778348,0,0,0,0,0,0,0,0,0,0
11621116,0,0,0,0,0,0,0,0,0,0
11808635,0,0,1,0,0,0,0,0,0,0
12118311,0,0,0,0,0,0,0,0,0,0


In [14]:
# Load the ratings frame into Surprise's own Dataset container.
# The reader declares the rating scale as 0..1; `df` supplies its columns in
# the order user_id, ad_id, user_rating.
reader = Reader(rating_scale=(0,1))
dataset = Dataset.load_from_df(df, reader)

# Hold out 25% of the ratings. The split is seeded so that re-running the
# notebook yields the same train/test partition.
trainset, testset = train_test_split(dataset, test_size=0.25, random_state=42)

In [19]:
def predict_evaluate(recsys, dataset, name='Algorithm', cv=4):
  """Cross-validate a recommender and print its average test errors.

  Args:
    recsys: Algorithm object handed to `cross_validate`.
    dataset: Dataset handed to `cross_validate`.
    name: Label shown in square brackets at the end of the printed summary.
    cv: Cross-validation setting forwarded to `cross_validate`. Defaults to 4,
      the value that used to be hard-coded.

  Returns:
    None. The summary line is printed, not returned.
  """
  scores = cross_validate(recsys, dataset, measures=['RMSE', 'MAE'], cv=cv)
  # Each entry holds one score per fold; report the mean across folds.
  avg_mae = scores['test_mae'].mean()
  avg_rmse = scores['test_rmse'].mean()
  print('Testset Avg. MAE: {:.2f} & Avg. RMSE: {:.2f} [{}]'.format(
      avg_mae, avg_rmse, name
  ))

In [17]:
# Baseline recommender that guesses ratings at random
class RandomRating(AlgoBase):
  """Recommender whose every estimate is a single Bernoulli draw.

  `p` is used as the success probability of `np.random.binomial` with n=1,
  so each estimate is 1 with probability `p` and 0 otherwise.
  """

  def __init__(self, p=0.5):
    """Store the success probability and run the base-class initialiser."""
    self.p = p
    super().__init__()

  def estimate(self, u, i):
    """Return one random 0/1 draw; `u` and `i` are not consulted."""
    draw = np.random.binomial(n=1, p=self.p, size=1)
    return draw[0]

In [20]:
# Score the random baseline with the cross-validation helper.
# NOTE(review): p=0.168 presumably mirrors the mean of df['user_rating'] — confirm.
recsys = RandomRating(0.168)
predict_evaluate(recsys, dataset, name='RandomRating')

Testset Avg. MAE: 0.28 & Avg. RMSE: 0.53 [RandomRating]
