# Importando os dados

In [12]:
import pandas as pd

# Read the tab-separated ratings file; column names are passed explicitly
# through `names` instead of being taken from the file.
df_filmes = pd.read_csv(
    "u.data",
    sep="\t",
    names=["user_id", "prod_id", "rating", "timestamp"],
)
df_filmes.head()

Unnamed: 0,user_id,prod_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


# Ajustando a data

In [13]:
from datetime import datetime

# Convert the Unix-epoch seconds in `timestamp` to 'YYYY-MM-DD HH:MM:SS' strings (UTC).
# Vectorised with pandas instead of a per-row Python loop; this also avoids
# datetime.utcfromtimestamp, which is deprecated since Python 3.12.
# The result is kept as a plain list of strings, as before.
data_convertida = (
    pd.to_datetime(df_filmes['timestamp'].astype('int64'), unit='s')
    .dt.strftime('%Y-%m-%d %H:%M:%S')
    .tolist()
)

In [14]:
# Temporary single-column DataFrame holding the formatted date strings.
new_data_df = pd.DataFrame(data_convertida, columns=['Data'])

In [15]:
# Overwrite the raw epoch values with the formatted date strings.
# Select the 'Data' column explicitly: assigning a whole DataFrame to a single
# column only works while that frame has exactly one column.
df_filmes['timestamp'] = new_data_df['Data']

In [16]:
# Preview the first five rows to confirm the timestamp column now holds date strings.
df_filmes.head(n=5)

Unnamed: 0,user_id,prod_id,rating,timestamp
0,196,242,3,1997-12-04 15:55:49
1,186,302,3,1998-04-04 19:22:22
2,22,377,1,1997-11-07 07:18:36
3,244,51,2,1997-11-27 05:02:03
4,166,346,1,1998-02-02 05:33:16


# Fazendo o treinamento

In [17]:
# Importando pacotes
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
import os
from surprise.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD

In [18]:
# Build the Surprise dataset from the (user, item, rating) columns,
# declaring a 1-5 rating scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    df_filmes.loc[:, ['user_id', 'prod_id', 'rating']],
    reader,
)

# Hold out 30% of the ratings for evaluation, with a fixed random_state.
trainset, testset = train_test_split(data, test_size=0.3, random_state=10)

# Item-based collaborative filtering: user_based=False computes similarities
# between items; set it to True to switch to user-based filtering.
algo = KNNWithMeans(k=5, sim_options=dict(user_based=False))
algo.fit(trainset)

# Predict the held-out ratings.
test_pred = algo.test(testset)

Computing the msd similarity matrix...
Done computing similarity matrix.


In [19]:
# Report the root-mean-squared error of the predictions on the test set.
# rmse() prints the score (verbose=True) and returns it, so the value is also
# shown as the cell output.
print("Item-based Model : Test Set")
accuracy.rmse(predictions=test_pred, verbose=True)

Item-based Model : Test Set
RMSE: 1.0051


1.00506760750583

In [20]:
# Ten nearest neighbours of the item whose *inner* id is 0 (the model is item-based).
# NOTE(review): per the Surprise docs, both the argument and the returned values
# are inner ids, not the raw prod_id values; map them with trainset.to_raw_iid
# if raw ids are needed.
algo.get_neighbors(iid=0, k=10)

[24, 65, 292, 637, 933, 946, 976, 1185, 1208, 1244]

# Salvando o modelo em um arquivo

In [21]:
import pickle

# Serialize the trained model to disk.
# NOTE(review): the extension is 'pk1' (digit one), possibly a typo for 'pkl';
# kept unchanged because other code may load the file under this exact name.
filename = 'modelo.pk1'
# A context manager guarantees the file handle is flushed and closed after the
# dump; the original left the handle returned by open() unclosed.
with open(filename, 'wb') as arquivo_modelo:
    pickle.dump(algo, arquivo_modelo)