In [33]:
import pandas as pd
# Load the movie catalogue into `data`; later cells read its 'movieId' and 'title' columns.
# NOTE(review): '/content/movies.csv' is an absolute path that looks environment-specific — confirm.
data = pd.read_csv('/content/movies.csv')
# Bare last expression so the notebook renders the frame as this cell's output.
data

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [34]:
# Load the ratings table into `data1`; later cells read its 'movieId', 'rating' and
# 'timestamp' columns.
# NOTE(review): '/content/ratings.csv' is an absolute path that looks environment-specific — confirm.
data1 = pd.read_csv('/content/ratings.csv')
# Bare last expression so the notebook renders the frame as this cell's output.
data1

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [61]:
# Convert the 'timestamp' column (interpreted as seconds via unit='s') to datetime values
# and store them in a new 'datetime' column.
# NOTE(review): this adds the column to data1 in place, and this cell's execution count (61)
# is higher than the cells that follow it — confirm the notebook reproduces on a fresh
# top-to-bottom run.
data1['datetime'] = pd.to_datetime(data1['timestamp'], unit='s')

data1

Unnamed: 0,userId,movieId,rating,timestamp,datetime
0,1,1,4.0,964982703,2000-07-30 18:45:03
1,1,3,4.0,964981247,2000-07-30 18:20:47
2,1,6,4.0,964982224,2000-07-30 18:37:04
3,1,47,5.0,964983815,2000-07-30 19:03:35
4,1,50,5.0,964982931,2000-07-30 18:48:51
...,...,...,...,...,...
100831,610,166534,4.0,1493848402,2017-05-03 21:53:22
100832,610,168248,5.0,1493850091,2017-05-03 22:21:31
100833,610,168250,5.0,1494273047,2017-05-08 19:50:47
100834,610,168252,5.0,1493846352,2017-05-03 21:19:12


In [35]:
# Count how many rating rows each movieId has; value_counts() returns the counts
# sorted from most to least frequent.
best_seller = data1['movieId'].value_counts()
# Show the ten most-rated movie ids.
best_seller.head(10)

Unnamed: 0_level_0,count
movieId,Unnamed: 1_level_1
356,329
318,317
296,307
593,279
2571,278
260,251
480,238
110,237
589,224
527,220


In [36]:

# best_seller is a Series: move its index into a regular column and label the two
# resulting columns 'movieId' and 'frequencia'.
best_seller_df = best_seller.reset_index().set_axis(['movieId', 'frequencia'], axis=1)

# Left-join against the movie catalogue so every counted movieId keeps its row
# and picks up its title.
best_seller_com_nome = pd.merge(
    best_seller_df,
    data.loc[:, ['movieId', 'title']],
    how='left',
    on='movieId',
)

# Display the ten most-rated movies together with their titles.
best_seller_com_nome.head(10)

Unnamed: 0,movieId,frequencia,title
0,356,329,Forrest Gump (1994)
1,318,317,"Shawshank Redemption, The (1994)"
2,296,307,Pulp Fiction (1994)
3,593,279,"Silence of the Lambs, The (1991)"
4,2571,278,"Matrix, The (1999)"
5,260,251,Star Wars: Episode IV - A New Hope (1977)
6,480,238,Jurassic Park (1993)
7,110,237,Braveheart (1995)
8,589,224,Terminator 2: Judgment Day (1991)
9,527,220,Schindler's List (1993)


In [57]:
# Working copy of the ratings without the raw 'timestamp' column.
data2 = data1.drop(columns=['timestamp'])

In [58]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# Build the user x movie rating table from data2 (one row per userId, one column per
# movieId); user/movie pairs without a rating are filled with 0.
ratings_matrix = data2.pivot(index='userId', columns='movieId', values='rating')
ratings_matrix = ratings_matrix.fillna(0)

# Movie x user view of the same table: each row is one movie's rating vector.
ratings_matrix_T = ratings_matrix.transpose()

# Fit a brute-force nearest-neighbour index over the movie vectors using cosine distance.
knn = NearestNeighbors(algorithm='brute', metric='cosine').fit(ratings_matrix_T)

# Recommend movies the user has not seen yet
def recommend_unseen_movies_knn_weighted(user_id, ratings_matrix, knn_model, k=10, top_n=4, min_rating=4.0):
    """Score unseen movies by their similarity to the movies a user liked.

    For every movie the user rated at least ``min_rating``, the ``k`` nearest
    movies are looked up in ``knn_model``. Each neighbour the user has not rated
    (rating stored as 0) accumulates ``similarity * rating_of_the_liked_movie``,
    where similarity is ``1 - distance``.

    Parameters
    ----------
    user_id : label
        Row label of the user in ``ratings_matrix``.
    ratings_matrix : pandas.DataFrame
        User x movie ratings, with 0 meaning "not rated".
    knn_model : object
        Fitted neighbour model exposing ``kneighbors(X, n_neighbors=...)`` that
        returns ``(distances, indices)``. It must have been fitted on
        ``ratings_matrix.T`` with rows in the same order, because neighbour
        positions are mapped back to movie ids through that index.
    k : int, default 10
        Number of neighbours considered per liked movie.
    top_n : int, default 4
        Maximum number of recommendations returned.
    min_rating : float, default 4.0
        Minimum rating for a movie to count as "liked".

    Returns
    -------
    list of (movie_id, score)
        Up to ``top_n`` pairs, highest score first.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row of ``ratings_matrix``.
    """
    user_ratings = ratings_matrix.loc[user_id]
    liked_movies = user_ratings[user_ratings >= min_rating].index
    unseen_movies = set(user_ratings[user_ratings == 0].index)

    # Derive the movie x user view from the argument itself instead of relying on a
    # notebook-level global, so the function works with whatever matrix it is given.
    item_matrix = ratings_matrix.T

    # Ask for one extra neighbour because the query movie is normally returned too,
    # but never for more rows than the matrix holds.
    n_neighbors = min(k + 1, len(item_matrix))

    recommendations = {}
    for movie in liked_movies:
        query = item_matrix.loc[[movie]].to_numpy()
        distances, indices = knn_model.kneighbors(query, n_neighbors=n_neighbors)
        neighbour_ids = item_matrix.index[indices[0]]
        similarities = 1 - distances[0]

        # Drop the query movie by id rather than by position: with tied distances
        # it is not guaranteed to be the first neighbour returned.
        neighbours = [
            (candidate, similarity)
            for candidate, similarity in zip(neighbour_ids, similarities)
            if candidate != movie
        ][:k]

        weight = user_ratings.loc[movie]
        for sim_movie, similarity in neighbours:
            if sim_movie in unseen_movies:
                recommendations[sim_movie] = recommendations.get(sim_movie, 0) + similarity * weight

    sorted_recommendations = sorted(recommendations.items(), key=lambda item: item[1], reverse=True)
    return sorted_recommendations[:top_n]

# Recommend already-seen movies (rewatches)
def recommend_seen_movies(user_id, ratings_matrix, top_n=2, min_rating=4.0):
    """Return the user's own highest-rated movies as rewatch suggestions.

    Looks up the ``user_id`` row of ``ratings_matrix``, keeps the movies rated
    at least ``min_rating`` and returns up to ``top_n`` ``(movie_id, rating)``
    pairs ordered by rating, highest first. Movies with equal ratings keep the
    column order of ``ratings_matrix``.
    """
    row = ratings_matrix.loc[user_id]
    liked = row[row >= min_rating]

    # Pair each movie id with its rating, then order in place; list.sort is stable,
    # so ties stay in column order.
    ranked = list(zip(liked.index, liked.to_numpy()))
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]

# Map movieId values to titles
def map_to_titles(recommendations, movies_df):
    """Replace the movie id in each ``(movie_id, score)`` pair with its title.

    Parameters
    ----------
    recommendations : iterable of (movie_id, score)
        Pairs to translate; their order is preserved.
    movies_df : pandas.DataFrame
        Must contain 'movieId' and 'title' columns.

    Returns
    -------
    list of (title, score)
        One pair per recommendation whose id appears in ``movies_df['movieId']``;
        ids that are not present are skipped. If an id occurs on several rows,
        the title of the first such row is used.
    """
    recommendations = list(recommendations)
    wanted_ids = {movie_id for movie_id, _ in recommendations}

    # Filter the catalogue once instead of scanning it twice per recommendation.
    matches = movies_df.loc[movies_df['movieId'].isin(wanted_ids), ['movieId', 'title']]

    title_by_id = {}
    for movie_id, title in zip(matches['movieId'], matches['title']):
        # setdefault keeps the first title seen for a duplicated id.
        title_by_id.setdefault(movie_id, title)

    return [(title_by_id[movie_id], score)
            for movie_id, score in recommendations if movie_id in title_by_id]

# Print title-annotated recommendations for users 1 through 5
for user_id in range(1, 6):
    print(f"\nUsuário {user_id}:")
    # Unseen movies: up to 4 results, scored from the 10 nearest neighbours of each liked movie
    unseen_recs = recommend_unseen_movies_knn_weighted(user_id, ratings_matrix, knn, k=10, top_n=4)
    unseen_recs_with_titles = map_to_titles(unseen_recs, data)
    print("Filmes não vistos:", unseen_recs_with_titles)
    # Already-seen movies: up to 2 of the user's own top-rated titles, offered as rewatches
    seen_recs = recommend_seen_movies(user_id, ratings_matrix, top_n=2)
    seen_recs_with_titles = map_to_titles(seen_recs, data)
    print("Filmes vistos (rewatch):", seen_recs_with_titles)


Usuário 1:
Filmes não vistos: [("Ferris Bueller's Day Off (1986)", 44.501502279762285), ('Aliens (1986)', 31.087731996046497), ('Peter Pan (1953)', 26.267611983361842), ('True Lies (1994)', 25.26087532613268)]
Filmes vistos (rewatch): [('Seven (a.k.a. Se7en) (1995)', 5.0), ('Usual Suspects, The (1995)', 5.0)]

Usuário 2:
Filmes não vistos: [('Fight Club (1999)', 12.261658909263787), ('Batman Begins (2005)', 7.779304004449063), ('Deadpool (2016)', 5.837379445680707), ('Kill Bill: Vol. 2 (2004)', 5.6300440741896685)]
Filmes vistos (rewatch): [('Step Brothers (2008)', 5.0), ('Inside Job (2010)', 5.0)]

Usuário 3:
Filmes não vistos: [('Mad Max Beyond Thunderdome (1985)', 5.091251742575056), ('Predator (1987)', 4.870494480781407), ('Big Trouble in Little China (1986)', 4.721712055645144), ('RoboCop (1987)', 4.464928263720786)]
Filmes vistos (rewatch): [('Escape from L.A. (1996)', 5.0), ('Saturn 3 (1980)', 5.0)]

Usuário 4:
Filmes não vistos: [("Ferris Bueller's Day Off (1986)", 28.89540526