# Pobranie danych oraz stworzenie macierzy oceny

In [81]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Print NumPy arrays in full instead of summarising long ones.
np.set_printoptions(threshold=np.inf)

# Upper bound (exclusive) on the movieId values kept from the ratings file.
max_move_id = 10000

file_rating_path = 'ml-latest-small/ratings.csv'
rating = pd.read_csv(file_rating_path)
rating = rating[rating['movieId'] < max_move_id]
# Largest movieId that actually occurs among the retained ratings.
max_movie_id = rating['movieId'].max()

movies_ids = rating['movieId'].unique()

# Every integer id in 1..max_movie_id gets a slot, even ids that no rating
# uses, so that row i of `movies` and column i of `rating_matrix` both refer
# to movieId i + 1.
all_movie_ids = np.arange(1, max_movie_id + 1)

file_movie_path = 'ml-latest-small/movies.csv'
all_possible_movies = pd.DataFrame({'movieId': all_movie_ids})
movies = pd.read_csv(file_movie_path)
movies = movies[movies['movieId'].isin(movies_ids)]
# Left merge keeps one row per possible id; ids without a rated movie get
# NaN title/genres.
movies = pd.merge(all_possible_movies, movies, how='left', on='movieId')

print(movies.head(5))

# Build the dense users x movies matrix by pivoting the ratings directly and
# then widening the columns to the full id range. This avoids materialising
# the (users x all ids) cartesian product and merging the ratings into it.
rating_matrix = rating.pivot(index='userId', columns='movieId', values='rating')
rating_matrix = rating_matrix.reindex(columns=all_movie_ids)
# A missing rating is represented as 0.
rating_matrix = rating_matrix.fillna(0)
rating_matrix = np.array(rating_matrix)

print(rating_matrix.shape)
no_movies = rating_matrix.shape[1]

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
(610, 9018)


# Funkcja do systemu rekomendacji

Trochę o podobieństwie cosinusowym:

    - wartość cosinusa mieści się w przedziale od -1 (filmy przeciwne) do +1 (filmy bardzo podobne)
    - w oficjalnym wzorze jest to $\cos\theta = \frac{x \cdot y}{\|x\|\,\|y\|}$, ale po normalizacji $\|x\| = 1$ oraz $\|y\| = 1$, więc wystarczy sam iloczyn skalarny $x \cdot y$

In [82]:
def _safe_normalize(values, norms):
    """Divide ``values`` by ``norms``, yielding 0 where the norm is unusable.

    A norm that is zero, NaN or infinite produces 0 in the corresponding
    output positions instead of triggering an invalid division.
    """
    values = np.asarray(values, dtype=float)
    norms = np.asarray(norms, dtype=float)
    result = np.zeros(np.broadcast(values, norms).shape, dtype=float)
    usable = np.isfinite(norms) & (norms > 0)
    np.divide(values, norms, out=result, where=usable)
    return result


def get_recommendation(rating_matrix, personal_rating):
    """Score every column of ``rating_matrix`` for one personal rating vector.

    Args:
        rating_matrix: 2-D array of ratings. In this notebook rows are users
            and columns are movies, with 0 meaning "not rated".
        personal_rating: 1-D sequence with one entry per column of
            ``rating_matrix``.

    Returns:
        1-D NumPy array with one score per column of ``rating_matrix``.
        All-zero columns, and an all-zero ``personal_rating``, yield scores
        of 0 rather than NaN.
    """
    rating_matrix = np.asarray(rating_matrix, dtype=float)
    personal_rating = np.asarray(personal_rating, dtype=float)

    # Scale every column (movie) to unit length; columns without any rating
    # have norm 0 and stay all-zero.
    normalized_rating_matrix = _safe_normalize(
        rating_matrix, np.linalg.norm(rating_matrix, axis=0)
    )
    normalized_personal = _safe_normalize(
        personal_rating, np.linalg.norm(personal_rating)
    )

    # One matrix product gives, for every row (user), the dot product of that
    # row's column-normalised ratings with the normalised personal ratings.
    # Rows themselves are not normalised, so this is a similarity weight per
    # user rather than a strict cosine.
    z = np.dot(normalized_rating_matrix, normalized_personal)

    # Project the normalised user profile back onto the columns to obtain a
    # score per movie.
    recommendation = np.dot(
        normalized_rating_matrix.T, _safe_normalize(z, np.linalg.norm(z))
    )
    return recommendation

def propose_recommendations(recommendation, movies_matrix):
    """Attach scores to the movie table and return it ranked best-first.

    Args:
        recommendation: One score per row of ``movies_matrix``.
        movies_matrix: DataFrame with at least ``movieId``, ``title`` and
            ``genres`` columns. It is not modified.

    Returns:
        A new DataFrame with the columns ``movieId``, ``title``, ``genres``
        and ``recommendation``, sorted by ``recommendation`` descending.
    """
    output_columns = ['movieId', 'title', 'genres', 'recommendation']
    # assign() works on a copy, so the caller's frame is left untouched.
    scored = movies_matrix.assign(recommendation=recommendation)
    ranked = scored.sort_values(by='recommendation', ascending=False)
    return ranked[output_columns]
   


In [86]:
# Personal ratings as a vector with one slot per column of the rating matrix;
# 0 means "not rated". Positions are 0-based column indices, so position i
# refers to movieId i + 1.
my_ratings = np.zeros(no_movies)
for position, score in ((8, 5), (297, 3), (324, 5), (17, 4)):
    my_ratings[position] = score

# Score every movie for this profile and show the ten best-ranked titles.
recommendation = get_recommendation(rating_matrix, my_ratings)
propositions = propose_recommendations(recommendation, movies)
print(propositions.head(10))

      movieId                                          title  \
324       325          National Lampoon's Senior Trip (1995)   
8           9                            Sudden Death (1995)   
17         18                              Four Rooms (1995)   
78         79                              Juror, The (1996)   
809       810                                  Kazaam (1996)   
2322     2323                             Cruise, The (1998)   
6121     6122  Richard Pryor Live on the Sunset Strip (1982)   
101       102                               Mr. Wrong (1996)   
710       711                                 Flipper (1996)   
4977     4978                                 Lantana (2001)   

                       genres  recommendation  
324                    Comedy        0.600919  
8                      Action        0.596031  
17                     Comedy        0.528017  
78             Drama|Thriller        0.460196  
809   Children|Comedy|Fantasy        0.421419  
2322   

  normalized_rating_matrix = np.nan_to_num(rating_matrix/np.linalg.norm(rating_matrix, axis=0))
