# Implementation: Recommendation System (ML)

### Importing Libraries

In [57]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action = 'ignore', category =FutureWarning)

### Loading Dataset

In [58]:
# Load the user ratings table (columns: userId, movieId, rating, timestamp).
# NOTE(review): this is an absolute local path; adjust it to where ratings.csv lives.
ratings_path = "F:/machine learning projects/CLASSIFICATION/P03/ratings.csv"
ratings = pd.read_csv(ratings_path)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [59]:
# Load the movie metadata table (columns: movieId, title, genres).
# The path is written as a raw string because it contains backslashes:
# in a normal string literal "\m", "\C" and "\P" are invalid escape
# sequences, which recent Python versions report with a SyntaxWarning.
# The resulting path is unchanged.
# NOTE(review): this is an absolute local path; adjust it to where movies.csv lives.
movies = pd.read_csv(r"F:\machine learning projects\CLASSIFICATION\P03\movies.csv")
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


### Statistical analysis of Ratings

In [60]:
# Headline counts for the ratings table.
n_ratings = ratings.shape[0]
n_movies = ratings['movieId'].unique().size
n_users = ratings['userId'].unique().size

# Average number of ratings per user and per movie, rounded for display.
ratings_per_user = round(n_ratings / n_users, 2)
ratings_per_movie = round(n_ratings / n_movies, 2)

print(f"Number of ratings: {n_ratings}")
print(f"Number of Unique movieID : {n_movies}")
print(f"Number of Unique Users: {n_users}")
print(f"Average Ratings per User: {ratings_per_user}")
print(f"Average Ratings per Movie: {ratings_per_movie}")

Number of ratings: 100836
Number of Unique movieID : 9724
Number of Unique Users: 610
Average Ratings per User: 165.3
Average Ratings per Movie: 10.37


### User Rating Frequency

In [61]:
# Number of ratings submitted by each user: count the movieId entries per
# userId, then give the count column a descriptive name.
user_freq = (
    ratings[["userId", "movieId"]]
    .groupby("userId")
    .count()
    .reset_index()
    .rename(columns={"movieId": "n_ratings"})
)
print(user_freq.head())

   userId  n_ratings
0       1        232
1       2         29
2       3         39
3       4        216
4       5         44


### Movie Rating analysis.

In [62]:
# Mean rating per movie; reused below to find the lowest and highest rated movies.
mean_rating = ratings.groupby("movieId")[["rating"]].mean()

# Lowest rated movie: movieId with the smallest mean rating
# (idxmin returns the first one if several movies tie).
lowest_rated = mean_rating["rating"].idxmin()
movies[movies["movieId"] == lowest_rated]

Unnamed: 0,movieId,title,genres
2689,3604,Gypsy (1962),Musical


In [63]:
# Highest rated movie: movieId with the largest mean rating
# (idxmax returns the first one if several movies tie).
highest_rated = mean_rating["rating"].idxmax()
movies[movies["movieId"] == highest_rated]

Unnamed: 0,movieId,title,genres
48,53,Lamerica (1994),Adventure|Drama


In [64]:
# Show every rating given to the highest rated movie (i.e. who rated it, and how).
ratings.loc[ratings["movieId"] == highest_rated]

Unnamed: 0,userId,movieId,rating,timestamp
13368,85,53,5.0,889468268
96115,603,53,5.0,963180003


In [65]:
# Show every rating given to the lowest rated movie (i.e. who rated it, and how).
ratings.loc[ratings["movieId"] == lowest_rated]

Unnamed: 0,userId,movieId,rating,timestamp
13633,89,3604,0.5,1520408880


The movies above have very few ratings (only one or two each), so their mean rating is not a reliable measure of quality.

We will use the **Bayesian average** instead.

In [66]:
# Per-movie rating count and mean rating. Aggregating the 'rating' Series
# directly yields flat 'count' / 'mean' column names.
movie_stats = ratings.groupby("movieId")["rating"].agg(["count", "mean"])
movie_stats.head()

Unnamed: 0_level_0,count,mean
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,215,3.92093
2,110,3.431818
3,52,3.259615
4,7,2.357143
5,49,3.071429


#### The Bayesian average may offer more accurate quality ratings for films with a small number of ratings

### User-Item Matrix Creation

I will create this user-item matrix as a SciPy sparse `csr_matrix`.

In [67]:
from scipy.sparse import csr_matrix

def create_matrix(df):
    """Build a sparse movie-by-user rating matrix from a ratings table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``userId``, ``movieId`` and ``rating``.

    Returns
    -------
    X : scipy.sparse.csr_matrix
        Matrix of shape (n_movies, n_users); rows are movies, columns are
        users, and each stored value is a rating.
    user_mapper : dict
        userId -> column index.
    movie_mapper : dict
        movieId -> row index.
    user_inv_mapper : dict
        column index -> userId.
    movie_inv_mapper : dict
        row index -> movieId.
    """
    # Sorted distinct IDs; an ID's position in these arrays is its matrix index.
    user_ids = np.unique(df["userId"])
    movie_ids = np.unique(df["movieId"])
    n_users, n_movies = len(user_ids), len(movie_ids)

    # ID -> index lookups.
    user_mapper = {uid: idx for idx, uid in enumerate(user_ids)}
    movie_mapper = {mid: idx for idx, mid in enumerate(movie_ids)}

    # index -> ID lookups.
    user_inv_mapper = dict(enumerate(user_ids))
    movie_inv_mapper = dict(enumerate(movie_ids))

    # Matrix coordinates of every rating row in df.
    row_index = [movie_mapper[mid] for mid in df["movieId"]]
    col_index = [user_mapper[uid] for uid in df["userId"]]

    X = csr_matrix((df["rating"], (row_index, col_index)), shape=(n_movies, n_users))

    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper
# Build the movie-by-user matrix and the ID <-> index lookups from the ratings table.
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)

### A user-item matrix is a basic data structure in recommendation systems

It is created by the code given above. This is how it works:

1. `N` and `M` are computed as the number of unique users and unique movies in the data set.
2. Four dictionaries are produced:
    - `user_mapper`: maps distinct user IDs to indices (e.g. user ID 1 becomes index 0).
    - `movie_mapper`: maps distinct movie IDs to indices (e.g. movie ID 1 becomes index 0).
    - `user_inv_mapper`: reverses `user_mapper` and maps indices back to user IDs.
    - `movie_inv_mapper`: reverses `movie_mapper` by mapping indices back to movie IDs.
3. In order to map the real user and movie IDs to their matching indices, the lists `user_index` and `movie_index` are generated.
4. A sparse matrix `X` is created using the SciPy function `csr_matrix` from the rating values and the movie and user indices they correspond to. It has shape (M, N):
    - M = number of distinct films (rows)
    - N = number of distinct users (columns)
    

### Movie Similarity Analysis
we will use KNN *(k-Nearest Neighbors)* to perform this

In [68]:
from sklearn.neighbors import NearestNeighbors

def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):
    """Return the ``k`` movies whose rating vectors are closest to ``movie_id``.

    Relies on the module-level ``movie_mapper`` and ``movie_inv_mapper``
    dictionaries (movieId <-> row index of ``X``).

    Parameters
    ----------
    movie_id : int
        ID of the query movie; must be a key of ``movie_mapper``.
    X : matrix with one row per movie
        Rating matrix whose row order matches ``movie_mapper``.
    k : int
        Number of similar movies to return.
    metric : str, default 'cosine'
        Distance metric passed to ``NearestNeighbors``.
    show_distance : bool, default False
        When True, return ``(movie_id, distance)`` tuples instead of bare IDs.

    Returns
    -------
    list
        Up to ``k`` movie IDs ordered from nearest to farthest, or
        ``(movie_id, distance)`` tuples when ``show_distance`` is True.
        The query movie itself is never included.

    Raises
    ------
    KeyError
        If ``movie_id`` is not present in ``movie_mapper``.
    """
    movie_ind = movie_mapper[movie_id]
    movie_vec = X[movie_ind].reshape(1, -1)

    # Request one extra neighbour because the query movie is normally its own
    # nearest neighbour, but never more neighbours than X has rows.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)

    # Always fetch distances: kneighbors returns (distances, indices) in that
    # case, so both output modes can share one code path.
    distances, indices = kNN.kneighbors(movie_vec, return_distance=True)

    neighbour_ids = []
    for dist, ind in zip(distances[0], indices[0]):
        ind = int(ind)
        # Drop the query movie explicitly instead of assuming it is first:
        # with tied distances another movie can be ranked ahead of it.
        if ind == movie_ind:
            continue
        neighbour_id = movie_inv_mapper[ind]
        neighbour_ids.append((neighbour_id, float(dist)) if show_distance else neighbour_id)

    # If the query movie was not among the results, trim the extra neighbour.
    return neighbour_ids[:k]
 
# code written by Joseph Wathome
# movieId -> title lookup used when printing recommendations.
movie_titles = {
    movie_id_key: title
    for movie_id_key, title in zip(movies['movieId'], movies['title'])
}


In [69]:
# Example: list the 10 movies most similar to movie 3.
movie_id = 3
movie_title = movie_titles[movie_id]
similar_ids = find_similar_movies(movie_id, X, k=10)

print(f"Since you watched {movie_title}")
for similar_id in similar_ids:
    print(movie_titles[similar_id])

Since you watched Grumpier Old Men (1995)
Grumpy Old Men (1993)
Striptease (1996)
Nutty Professor, The (1996)
Twister (1996)
Father of the Bride Part II (1995)
Broken Arrow (1996)
Bio-Dome (1996)
Truth About Cats & Dogs, The (1996)
Sabrina (1995)
Birdcage, The (1996)


### Movie recommendation with respect to the user's preference

In [76]:
def recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k =10):
    """Print up to ``k`` movies similar to the user's highest-rated movie.

    Reads the module-level ``ratings`` and ``movies`` DataFrames and delegates
    the similarity search to ``find_similar_movies``.

    Parameters
    ----------
    user_id : int
        ID of the user to recommend for.
    X : matrix with one row per movie
        Rating matrix forwarded to ``find_similar_movies``.
    user_mapper, movie_mapper, movie_inv_mapper : dict
        Accepted for interface compatibility; not used in this function body.
    k : int, default 10
        Number of recommendations to print.

    Returns
    -------
    None
        Results (or an explanatory message) are printed.
    """
    df1 = ratings[ratings['userId'] == user_id]

    if df1.empty:
        print(f"user with ID {user_id} does not Exist.")
        return

    # Series.max() skips NaN, whereas the builtin max() gives an
    # order-dependent result when NaN is present and could leave no matching
    # row for the .iloc[0] below.
    top_rated = df1[df1['rating'] == df1['rating'].max()]
    if top_rated.empty:
        print(f"user with ID {user_id} has no valid ratings.")
        return

    # First movie (in table order) that received the user's best rating.
    movie_id = top_rated['movieId'].iloc[0]

    movie_titles = dict(zip(movies['movieId'], movies['title']))

    # Check the title before running the neighbour search, so an unknown
    # movie is reported without doing that work.
    if movie_id not in movie_titles:
        print(f"Movie with ID {movie_id} Not Found")
        return
    movie_title = movie_titles[movie_id]

    similar_ids = find_similar_movies(movie_id, X, k)

    print (f"Since you watched {movie_title}, you might also Like: ")
    for similar_id in similar_ids:
        print (movie_titles.get(similar_id, "Movie not found"))
        
# code written by Joseph Wathome
        

## Recommend the movies

In [77]:
user_id = 150  # replace this with your desired user ID

recommend_movies_for_user(
    user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10
)

Since you watched Twelve Monkeys (a.k.a. 12 Monkeys) (1995), you might also Like: 
Pulp Fiction (1994)
Terminator 2: Judgment Day (1991)
Independence Day (a.k.a. ID4) (1996)
Seven (a.k.a. Se7en) (1995)
Fargo (1996)
Fugitive, The (1993)
Usual Suspects, The (1995)
Jurassic Park (1993)
Star Wars: Episode IV - A New Hope (1977)
Heat (1995)


In [78]:
# What happens if we use a user_id that is not in the range of known user IDs
user_id = 2300  # replace this with your desired user ID

recommend_movies_for_user(
    user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10
)

user with ID 2300 does not Exist.


# code written by Joseph Wathome

In conclusion, developing a Python recommendation system allows for the creation of tailored content recommendations that improve user experience and take into account user preferences. This notebook implements the item-based collaborative-filtering part: movies are compared by the cosine distance between their rating vectors using k-nearest neighbours. More generally, through the utilization of collaborative filtering, content-based filtering, and hybrid techniques, these systems are able to offer customized recommendations to consumers for content, movies, or items. These systems use sophisticated methods such as nearest neighbours and matrix factorization to find hidden patterns in item attributes and user behavior. Recommendation systems are able to adjust and get better over time thanks to the combination of machine learning and data-driven insights. In the end, these solutions are essential for raising consumer satisfaction, improving user engagement, and propelling corporate expansion in a variety of industries.