## Recommendation engine with GUI using python and ML 

<b>Input:</b>

1. Number of movies to recommend
2. Genre(s) of movie

<b> Output: </b>

1. Movies Recommended
2. Ratings
3. Number of Ratings

In [1]:
import numpy as np
import pandas as pd

In [2]:
movies = pd.read_csv("movies.csv")
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [3]:
ratings = pd.read_csv("ratings.csv")
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [4]:
movie_data = pd.merge(ratings, movies, on='movieId')
movie_data

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
...,...,...,...,...,...,...
100831,610,160341,2.5,1479545749,Bloodmoon (1997),Action|Thriller
100832,610,160527,4.5,1479544998,Sympathy for the Underdog (1971),Action|Crime|Drama
100833,610,160836,3.0,1493844794,Hazard (2005),Action|Drama|Thriller
100834,610,163937,3.5,1493848789,Blair Witch (2016),Horror|Thriller


### Getting Pivot table in the Required Format

In [5]:
user_movie_rating = pd.pivot_table(movie_data, index=['title'], columns=['genres'], values='rating').reset_index()
user_movie_rating = user_movie_rating.fillna(0)
user_movie_rating

genres,title,(no genres listed),Action,Action|Adventure,Action|Adventure|Animation,Action|Adventure|Animation|Children,Action|Adventure|Animation|Children|Comedy,Action|Adventure|Animation|Children|Comedy|Fantasy,Action|Adventure|Animation|Children|Comedy|IMAX,Action|Adventure|Animation|Children|Comedy|Romance,...,Romance|Thriller,Romance|War,Romance|Western,Sci-Fi,Sci-Fi|IMAX,Sci-Fi|Thriller,Sci-Fi|Thriller|IMAX,Thriller,War,Western
0,'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,'Salem's Lot (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9714,eXistenZ (1999),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9715,xXx (2002),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9716,xXx: State of the Union (2005),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9717,¡Three Amigos! (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Filtering Movie Titles based on inputted Genre

In [6]:
genre_movie = user_movie_rating[user_movie_rating['Horror']!=0][['title', 'Horror']].sort_values('Horror', ascending=False)
genre_movie

genres,title,Horror
8466,The Eye: Infinity (2005),5.00
7766,Slumber Party Massacre II (1987),5.00
7767,Slumber Party Massacre III (1990),5.00
7866,Sorority House Massacre (1986),5.00
7867,Sorority House Massacre II (1990),5.00
...,...,...
1726,Children of the Corn III (1994),1.00
454,"Amityville Curse, The (1990)",0.75
4119,"Human Centipede, The (First Sequence) (2009)",0.50
7375,Satanic (2016),0.50


### Calculating number of ratings

In [7]:
count_data = movie_data.groupby(['title', 'genres'])['rating'].count().reset_index()
count_data

Unnamed: 0,title,genres,rating
0,'71 (2014),Action|Drama|Thriller|War,1
1,'Hellboy': The Seeds of Creation (2004),Action|Adventure|Comedy|Documentary|Fantasy,1
2,'Round Midnight (1986),Drama|Musical,2
3,'Salem's Lot (2004),Drama|Horror|Mystery|Thriller,1
4,'Til There Was You (1997),Drama|Romance,2
...,...,...,...
9719,eXistenZ (1999),Action|Sci-Fi|Thriller,22
9720,xXx (2002),Action|Crime|Thriller,24
9721,xXx: State of the Union (2005),Action|Crime|Thriller,5
9722,¡Three Amigos! (1986),Comedy|Western,26


In [8]:
# Renaming columns for Readability

related_movies = genre_movie.join(count_data['rating'])
related_movies = related_movies.rename(columns={'title': 'Movie(s)', 'Horror':'Ratings', 'rating':'Number of Ratings'})
related_movies

Unnamed: 0,Movie(s),Ratings,Number of Ratings
8466,The Eye: Infinity (2005),5.00,1
7766,Slumber Party Massacre II (1987),5.00,5
7767,Slumber Party Massacre III (1990),5.00,18
7866,Sorority House Massacre (1986),5.00,10
7867,Sorority House Massacre II (1990),5.00,10
...,...,...,...
1726,Children of the Corn III (1994),1.00,1
454,"Amityville Curse, The (1990)",0.75,2
4119,"Human Centipede, The (First Sequence) (2009)",0.50,33
7375,Satanic (2016),0.50,1


### Details of Numeric Data is considered

In [9]:
related_movies.describe()

Unnamed: 0,Ratings,Number of Ratings
count,167.0,167.0
mean,2.820985,5.664671
std,1.007566,13.55738
min,0.5,1.0
25%,2.0,1.0
50%,3.0,2.0
75%,3.5,5.0
max,5.0,157.0


In [10]:
related_movies.mean()

Ratings              2.820985
Number of Ratings    5.664671
dtype: float64

### Consider the following scenario:

Something that one user ranked 5 stars would have a better average than that of something 188 users ranked 4 stars if we directly considered rating value. In order to make smart recommendation that take into account the ratings given and the number of ratings, we can make use of Bayesian Probability. 

This approach requires two parameters:

1. What do you think is the true "default" rating of an item, if you have no ratings at all for the item? Call this number <b>R</b>, the "initial belief". (Here, I'm considering R as mean value of Ratings i.e., 2.820985)
2. How much weight do you give to the initial belief, compared to the user ratings? Call this <b>W</b>, where the initial belief is "worth" W user ratings of that value. (Here, I'm considering W as mean value of Number of Ratings i.e., 5.664671)

We compute the final ratings for various scenarios below:<br/>

<i>
1. 100 (user) ratings of 4: (3*2 + 100*4) / (3 + 100) = 3.94<br/>
2. 3 ratings of 5 and 1 rating of 4: (3*2 + 3*5 + 1*4) / (3 + 3 + 1) = 3.57<br/>
3. 10 ratings of 4: (3*2 + 10*4) / (3 + 10) = 3.54<br/>
4. 1 rating of 5: (3*2 + 1*5) / (3 + 1) = 2.75<br/>
5. No user ratings: (3*2 + 0) / (3 + 0) = 2<br/>
6. 1 rating of 1: (3*2 + 1*1) / (3 + 1) = 1.75<br/></i>


This computation takes into consideration the number of user ratings, and the values of those ratings. As a result, the final score roughly corresponds to how happy one can expect to be about a particular item, given the data.

Credit: https://stackoverflow.com/questions/2495509/how-to-balance-number-of-ratings-versus-the-ratings-themselves

In [11]:
R, W = 2.820985, 5.664671

In [12]:
related_movies['ranks'] = ((R * W ) + (related_movies['Ratings'] * related_movies['Number of Ratings']))\
/ (related_movies['Number of Ratings'] + W)

In [13]:
related_movies

Unnamed: 0,Movie(s),Ratings,Number of Ratings,ranks
8466,The Eye: Infinity (2005),5.00,1,3.147935
7766,Slumber Party Massacre II (1987),5.00,5,3.842589
7767,Slumber Party Massacre III (1990),5.00,18,4.478404
7866,Sorority House Massacre (1986),5.00,10,4.212023
7867,Sorority House Massacre II (1990),5.00,10,4.212023
...,...,...,...,...
1726,Children of the Corn III (1994),1.00,1,2.547755
454,"Amityville Curse, The (1990)",0.75,2,2.280587
4119,"Human Centipede, The (First Sequence) (2009)",0.50,33,0.840042
7375,Satanic (2016),0.50,1,2.472733


### Now, sort by ranks for best movie recommendation

In [14]:
related_movies.sort_values('ranks', ascending=False, inplace=True)

In [15]:
related_movies

Unnamed: 0,Movie(s),Ratings,Number of Ratings,ranks
7767,Slumber Party Massacre III (1990),5.0,18,4.478404
7866,Sorority House Massacre (1986),5.0,10,4.212023
7867,Sorority House Massacre II (1990),5.0,10,4.212023
7766,Slumber Party Massacre II (1987),5.0,5,3.842589
5624,Michael Jackson's Thriller (1983),4.0,23,3.767005
...,...,...,...,...
2537,Dracula 2000 (2000),1.8,29,1.966843
3711,Halloween 5: The Revenge of Michael Myers (1989),1.5,27,1.729084
3187,Frankenstein Meets the Wolf Man (1943),1.0,11,1.618991
2256,Deadtime Stories (1987),1.0,12,1.583950


## Top 20 movies

In [16]:
related_movies[['Movie(s)', 'Ratings', 'Number of Ratings']].head(20)

Unnamed: 0,Movie(s),Ratings,Number of Ratings
7767,Slumber Party Massacre III (1990),5.0,18
7866,Sorority House Massacre (1986),5.0,10
7867,Sorority House Massacre II (1990),5.0,10
7766,Slumber Party Massacre II (1987),5.0,5
5624,Michael Jackson's Thriller (1983),4.0,23
9140,Vampire in Venice (Nosferatu a Venezia) (Nosfe...,5.0,4
7928,Spirit Camp (2009),4.5,5
6841,Prom Night IV: Deliver Us From Evil (1992),4.0,8
4942,Last Shift (2014),4.0,8
8509,The Hunger (1983),3.666667,18


### GUI Implementation using Tkinter


In [17]:
from tkinter import *
import tkinter as tk
from pandastable import Table

In [18]:
fields = ('Enter the Genre', 'Number of Movies')

def movie_recommendation(entries):
    
    window = tk.Toplevel(root)
    window.minsize(376, 154) 
    
    try:
        # Avoid empty values or invalid values
        flag = int(entries['Number of Movies'].get())
        
        # Check for key error
        if entries['Enter the Genre'].get() in user_movie_rating.columns:

            f = Frame(window)
            f.pack(fill=BOTH,expand=1)

            # Fetching movies of requested genre
            genre_movie = user_movie_rating[user_movie_rating[entries['Enter the Genre'].get()]!=0][
                ['title', entries['Enter the Genre'].get()]].sort_values(entries['Enter the Genre'].get(), ascending=False)

            count_data = movie_data.groupby(['title', 'genres'])['rating'].count().reset_index()

            related_movies = genre_movie.join(count_data['rating'])
            related_movies = related_movies.rename(
                columns={'title': 'Movie(s)', entries['Enter the Genre'].get():'Ratings', 'rating':'Number of Ratings'})

            # Calculating ranks
            R, W = 2.820985, 5.664671
            related_movies['ranks'] = ((R * W ) + (related_movies['Ratings'] * related_movies['Number of Ratings'])
                                      )/ (related_movies['Number of Ratings'] + W)

            related_movies.sort_values('ranks', ascending=False, inplace=True)

            df = related_movies[['Movie(s)', 'Ratings', 'Number of Ratings']].iloc[:int(entries['Number of Movies'].get())]

            # Passing data to PandasTable to display it as spread sheet
            pt = Table(f, dataframe=df, showtoolbar=True, showstatusbar=True)
            pt.show()

        else:
            lab = Label(window, text="KEY ERROR! Enter Valid Genre")
            lab.config(anchor=CENTER)
            lab.pack() 
    
    except ValueError:
        lab = Label(window, text="ENTER VALID DATA")
        lab.pack()

   
        
    
def makeform(root, fields):
    
    entries = {}
    for field in fields:
        row = Frame(root)
        lab = Label(row, width=22, text=field+": ", anchor='w')
        ent = Entry(row)
        ent.insert(0,"")
        row.pack(side = TOP, fill = X, padx = 5 , pady = 5)
        lab.pack(side = LEFT)
        ent.pack(side = RIGHT, expand = YES, fill = X)
        entries[field] = ent
        
    return entries

In [19]:
if __name__ == '__main__':
    root = Tk()
    
    # setting the minimum size of the root window 
    root.minsize(576, 154) 
    
    lab = Label(text="(For multiple Genres put | between them)")
    lab.pack()
    
    ents = makeform(root, fields)
    root.bind('<Return>', (lambda event, e = ents: fetch(e)))
    # For multiple Genre put | between them

    b1 = Button(root, text = 'Get Recommendation', command=(lambda e = ents: movie_recommendation(e)))
    b1.pack(side = BOTTOM, padx = 5, pady = 5)
    
    
root.mainloop()

### OUTPUT

### Interface


<img src='Interface.png'>

### Error when invalid/empty data


<img src='Error1.png'>

### Error when invalid genre is entered


<img src='Error2.png'>

### Output for Single Genre

<img src='Output1.png'>

### Output for Multiple Genres

<img src='Output2.png'>

                                                Prepared by J.Haripriya