<a href="https://colab.research.google.com/github/fetterollie/rec-systems-project/blob/main/web_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Creat web app on Gradio

## install, import and mount

In [1]:
!pip install gradio
import gradio as gr
import pandas as pd

# Mount google drive
from google.colab import drive
drive.mount('/content/gdrive')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Get the saved dataframe for userid = 2

In [2]:
df = pd.read_csv('/content/gdrive/MyDrive/rec-systems-project/data/user2_prediction_result.csv')
df.head(10)

Unnamed: 0.1,Unnamed: 0,title,prediction,genres,split_genres
0,899,Star Wars: Episode V - The Empire Strikes Back...,4.411833,action|adventure|sci-fi,"['action', 'adventure', 'sci-fi']"
1,2229,Fight Club (1999),4.386436,action|crime|drama|thriller,"['action', 'crime', 'drama', 'thriller']"
2,600,Dr. Strangelove or: How I Learned to Stop Worr...,4.383926,comedy|war,"['comedy', 'war']"
3,907,Lawrence of Arabia (1962),4.3631,adventure|drama|war,"['adventure', 'drama', 'war']"
4,660,"Godfather, The (1972)",4.349665,crime|drama,"['crime', 'drama']"
5,695,Casablanca (1942),4.341353,drama|romance,"['drama', 'romance']"
6,900,"Princess Bride, The (1987)",4.32489,action|adventure|comedy|fantasy|romance,"['action', 'adventure', 'comedy', 'fantasy', '..."
7,46,"Usual Suspects, The (1995)",4.321951,crime|mystery|thriller,"['crime', 'mystery', 'thriller']"
8,910,Apocalypse Now (1979),4.315185,action|drama|war,"['action', 'drama', 'war']"
9,224,Star Wars: Episode IV - A New Hope (1977),4.313092,action|adventure|sci-fi,"['action', 'adventure', 'sci-fi']"


## Clean "split_genres" column and collect the genre list

Need the genre list as the dropdown options for web app

In [3]:
# Clean "split_genres" column

test_df = df.copy()
test_df['split_genres']=test_df['split_genres'].str[1:-1].str.split(',').tolist()
test_df.head()

Unnamed: 0.1,Unnamed: 0,title,prediction,genres,split_genres
0,899,Star Wars: Episode V - The Empire Strikes Back...,4.411833,action|adventure|sci-fi,"['action', 'adventure', 'sci-fi']"
1,2229,Fight Club (1999),4.386436,action|crime|drama|thriller,"['action', 'crime', 'drama', 'thriller']"
2,600,Dr. Strangelove or: How I Learned to Stop Worr...,4.383926,comedy|war,"['comedy', 'war']"
3,907,Lawrence of Arabia (1962),4.3631,adventure|drama|war,"['adventure', 'drama', 'war']"
4,660,"Godfather, The (1972)",4.349665,crime|drama,"['crime', 'drama']"


In [4]:
# Check the genre_df

genre_df = test_df['split_genres']
genre_df.head(10)

0                  ['action',  'adventure',  'sci-fi']
1          ['action',  'crime',  'drama',  'thriller']
2                                   ['comedy',  'war']
3                      ['adventure',  'drama',  'war']
4                                  ['crime',  'drama']
5                                ['drama',  'romance']
6    ['action',  'adventure',  'comedy',  'fantasy'...
7                   ['crime',  'mystery',  'thriller']
8                         ['action',  'drama',  'war']
9                  ['action',  'adventure',  'sci-fi']
Name: split_genres, dtype: object

In [5]:
# Collect genre_list and more cleaning

genre_list=[]

for each in genre_df:
  for i in each:
    if i not in genre_list:
      genre_list.append(i)

genre_list = [i.strip("'").replace(" ", "").strip("'") if type(i) == str else str(i) for i in genre_list]

In [6]:
# Check genre_list

genre_list

['action',
 'adventure',
 'sci-fi',
 'crime',
 'drama',
 'thriller',
 'comedy',
 'war',
 'adventure',
 'crime',
 'drama',
 'romance',
 'comedy',
 'fantasy',
 'mystery',
 'mystery',
 'horror',
 'horror',
 'western',
 'film-noir',
 'animation',
 'children',
 'imax',
 'animation',
 'musical',
 'film-noir',
 'documentary',
 'fantasy',
 'thriller',
 'children',
 'musical',
 'romance',
 'western',
 'sci-fi',
 'documentary',
 '(nogenreslisted)',
 'war']

In [7]:
# Remove '(nogenreslisted)'

genre_list.remove('(nogenreslisted)')

In [8]:
# Check genre_list again

genre_list

['action',
 'adventure',
 'sci-fi',
 'crime',
 'drama',
 'thriller',
 'comedy',
 'war',
 'adventure',
 'crime',
 'drama',
 'romance',
 'comedy',
 'fantasy',
 'mystery',
 'mystery',
 'horror',
 'horror',
 'western',
 'film-noir',
 'animation',
 'children',
 'imax',
 'animation',
 'musical',
 'film-noir',
 'documentary',
 'fantasy',
 'thriller',
 'children',
 'musical',
 'romance',
 'western',
 'sci-fi',
 'documentary',
 'war']

# Deploy web app 1 - output movie list

In [9]:


def movie_recommendation(rating, genre, number):
  """
  number = how many recommended movies
  rating = select movie rating
  genre = select movie genre
  """
    
  # number = 5, 10, 15, 20
  # rating = 3.35
  # genre = 'action'
  
  filter_rating_df = df[(df['prediction'] <= rating+0.5) &
                     (df['prediction'] >= rating-0.5)]
  filter_df = filter_rating_df[filter_rating_df['split_genres'].str.contains(genre, case=False)]
  rec_movie = filter_df['title'][:int(number)].tolist()
  return rec_movie

rating = gr.inputs.Slider(1, 5, label='Slect movie ratings')
genre = gr.inputs.Dropdown(genre_list, label='Pick a movie genre')

number = gr.Number(label='How many recommended movies?')
# number = gr.inputs.CheckboxGroup([5,10,15,20], type="index", label='How many recommended movies?')

gr.Interface(fn=movie_recommendation,
             inputs=[rating, genre, number],
             outputs=['text'],
             title= "Movie recommendations for you").launch(debug=True, share=True)



Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://5c6a1126-2b64-4254.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://5c6a1126-2b64-4254.gradio.live




# # Deploy web app 2 - output title and predicted rating dataframe

In [10]:
def movie_recommendation(rating, genre, number):
  """
  number = how many recommended movies
  rating = select movie rating
  genre = select movie genre
  """
    
  # number = 5, 10, 15, 20
  # rating = 3.35
  # genre = 'action'
  
  filter_rating_df = df[(df['prediction'] <= rating+0.5) &
                     (df['prediction'] >= rating-0.5)]
  filter_df = filter_rating_df[filter_rating_df['split_genres'].str.contains(genre, case=False)]
  rec_movie = filter_df[['title', 'prediction']][:int(number)]
  rec_movie.rename(columns={'prediction':'predicted rating'}, inplace=True)
  return rec_movie

# Gradio
rating = gr.inputs.Slider(1, 5, label='Slect movie ratings')
genre = gr.inputs.Dropdown(genre_list, label='Pick a movie genre')
number = gr.Number(label='How many recommended movies?')
output = gr.Dataframe(headers=["title", "predicted rating"])


gr.Interface(fn= movie_recommendation,
             inputs=[rating, genre, number],
             outputs=output,
             title= "Movie recommendations for you").launch(debug=True, share=True)




Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://ffcc3aae-96f8-4ed4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://ffcc3aae-96f8-4ed4.gradio.live


