# Movie Recommendation System

by Sumit Kumar

In [1]:
import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


In [8]:
# Load the movie metadata table from disk and preview its first ten rows.
csv_path = 'data.csv'
data = pd.read_csv(csv_path)
data.head(n=10)

Unnamed: 0,Movie Name,Release Period,Whether Remake,Whether Franchise,Genre,New Actor,New Director,New Music Director,Lead Star,Director,Music Director,Number of Screens,Revenue(INR),Budget(INR)
0,Golden Boys,Normal,No,No,suspense,Yes,No,No,Jeet Goswami,Ravi Varma,Baba Jagirdar,5,5000000,85000
1,Kaccha Limboo,Holiday,No,No,drama,Yes,No,Yes,Karan Bhanushali,Sagar Ballary,Amardeep Nijjer,75,15000000,825000
2,Not A Love Story,Holiday,No,No,thriller,No,No,No,Mahie Gill,Ram Gopal Verma,Sandeep Chowta,525,75000000,56700000
3,Qaidi Band,Holiday,No,No,drama,Yes,No,No,Aadar Jain,Habib Faisal,Amit Trivedi,800,210000000,4500000
4,Chaatwali,Holiday,No,No,adult,Yes,Yes,Yes,Aadil Khan,Aadil Khan,Babloo Ustad,1,1000000,1075000
5,Shuttlecock Boys,Normal,No,No,comedy,Yes,Yes,Yes,Aakar Kaushik,Hemant Gaba,Avinash Baghel,10,5000000,170000
6,Dirty Marriage,Holiday,No,No,adult,Yes,No,Yes,Aakash,Priyanka,Dharma,2,1500000,35000
7,Future To Bright Hai Ji,Holiday,No,No,drama,No,Yes,Yes,Aamir Bashir,Sanjay Amar,Amir Ali,30,15000000,825000
8,Ghajini,Holiday,Yes,No,action,No,Yes,No,Aamir Khan,A.R. Murugadoss,A.R. Rehman,1550,520000000,1945820000
9,Taare Zameen Par,Holiday,No,No,drama,No,Yes,No,Aamir Khan,Aamir Khan,Shankar - Ehsaan - Loy,500,180000000,875785000


In [3]:
# Build one lower-cased text "document" per movie from its genre and lead star.
# Missing values are replaced with empty strings and everything is cast to str
# first, because ' '.join raises TypeError on NaN (a float). A single assignment
# keeps the cell idempotent when it is re-run.
data['overview'] = (
    data[['Genre', 'Lead Star']]
    .fillna('')
    .astype(str)
    .apply(' '.join, axis=1)
    .str.lower()
)

In [4]:
# Convert every movie's overview text into a TF-IDF weighted term vector,
# skipping common English stop words.
overview_text = data['overview']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(overview_text)

In [None]:
# Pairwise similarity between all movies: the dot product of every TF-IDF row
# with every other row. (Passing only X makes linear_kernel compare X with itself.)
cosine_sim = linear_kernel(tfidf_matrix)

In [6]:
# Function to recommend movies based on cosine similarity scores
def recommend_movies(title, cosine_sim=cosine_sim, data=data, top=10):
    """Return the names of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie name, matched exactly against ``data['Movie Name']``.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row/column ``i`` corresponds to the
        ``i``-th row (by position) of ``data``.
    data : pandas.DataFrame
        Movie table containing a ``'Movie Name'`` column.
    top : int
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top`` movie names, ordered from most to least similar.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``data['Movie Name']``.

    Notes
    -----
    The ``cosine_sim`` and ``data`` defaults are bound when this ``def`` runs;
    re-run this cell after rebuilding either object.
    """
    names = data['Movie Name']

    # Map each title to its row *position* (not its index label), because
    # cosine_sim and .iloc are both positional.
    positions = pd.Series(range(len(names)), index=names)
    # When a title occurs more than once, keep its first occurrence so the
    # lookup below always yields a single position.
    positions = positions[~positions.index.duplicated(keep='first')]

    if title not in positions.index:
        raise KeyError(f"Movie {title!r} not found in the dataset")
    movie_pos = int(positions[title])

    # Drop the queried movie explicitly: it is not guaranteed to sort first
    # when other movies tie with it at the maximum similarity.
    scored = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[movie_pos])
        if pos != movie_pos
    ]
    # list.sort is stable, so tied scores keep their original row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    top_positions = [pos for pos, _ in scored[:max(top, 0)]]
    return names.iloc[top_positions]

In [7]:
# Create a Gradio interface
def recommend_movie(movie_title):
    """Gradio callback: turn a typed movie title into displayable text.

    Returns the recommended movie names one per line, or a short message when
    the input is empty or the title is not in the dataset. A plain string is
    returned because the interface's output component is "text"; returning
    the pandas Series itself would display its repr (index numbers and dtype
    footer) instead of a clean list.
    """
    typed = movie_title or ""
    trimmed = typed.strip()
    if not trimmed:
        return "Please enter a movie name."

    # Try the text exactly as typed first, then with surrounding whitespace
    # removed (dict.fromkeys de-duplicates while keeping that order).
    for candidate in dict.fromkeys((typed, trimmed)):
        try:
            recommended_movies = recommend_movies(candidate)
        except KeyError:
            continue
        return "\n".join(str(name) for name in recommended_movies)

    return f"Movie '{trimmed}' was not found in the dataset."

# Texts shown at the top of the web page.
app_title = "Bollywood Movie Recommender"
app_description = "Enter a movie name to get recommendations of similar movies."

# Single text box in, single text box out, backed by recommend_movie.
iface = gr.Interface(
    title=app_title,
    description=app_description,
    fn=recommend_movie,
    inputs="text",
    outputs="text",
    theme="default",
)

# Start the local web server hosting the interface.
iface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


