# Movie Recommendation System Based on User Preference

## Importing Libraries

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import pickle

In [6]:
# Fetch the NLTK 'stopwords' corpus; stopwords.words('english') in the
# pre-processing cell reads it. nltk was already imported in the first cell,
# so this import is a harmless repeat.
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

## Reading Dataset

In [7]:
# Load the movie table; the relative path is resolved against the current
# working directory, so the CSV must be reachable from where the kernel runs.
df = pd.read_csv('movies_dataset.csv')

In [8]:
# Preview the first rows to check which columns were loaded; later cells rely
# on 'Title' and 'Description'.
df.head()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0


## Pre-processing

In [9]:
# Normalise the free-text descriptions before they are vectorised:
# lowercase -> strip punctuation -> drop English stop words.
stop_words = set(stopwords.words('english'))

df['Description'] = (
    df['Description']
    # A missing description would otherwise reach x.split() as NaN (a float)
    # and raise; treat it as empty text instead.
    .fillna('')
    # Convert text to lowercase
    .str.lower()
    # Remove punctuation. regex=True must be explicit: pandas >= 2.0 defaults
    # to regex=False, which would search for the literal text '[^\w\s]' and
    # leave every punctuation mark in place. The raw string avoids the
    # invalid-escape warnings for \w and \s.
    .str.replace(r'[^\w\s]', '', regex=True)
    # Remove stopwords (set membership keeps this O(1) per word)
    .apply(lambda text: ' '.join(word for word in text.split() if word not in stop_words))
)


  df['Description'] = df['Description'].str.replace('[^\w\s]','')


## Algorithms

In [10]:
# Create a TF-IDF matrix for the movie descriptions.
# The vectorizer is used with its default settings; fit_transform learns the
# vocabulary from df['Description'] and returns one row per description.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['Description'])

In [11]:
# Calculate cosine similarity between the movies based on their descriptions.
# Called with a single matrix, every row is compared with every row, so
# cosine_sim[i][j] is the similarity between description i and description j.
cosine_sim = cosine_similarity(tfidf_matrix)

In [12]:
def get_recommendations(title, cosine_sim=cosine_sim, df=df, top_n=10):
    """Return the titles most similar to ``title`` as one comma-separated string.

    Parameters
    ----------
    title : str
        Movie to look up; must equal an entry of ``df['Title']`` exactly.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` is read as the scores of the
        movie at positional row ``i`` of ``df``.
    df : pandas.DataFrame
        Movie table with a ``'Title'`` column, in the same row order as
        ``cosine_sim``.
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    str
        Up to ``top_n`` titles joined by ``", "``, most similar first. The
        queried movie itself is never included.

    Raises
    ------
    KeyError
        If ``title`` does not occur in ``df['Title']``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the movie by *position*, not by index label: cosine_sim and
    # .iloc below are both positional, so this stays correct even when df
    # does not carry a default 0..n-1 index.
    match_positions = (df['Title'] == title).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        raise KeyError(f"Title not found in dataset: {title!r}")
    # Some titles can occur more than once; use the first occurrence instead
    # of failing on an ambiguous lookup.
    idx = int(match_positions[0])

    # Pair every other movie with its similarity to the requested one. The
    # movie itself is excluded explicitly rather than by assuming it sorts
    # first, which is not guaranteed when scores tie (e.g. identical or empty
    # descriptions).
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Highest similarity first; sorted() is stable, so ties keep dataset order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    movie_indices = [position for position, _ in candidates[:top_n]]

    # Create a comma-separated string of movie titles
    return ", ".join(df['Title'].iloc[movie_indices].values)

## Prediction

In [13]:
# Test the function with a movie title. The title is looked up by exact match
# against df['Title'], so spelling and capitalisation must agree with the data.
get_recommendations('The Dark Knight')

'The Dark Knight Rises, Revolutionary Road, Thor: The Dark World, The Avengers, The Book of Life, Fences, Mr. Brooks, Shin Gojira, Chappie, Brave'

In [14]:
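# Optional: persist the similarity matrix so another program can reuse it.
# Left disabled; uncomment to write model.pkl (the `with` block closes the file).
# with open('model.pkl', 'wb') as model_file:
#     pickle.dump(cosine_sim, model_file)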