### importing necessary libraries

In [24]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel



### Loading dataset

In [25]:
movies_df = pd.read_csv('movies.csv')
ratings_df = pd.read_csv('ratings.csv')
movies_df


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


### Merging CSVs


In [26]:
merged_df = pd.merge(movies_df, ratings_df[['userId', 'rating']], left_index=True, right_index=True, how='left')
merged_df

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0
1,2,Jumanji (1995),Adventure|Children|Fantasy,1,4.0
2,3,Grumpier Old Men (1995),Comedy|Romance,1,4.0
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,1,5.0
4,5,Father of the Bride Part II (1995),Comedy,1,5.0
...,...,...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,64,4.0
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,64,3.0
9739,193585,Flint (2017),Drama,64,4.5
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,64,3.0


### Handling missing values and cleaning dataset

In [27]:
missing_values = merged_df.isnull().sum()

missing_values

movieId    0
title      0
genres     0
userId     0
rating     0
dtype: int64

### Splitting the data into training and testing sets

In [31]:
# Feature extraction using TF-IDF
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(merged_df['genres'])  # Adjust 'genres' to your relevant feature

# Calculate cosine similarity
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)


# Function to get movie recommendations
def get_recommendations(movie_title, cosine_sim=cosine_sim):
    # Clean and standardize the movie title for comparison
    movie_title = movie_title.strip().lower()
    
    # Check if the movie title exists in the DataFrame
    if movie_title in merged_df['title'].str.strip().str.lower().values:
        # Get the index of the movie
        idx = merged_df.index[merged_df['title'].str.strip().str.lower() == movie_title].tolist()[0]
        
        # Calculate cosine similarity scores
        sim_scores = list(enumerate(cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        
        # Select the top 10 similar movies
        movie_indices = [i[0] for i in sim_scores[1:11]]
        
        # Return the titles of the top 10 similar movies
        return merged_df['title'].iloc[movie_indices]
    else:
        return "Movie not found in the dataset"

# Example usage
movie_title = 'Flint (2017)'
recommendations = get_recommendations(movie_title)
print(recommendations)





25                       Othello (1995)
30               Dangerous Minds (1995)
36      Cry, the Beloved Country (1995)
39                   Restoration (1995)
50                       Georgia (1995)
51         Home for the Holidays (1995)
55            Mr. Holland's Opus (1995)
105     Boys of St. Vincent, The (1992)
120      Basketball Diaries, The (1995)
121    Awfully Big Adventure, An (1995)
Name: title, dtype: object
