**Step-1: Import the Libraries**

In [5]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

**Step-2: Load the Dataset**

In [9]:
# Location of the TMDB 5000 movies CSV. Edit this single constant to run the
# notebook on another machine.
DATA_PATH = r"C:\Users\sangi\Downloads\excel_data\tmdb_5000_movies.csv"

# Only the title and genres columns are used below, so parse just those
# (`usecols`) instead of the whole file. The column list is re-applied
# afterwards to pin the column order, and `.copy()` makes `df` an independent
# frame so later cells can assign to its columns safely.
df = pd.read_csv(DATA_PATH, usecols=['title', 'genres'])[['title', 'genres']].copy()
df.head()

Unnamed: 0,title,genres
0,Avatar,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam..."
1,Pirates of the Caribbean: At World's End,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""..."
2,Spectre,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam..."
3,The Dark Knight Rises,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam..."
4,John Carter,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam..."


**Step-3: Data Cleaning**

In [12]:
import ast

def extract_genres(text):
    """Turn a serialized list of genre records into a space-separated string.

    Parameters
    ----------
    text : str
        A list literal of dicts that each carry a ``'name'`` key, e.g.
        ``'[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}]'``.
        Missing values (``None``, ``NaN``) and blank strings are accepted.

    Returns
    -------
    str
        The genre names joined by single spaces, in their original order.
        An empty string when there are no genres or the value is missing.

    Raises
    ------
    ValueError, SyntaxError
        If ``text`` is a non-blank string that is not a valid Python literal.
    """
    # A missing cell is not a string (pandas yields a float NaN, or None), and
    # a blank cell has nothing to parse; both simply mean "no genres".
    if not isinstance(text, str) or not text.strip():
        return ""

    # literal_eval only evaluates literals, so it is safe on file contents.
    genres = ast.literal_eval(text)
    return " ".join(g['name'] for g in genres)

# Keep the raw serialized genres in their own column and always parse from it.
# Overwriting 'genres' with its own parsed output would make this cell fail on
# a second run, because the cleaned text is no longer a parsable list literal.
if 'genres_raw' not in df.columns:
    df['genres_raw'] = df['genres']
df['genres'] = df['genres_raw'].apply(extract_genres)

**Step-4: Converting Genres to TF-IDF Vectors**

In [15]:
# Learn the genre vocabulary and IDF weights first, then encode every movie's
# genre string as one TF-IDF row (movies x vocabulary terms).
# NOTE(review): genre names are joined with spaces upstream, so a multi-word
# name is presumably tokenized into several terms here -- confirm this is intended.
tfidf = TfidfVectorizer(stop_words='english').fit(df['genres'])
genre_matrix = tfidf.transform(df['genres'])
genre_matrix

<4803x22 sparse matrix of type '<class 'numpy.float64'>'
	with 12703 stored elements in Compressed Sparse Row format>

**Step-5: Compute Cosine Similarity**

In [20]:
# Pairwise cosine similarity between every pair of movie genre vectors.
# Entry [i, j] scores movie i against movie j; the result is a dense
# square array with one row and one column per movie.
similarity = cosine_similarity(genre_matrix, dense_output=True)
similarity

array([[1.        , 0.74526744, 0.42944732, ..., 0.        , 0.        ,
        0.        ],
       [0.74526744, 1.        , 0.5762325 , ..., 0.        , 0.        ,
        0.        ],
       [0.42944732, 0.5762325 , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

**Step-6: Recommendation Function**

In [23]:
def recommend(movie_title, n=5):
    """Print the titles of the ``n`` movies most similar to ``movie_title``.

    Similarity scores are read from the module-level ``similarity`` matrix,
    whose rows and columns follow the row order of the module-level ``df``.

    Parameters
    ----------
    movie_title : str
        Exact title to look up in ``df['title']``. If several rows share the
        title, the first one is used.
    n : int, default 5
        Maximum number of recommendations to print. Values <= 0 print nothing.

    Returns
    -------
    None
        Titles are printed one per line, most similar first. Ties keep the
        original row order.

    Raises
    ------
    IndexError
        If ``movie_title`` does not appear in ``df['title']``.
    """
    # Work with row *positions*, because `similarity` is a plain array aligned
    # with df's row order rather than with its index labels.
    matches = np.flatnonzero((df['title'] == movie_title).to_numpy())
    if matches.size == 0:
        raise IndexError(f"movie title not found: {movie_title!r}")
    query_pos = int(matches[0])

    row = np.asarray(similarity[query_pos])
    # Stable descending sort: equal scores stay in row order.
    ranked = np.argsort(-row, kind='stable')
    # Drop the queried movie explicitly. It is not guaranteed to sort first:
    # other movies can tie with it at the top score, and a movie whose vector
    # is all zeros scores 0 even against itself.
    ranked = ranked[ranked != query_pos][:max(n, 0)]

    for pos in ranked:
        print(df.iloc[pos]['title'])

**Step-7: Test**

In [26]:
# Smoke test: print the five titles whose genre vectors are closest to Avatar's.
recommend(movie_title="Avatar", n=5)

Superman Returns
Man of Steel
X-Men: Days of Future Past
Jupiter Ascending
The Wolverine
