In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the dataset in this notebook is read from '/content/movies.csv',
# which is outside this mount point, so nothing visible here reads from Drive —
# confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import pandas as pd
# Load the movies CSV into the DataFrame `df` that the later cells operate on.
# NOTE(review): the path points at the Colab runtime's local /content directory;
# presumably the file was uploaded for the session — confirm, since a fresh
# runtime may not have it.
df=pd.read_csv('/content/movies.csv')

In [6]:
# Preview the first five rows to confirm the CSV loaded with the expected columns
print(
    "First few rows of the dataset:\n",
    df.head(n=5),
)


First few rows of the dataset:
                                              name rating      genre  year  \
0                                     The Shining      R      Drama  1980   
1                                 The Blue Lagoon      R  Adventure  1980   
2  Star Wars: Episode V - The Empire Strikes Back     PG     Action  1980   
3                                       Airplane!     PG     Comedy  1980   
4                                      Caddyshack      R     Comedy  1980   

                        released  score      votes         director  \
0  June 13, 1980 (United States)    8.4   927000.0  Stanley Kubrick   
1   July 2, 1980 (United States)    5.8    65000.0   Randal Kleiser   
2  June 20, 1980 (United States)    8.7  1200000.0   Irvin Kershner   
3   July 2, 1980 (United States)    7.7   221000.0     Jim Abrahams   
4  July 25, 1980 (United States)    7.3   108000.0     Harold Ramis   

                    writer            star         country      budget  \
0   

In [7]:
# Build one space-separated text field per movie from genre, director and star.
# Missing values are replaced with '' first so that a NaN in any one column
# cannot turn the whole combined string into NaN.
df['important_features'] = (
    df['genre'].fillna('')
    .str.cat([df['director'].fillna(''), df['star'].fillna('')], sep=' ')
)


In [8]:
# Sanity check: show each title next to its combined feature string
print(
    "Dataset with important features:\n",
    df.loc[:, ['name', 'important_features']].head(n=5),
)


Dataset with important features:
                                              name  \
0                                     The Shining   
1                                 The Blue Lagoon   
2  Star Wars: Episode V - The Empire Strikes Back   
3                                       Airplane!   
4                                      Caddyshack   

                        important_features  
0     Drama Stanley Kubrick Jack Nicholson  
1  Adventure Randal Kleiser Brooke Shields  
2        Action Irvin Kershner Mark Hamill  
3          Comedy Jim Abrahams Robert Hays  
4          Comedy Harold Ramis Chevy Chase  


In [11]:
# Import the necessary class
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn each movie's feature string into a TF-IDF vector, dropping common
# English stop words; the result has one row per movie and one column per term
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(raw_documents=df['important_features'])

print(
    "TF-IDF matrix shape:",
    tfidf_matrix.shape,
)



TF-IDF matrix shape: (7668, 6090)


In [23]:
from sklearn.metrics.pairwise import linear_kernel

# Pairwise similarity of every movie against every other movie.
# linear_kernel computes plain dot products and, with Y omitted, compares the
# matrix against itself. The dot product equals cosine similarity only when the
# rows are unit-length, which is TfidfVectorizer's documented default (norm='l2').
cosine_sim = linear_kernel(tfidf_matrix)

# Report the dimensions as a quick sanity check (expected: movies x movies)
print(
    "Cosine similarity matrix shape:",
    cosine_sim.shape,
)



Cosine similarity matrix shape: (7668, 7668)


In [22]:
def get_recommendations(title, cosine_sim=None, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact value to look up in ``df['name']``. When several rows share the
        same name, the first matching row is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix, indexed positionally: row ``i`` must hold
        the scores of the movie at position ``i`` of ``df``. When omitted, the
        module-level ``cosine_sim`` is looked up at call time, so re-running
        the cell that computes the matrix is picked up without having to
        re-run this definition.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list of str
        Up to ``top_n`` titles, most similar first; the queried movie itself
        is never included.
    str
        The message ``"Movie was not found in the dataset."`` when ``title``
        does not occur in ``df['name']``.

    Raises
    ------
    NameError
        If ``cosine_sim`` is not passed and no module-level ``cosine_sim``
        exists yet.
    """
    if cosine_sim is None:
        # The parameter shadows the global of the same name, so fetch the
        # current module-level matrix explicitly.
        try:
            cosine_sim = globals()['cosine_sim']
        except KeyError:
            raise NameError("name 'cosine_sim' is not defined") from None

    # Positional row numbers (not index labels) of the rows matching the title,
    # because both the similarity matrix and .iloc below are positional.
    matches = (df['name'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        return "Movie was not found in the dataset."
    idx = int(matches[0])

    # Drop the queried movie by position instead of assuming it sorts first:
    # a movie with an identical feature string ties at the top score, and a
    # movie with an empty feature vector scores 0 even against itself.
    scored = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]

    # Stable sort: equally similar movies keep their dataset order.
    scored = sorted(scored, key=lambda pair: pair[1], reverse=True)

    top_positions = [pos for pos, _ in scored[:max(top_n, 0)]]
    return df['name'].iloc[top_positions].tolist()




In [26]:
# Demo: look up recommendations for one known title
movie_title = "The Evil Dead"
print(
    f"Recommendations for '{movie_title}':\n",
    get_recommendations(title=movie_title),
)



Recommendations for 'The Evil Dead':
 ['Evil Dead II', 'Army of Darkness', 'A Simple Plan', 'For Love of the Game', 'Oz the Great and Powerful']


In [27]:
# Second demo with a different title
test_title = "Das Boot"  # swap in any other title present in the dataset
print(
    f"Recommendations for '{test_title}':\n",
    get_recommendations(title=test_title),
)

Recommendations for 'Das Boot':
 ['The NeverEnding Story', 'The Perfect Storm', 'Poseidon', 'In the Line of Fire', 'Troy']
