In [9]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors as nn
import matplotlib.pyplot as plt 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import get_close_matches

# Load the dataset containing movie information from a CSV file (Bhumik)
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
movies = pd.read_csv('/Users/bhumiktandon/Desktop/BTpy/BT_imdb.csv')

# Define the features we're interested in, matching the CSV column names (Bhumik)
features = ['orig_title', 'date_x', 'score', 'genre', 'overview', 'orig_lang']

# Create a DataFrame 'x' with only the selected features for processing (Bhumik)
x = movies[features]
x.head()

# Simplify the movies DataFrame to include essential columns for recommendations (Bhumik)
# .copy() makes this an independent frame, so the column assignments below
# modify it directly instead of writing to a selection of the loaded data
# (the pattern pandas reports as SettingWithCopyWarning).
movies = movies[['orig_title', 'score', 'genre', 'overview', 'orig_lang']].copy()
movies.isnull().sum()  # Check for missing values in the DataFrame (Bhumik)

# Normalize the 'score' column using Min-Max scaling to bring values between 0 and 1 (Bhumik)
score_min = movies['score'].min()
score_range = movies['score'].max() - score_min
if score_range == 0:
    # Every score is identical: dividing by the zero range would turn the
    # whole column into NaN, so map the constant column to 0 instead
    # (missing scores stay missing).
    movies['score'] = movies['score'] - score_min
else:
    movies['score'] = (movies['score'] - score_min) / score_range

def rep(a, s):
    """Return ``s`` with every occurrence of each substring in ``a`` removed.

    The substrings are removed one after another, in the order ``a`` yields
    them, so an earlier removal can affect what a later one matches.

    Args:
        a: Iterable of substrings to delete.
        s: The string to clean.

    Returns:
        The cleaned string; ``s`` itself is not modified.
    """
    cleaned = s
    for fragment in a:
        cleaned = cleaned.replace(fragment, '')
    return cleaned

# Fill NaN values with empty strings to avoid issues during text processing (Bhumik)
# Rebinding to the frame returned by fillna (rather than inplace=True) avoids
# mutating a frame that was derived by column selection, and leaves `movies`
# as an independent frame for the column assignments below.
movies = movies.fillna('')

# Clean the 'genre' column by removing unwanted characters for better text processing (Bhumik)
# The lambda parameter is named `genre_text` so it does not shadow the
# module-level DataFrame `x`.
movies['genre'] = movies['genre'].apply(
    lambda genre_text: rep([',', '[', ']', "'"], genre_text)
)

# Combine relevant text data into a new column for calculating similarity (Bhumik)
movies['new'] = (
    movies['orig_title'] + ' '
    + movies['genre'] + ' '
    + movies['overview'] + ' '
    + movies['orig_lang']
)
movies.isnull().sum()  # Verify there are no missing values after processing (Bhumik)

# Create TF-IDF feature vectors from the combined text data (Bhumik)
# Row i of feature_vectors corresponds to the i-th row (by position) of movies.
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(movies['new'].values)
print(feature_vectors)

# Calculate cosine similarity between the feature vectors (Bhumik)
# similarity[i][j] is the similarity between the i-th and j-th rows of movies.
# NOTE(review): this holds one value per pair of movies — memory grows
# quadratically with the number of rows; confirm it fits for larger datasets.
similarity = cosine_similarity(feature_vectors)
similarity.shape  # Get the shape of the similarity matrix (Bhumik)

# Prompt the user for their favourite movie title for recommendations (Bhumik)
movie_name = input('Enter your favourite movie: ')

# get_close_matches returns an empty list when no title is similar enough,
# so check the result before taking the first element.
matches = get_close_matches(movie_name, movies.orig_title.values, 1)

if not matches:
    print(f'No movie title close to {movie_name!r} was found.')
else:
    closest_name = matches[0]
    print(f'Closest match found: {closest_name}')

    # Retrieve the row(s) of the closest matching movie (Bhumik)
    title_mask = (movies['orig_title'] == closest_name).to_numpy()
    movie = movies[title_mask]

    # The similarity matrix is ordered by row *position*, so use the position
    # of the first matching row rather than its index label; the two differ
    # whenever the DataFrame index is not the default 0..n-1 range.
    movie_index = int(np.flatnonzero(title_mask)[0])

    # Sort all movies by cosine similarity to the chosen one, best first (Bhumik)
    ls = sorted(
        enumerate(similarity[movie_index]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    print('Movies suggested for you:\n ')

    # Drop the queried movie explicitly instead of assuming it is ranked
    # first (rows with identical text tie with it), then keep the top 8.
    top_positions = [pos for pos, _score in ls if pos != movie_index][:8]

    # Print out the titles of the top 8 similar movies (Bhumik)
    for pos in top_positions:
        print(movies['orig_title'].iloc[pos])  # Display movie titles (Bhumik)

# (Bhumik Tandon's Code)


  (np.int32(0), np.int32(6393))	0.2860051458610216
  (np.int32(0), np.int32(13353))	0.12309189900771787
  (np.int32(0), np.int32(8181))	0.038558564778140875
  (np.int32(0), np.int32(676))	0.044504241318610756
  (np.int32(0), np.int32(884))	0.10798592953123029
  (np.int32(0), np.int32(8012))	0.16140891968695414
  (np.int32(0), np.int32(27479))	0.11133043368284196
  (np.int32(0), np.int32(3615))	0.2747920358063767
  (np.int32(0), np.int32(30331))	0.061188209723430406
  (np.int32(0), np.int32(776))	0.34463022888069106
  (np.int32(0), np.int32(12230))	0.11338067316090839
  (np.int32(0), np.int32(2817))	0.07967373169222439
  (np.int32(0), np.int32(27604))	0.16796633250627155
  (np.int32(0), np.int32(13549))	0.09158385824922904
  (np.int32(0), np.int32(3563))	0.09059042966271016
  (np.int32(0), np.int32(12689))	0.11173166525774432
  (np.int32(0), np.int32(4404))	0.1020750778749124
  (np.int32(0), np.int32(1402))	0.05150547387426312
  (np.int32(0), np.int32(9813))	0.04959252135560496
  (np.in

Enter your favourite movie:  venom


Closest match found: Venom
Movies suggested for you:
 
Venom
Venom: Let There Be Carnage
Tremors: A Cold Day in Hell
Venom
Venom
Deadstream
DC League of Super-Pets
Alien Warfare
