In [None]:
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt

import os
import warnings 
warnings.simplefilter(action='ignore', category=FutureWarning)
with warnings.catch_warnings():
    warnings.simplefilter("ignore", RuntimeWarning)

In [54]:
# Load movies.csv (resolved relative to the current working directory) into a
# DataFrame, then display it as the cell result.
df = pd.read_csv(filepath_or_buffer="movies.csv")
df

FileNotFoundError: [Errno 2] No such file or directory: 'movies.csv'

In [55]:
# Number of missing values in each column of df.
df.isna().sum()

Title                  0
Plot                   0
Cast                   0
Genre                  0
Runtime               86
Director               0
Production House       0
Release Dates          0
Image URL              0
Rating              1732
Total Ratings       1732
Movie URL              0
dtype: int64

In [56]:
# Replace missing production houses / directors with the placeholder "N/A".
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on df[col]: that form acts on an intermediate
# object, raises a FutureWarning on pandas 2.x and leaves df unchanged once
# Copy-on-Write is enabled (the default from pandas 3.0).
df["Production House"] = df["Production House"].fillna("N/A")
df["Director"] = df["Director"].fillna("N/A")


In [57]:
# Re-check the per-column missing-value counts of df.
df.isna().sum()

Title                  0
Plot                   0
Cast                   0
Genre                  0
Runtime               86
Director               0
Production House       0
Release Dates          0
Image URL              0
Rating              1732
Total Ratings       1732
Movie URL              0
dtype: int64

In [58]:
# Use a stock "N/A" image for rows without a poster URL.
# Assigned back to the column instead of fillna(..., inplace=True) on
# df[col], which is chained assignment: it warns on pandas 2.x and has no
# effect on df under Copy-on-Write (the default from pandas 3.0).
df["Image URL"] = df["Image URL"].fillna(
    "https://cdn.simplystamps.com/media/catalog/product/5/8/5802-n-a-stock-stamp-hcb.png"
)

In [59]:
# Per-column missing-value counts of df after the fills above this cell.
df.isna().sum()

Title                  0
Plot                   0
Cast                   0
Genre                  0
Runtime               86
Director               0
Production House       0
Release Dates          0
Image URL              0
Rating              1732
Total Ratings       1732
Movie URL              0
dtype: int64

In [60]:
# df1 is df without the two rating columns; df itself is left untouched.
df1 = df.drop(['Total Ratings', 'Rating'], axis=1)

In [61]:
# Print the column labels of df1.
print(df1.columns)

Index(['Title', 'Plot', 'Cast', 'Genre', 'Runtime', 'Director',
       'Production House', 'Release Dates', 'Image URL', 'Movie URL'],
      dtype='object')


In [62]:
# Build one free-text field per movie: genre, cast, director, production
# house and plot, in that order, joined by single spaces. A missing value
# contributes an empty string.
combined_text = df1['Genre'].fillna('')
for column in ('Cast', 'Director', 'Production House', 'Plot'):
    combined_text = combined_text + ' ' + df1[column].fillna('')
df1['describe'] = combined_text

In [63]:
# Print the combined text of the first row as a spot check.
first_description = df1['describe'].iloc[0]
print(first_description)

Drama Gaumaya GurungDhanmaya, Dayahang Rai, Aaryan Sigdel, Khagendra Lamichhane, Malika Mahat, Basundhara Bhusal, Mohan Niraula, Aabha Aryal, Puskar Gurung, Binod Neupane, Anjana Baraili, Anoop Bikram Shahi, Buddhi Tamang, Kamal Mani Nepal Dinesh Raut Clock Work Creation PLOT Mahabhoj is a social drama set against Nepal’s political landscape. The film follows a group of ordinary individuals who rise against those who manipulate their lives for personal gain. It explores themes of rebellion, truth, and the fight for justice, tackling deep-rooted issues of political injustice and social inequality. The story revolves around a bank loan manager who desperately needs money and turns to his father for help—only to discover that his father's hard-earned savings have been stolen by a corrupt cooperative and a group of individuals for their own gain.


In [64]:
def stri(x):
    """Normalise a string Series: drop every space and '/', then lowercase.

    Because all spaces are removed, words that were separated only by a
    space end up fused into a single run of characters.

    Args:
        x: pandas Series of strings (processed through the ``.str`` accessor).

    Returns:
        A new Series with the same index holding the transformed strings.
    """
    return x.str.replace(' ', '').str.replace('/', '').str.lower()

# Normalise the combined text with stri, then preview the first rows.
df1['describe'] = df1['describe'].pipe(stri)
df1['describe'].head()

0    dramagaumayagurungdhanmaya,dayahangrai,aaryans...
1    comedy,dramaaasifshah,saugatmallakamal,garimas...
2    action,dramarajeshhamalavinashbikramshah,nitis...
3    comedy,dramadiliprayamajhi,surbirpandit,ramcha...
4    dramakekiadhikari,upasanasinghthakuri,prakashg...
Name: describe, dtype: object

In [65]:
# Remove a parenthesised four-digit group such as " (2023)" (plus any
# whitespace directly before it) from every title.
year_suffix = r'\s*\(\d{4}\)'
df1['Title'] = df1['Title'].str.replace(year_suffix, '', regex=True)

In [66]:
# Display the cleaned Title column.
df1['Title']

0                           Mahabhoj
1                       Jante Bakhro
2                   Narasimha Avatar
3                     Hello Kashiram
4       Ke Ghar Ke Dera - Ghar No. 2
                    ...             
1727                Parivartan (B/W)
1728                 Hijo Aaja Bholi
1729                       Maitighar
1730                            Aama
1731                   Harishchandra
Name: Title, Length: 1732, dtype: object

In [67]:
from sklearn.feature_extraction.text import CountVectorizer

# Token-count matrix over df1['describe']: English stop words are dropped and
# the vocabulary is capped at 5000 features. The sparse result is converted
# to a dense array and shown.
vectorizer = CountVectorizer(stop_words='english', max_features=5000)
term_counts = vectorizer.fit_transform(df1['describe'])
X = term_counts.toarray()
X

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0]], shape=(1732, 5000))

In [68]:
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between the rows of X: entry [i, j] compares
# row i with row j, so the result is square with one row/column per movie.
similarity = cosine_similarity(X)

In [69]:
# Display the similarity matrix as the cell result.
similarity

array([[1.        , 0.        , 0.        , ..., 0.        , 0.05103104,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.05103104, 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]], shape=(1732, 1732))

In [70]:
def recommended(movie, top_n=5):
    """Print and return the titles most similar to ``movie``.

    ``movie`` is looked up by exact (case-sensitive) match in
    ``df1['Title']``; every other row is then ranked by its score in the
    precomputed ``similarity`` matrix. Details of each pick are printed.

    Args:
        movie: Title to look up. If several rows share the title, the first
            one is used.
        top_n: How many recommendations to produce. Defaults to 5.

    Returns:
        List of recommended titles, best match first. An empty list (after
        printing a message) when ``movie`` is not in the dataset.
    """
    # Use the row *position*, not the index label: `similarity` and
    # `df1.iloc` are both positional, so this stays correct even if df1 does
    # not carry a default 0..n-1 index.
    matches = np.flatnonzero((df1['Title'] == movie).to_numpy())
    if matches.size == 0:
        print(f"❌ Movie '{movie}' not found in the dataset.")
        return []
    movie_pos = int(matches[0])

    distances = similarity[movie_pos]
    # Exclude the queried row explicitly instead of slicing off the first
    # sorted entry: when another row ties with it (identical description, or
    # an all-zero vector), the queried movie is not guaranteed to sort first
    # and would otherwise be recommended to itself.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(distances) if pos != movie_pos),
        key=lambda pair: pair[1],
        reverse=True,
    )[:top_n]

    recommendations = []

    print(f"\n🎬 Top {top_n} recommendations similar to: **{movie}**\n")
    for pos, _ in ranked:
        data = df1.iloc[pos]

        title = data.get('Title', 'N/A')
        genre = data.get('Genre', 'N/A')
        cast = data.get('Cast', 'N/A')
        director = data.get('Director', 'N/A')
        prod = data.get('Production House', 'N/A')
        dates = data.get('Release Dates', 'N/A')
        plot = data.get('Plot', 'Plot not found')

        # \033[1m ... \033[0m renders the title in bold on ANSI terminals.
        print(f"-> \033[1m{title}\033[0m")
        print(f"\t📌 Genre: {genre}")
        print(f"\t👥 Casts: {cast}")
        print(f"\t🎬 Director: {director}")
        print(f"\t🏭 Production House: {prod}")
        print(f"\t📅 Release on: {dates}")
        print(f"\t📝 Movie description: {plot}\n")

        recommendations.append(title)

    return recommendations


In [71]:
# Example query: print the picks for one title and show the returned list.
recommended(movie='Jante Bakhro')


🎬 Top 5 recommendations similar to: **Jante Bakhro**

-> [1mWelcome to Hemjakot[0m
	📌 Genre: Comedy,Drama
	👥 Casts: Buddhi Tamang, Binod Neupane, Ambika Tamang, Sujaira Karki, Ashmita Panta, Jayananda Lama, Bishal Pahari, Rabi Giri, Sharadha Giri
	🎬 Director: Saroj Poudel
	🏭 Production House: Film City Production House,Close-up Cine Production
	📅 Release on: 26 Jestha, 2080, 9 June, 2023
	📝 Movie description: PLOT “Welcome to Hemjakot” is a movie based on the story of three boys who are living in the common village of Hilly area as friends. The movie is based on the simple social drama of the struggle and tragic intercaste love of middle class youths which is similar to the accident caused by intercaste love in Rukum district a few years ago.

-> [1mTakdhina Dhin[0m
	📌 Genre: Comedy,Drama
	👥 Casts: Royadip Shrestha, Nabina Silwal
	🎬 Director: N/A
	🏭 Production House: N/A
	📅 Release on: 23 Mangshir, 2077, 8 December, 2020
	📝 Movie description: Plot not found

-> [1mHrashwo Deergha

['Welcome to Hemjakot',
 'Takdhina Dhin',
 'Hrashwo Deergha',
 'Aankha Lobhi Man Papi',
 'Matti Mala']