In [1]:
import pandas as pd

# https://files.grouplens.org/datasets/movielens/ml-25m.zip
# NOTE(review): the URL above is the MovieLens 25M archive, but the file read
# below is "telugu.csv" — confirm the real source of this dataset and update the link.
movies = pd.read_csv("telugu.csv")

In [2]:
# Quick look at the first five rows to check which columns were loaded.
movies.head(n=5)

Unnamed: 0,Title,Rating,Year,Genre,Duration,Story,Certificate,Votes
0,Sye Raa Narasimha Reddy,7.6,2019,"Action, Adventure, Drama",167 min,A historical action epic inspired by the life ...,Not Rated,5222
1,Evaru,8.3,2019,"Crime, Drama, Thriller",118 min,Sub-inspector Vikram Vasudev is entrusted with...,Not Rated,2083
2,Agent Sai Srinivasa Athreya,8.5,2019,"Action, Comedy, Crime",148 min,An authentic humorous investigative thriller r...,UA,3501
3,Rakshasudu,8.4,2019,"Action, Crime, Thriller",122 min,A sub-inspector sets out in pursuit of a myste...,UA,692
4,Khaidi,8.6,2019,"Action, Thriller",145 min,"A drug bust, an injured cop and a convicted cr...",UA,5257


In [3]:
import re

def clean_title(title):
    """Normalize a movie title for text matching.

    Keeps only ASCII letters, digits and spaces; every other character
    (punctuation, parentheses, accented or non-Latin letters) is removed.

    Parameters
    ----------
    title : str or None
        Raw title. Missing values (``None`` or a float NaN, which is what
        pandas yields for an empty CSV cell) are treated as an empty title.
        Any other non-string value is converted with ``str()`` first.

    Returns
    -------
    str
        The cleaned title, possibly empty.
    """
    # NaN is the only float that compares unequal to itself.
    if title is None or (isinstance(title, float) and title != title):
        return ""
    return re.sub(r"[^a-zA-Z0-9 ]", "", str(title))

In [4]:
# Store a normalized version of every title alongside the original column.
movies["clean_title"] = movies["Title"].map(clean_title)

In [5]:
# Show a bounded preview (rather than the whole frame) to verify that the
# new `clean_title` column was added next to the original columns.
movies.head(10)

Unnamed: 0,Title,Rating,Year,Genre,Duration,Story,Certificate,Votes,clean_title
0,Sye Raa Narasimha Reddy,7.6,2019,"Action, Adventure, Drama",167 min,A historical action epic inspired by the life ...,Not Rated,5222,Sye Raa Narasimha Reddy
1,Evaru,8.3,2019,"Crime, Drama, Thriller",118 min,Sub-inspector Vikram Vasudev is entrusted with...,Not Rated,2083,Evaru
2,Agent Sai Srinivasa Athreya,8.5,2019,"Action, Comedy, Crime",148 min,An authentic humorous investigative thriller r...,UA,3501,Agent Sai Srinivasa Athreya
3,Rakshasudu,8.4,2019,"Action, Crime, Thriller",122 min,A sub-inspector sets out in pursuit of a myste...,UA,692,Rakshasudu
4,Khaidi,8.6,2019,"Action, Thriller",145 min,"A drug bust, an injured cop and a convicted cr...",UA,5257,Khaidi
5,Chitralahari,7.1,2019,Drama,131 min,Down on luck and depressed by constant failure...,UA,1230,Chitralahari
6,Jersey,8.6,2019,"Drama, Sport",157 min,A failed cricketer decides to revive his crick...,UA,4428,Jersey
7,Maharshi,7.3,2019,"Action, Drama",176 min,"Rishi, a millionaire businessman, returns to h...",Not Rated,4571,Maharshi
8,Mallesham,8.5,2019,Biography,131 min,The life of Padma Shri winner Chintakindi Mall...,U,512,Mallesham
9,George Reddy,7.3,2019,Action,153 min,A biopic based on the life of a student leader...,UA,367,George Reddy


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Represent each title by TF-IDF weights over its single words and adjacent
# word pairs (ngram_range=(1, 2)).
vectorizer = TfidfVectorizer(ngram_range=(1,2))

# Learn the vocabulary from the cleaned titles and vectorize them in one call.
# `search` later compares query vectors against the rows of `tfidf`.
tfidf = vectorizer.fit_transform(movies["clean_title"])

In [7]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title, top_n=5):
    """Return the movies whose titles are most similar to ``title``.

    The query is cleaned with ``clean_title``, vectorized with the fitted
    ``vectorizer`` and compared against every row of ``tfidf`` using cosine
    similarity.

    Parameters
    ----------
    title : str
        Free-text query typed by the user.
    top_n : int, default 5
        Maximum number of rows to return. Values larger than the number of
        indexed titles are clamped; values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``movies``, best match first.
    """
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # argpartition raises if asked for more elements than exist, so clamp.
    k = min(top_n, similarity.size)
    if k <= 0:
        return movies.iloc[[]]

    # argpartition selects the k best scores cheaply but leaves them in an
    # unspecified order, so sort just those k by score, highest first.
    top = np.argpartition(similarity, -k)[-k:]
    ranked = top[np.argsort(similarity[top])[::-1]]

    return movies.iloc[ranked]

In [8]:
import ipywidgets as widgets
from IPython.display import display

# Text box for the search query.
# NOTE(review): the default "Toy Story" looks like a leftover from another
# dataset — confirm it is a sensible starting value for telugu.csv.
movie_input = widgets.Text(
    description="Movie Title:",
    value="Toy Story",
)

# Output area that the change handler fills with search results.
movie_list = widgets.Output()

def on_type(data, min_chars=3):
    """Refresh the results pane whenever the text box content changes.

    Parameters
    ----------
    data : dict-like
        Change notification from the widget; the new text is read from
        ``data["new"]``.
    min_chars : int, default 3
        Minimum number of non-padding characters required before a search
        runs. Kept low so that short titles present in the data (for
        example "Evaru") remain searchable.
    """
    title = data["new"]
    with movie_list:
        # Always drop stale results, even when the query is too short.
        movie_list.clear_output()
        # Ignore surrounding whitespace so blank input never triggers a search.
        if len(title.strip()) >= min_chars:
            display(search(title))

# Call on_type every time the text box's `value` trait changes.
movie_input.observe(handler=on_type, names="value")

# Show the input box followed by the results pane.
display(movie_input, movie_list)

Text(value='Toy Story', description='Movie Title:')

Output()