In [183]:
print('Hello')

Hello


In [184]:
!pip install fastapi uvicorn pandas scikit-learn




[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [185]:
from fastapi import FastAPI
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [186]:
app = FastAPI()

In [187]:
df = pd.read_csv("TeluguMovies_dataset.csv")

In [188]:
df.head(6)

Unnamed: 0.1,Unnamed: 0,Movie,Year,Certificate,Genre,Overview,Runtime,Rating,No.of.Ratings
0,0,Bahubali: The Beginning,2015.0,UA,"Action, Drama","In ancient India, an adventurous and darin...",159,8.1,99114
1,1,Baahubali 2: The Conclusion,2017.0,UA,"Action, Drama","When Shiva, the son of Bahubali, learns ab...",167,8.2,71458
2,2,1 - Nenokkadine,2014.0,UA,"Action, Thriller",A rock star must overcome his psychologica...,170,8.1,42372
3,3,Dhoom:3,2013.0,UA,"Action, Thriller","When Sahir, a circus entertainer trained i...",172,5.4,42112
4,4,Ra.One,2011.0,U,"Action, Adventure, Sci-Fi",When the titular antagonist of an action g...,156,4.6,37211
5,5,Dhoom:2,2006.0,UA,"Action, Thriller","Mr A, a fearless thief, steals valuable ar...",152,6.5,22983


In [189]:
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df["Overview"].fillna(""))
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [190]:
def recommend_movies(movie_title: str, num_recommendations: int = 5):
    if movie_title not in df["Movie"].values:
        return {"error": f"Movie '{movie_title}' not found."}
    
    idx = df[df["Movie"] == movie_title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:num_recommendations+1]
    movie_indices = [i[0] for i in sim_scores]
    
    return {"recommendations": df["Movie"].iloc[movie_indices].tolist()}


In [191]:
app = FastAPI()
@app.get("/recommend")
def get_recommendations(movie: str, num: int = 5):
    return recommend_movies(movie, num)


In [192]:
@app.get("/")
def home():
    return {"message": "Welcome to the Telugu Movie Recommendation API!"}

In [193]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1400 entries, 0 to 1399
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     1400 non-null   int64  
 1   Movie          1400 non-null   object 
 2   Year           1352 non-null   float64
 3   Certificate    951 non-null    object 
 4   Genre          1389 non-null   object 
 5   Overview       1221 non-null   object 
 6   Runtime        1400 non-null   int64  
 7   Rating         1400 non-null   float64
 8   No.of.Ratings  1400 non-null   int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 98.6+ KB


In [194]:
df.isnull().sum()

Unnamed: 0         0
Movie              0
Year              48
Certificate      449
Genre             11
Overview         179
Runtime            0
Rating             0
No.of.Ratings      0
dtype: int64

In [195]:
df.dropna(inplace=True)

In [196]:
df.duplicated().sum()

np.int64(0)

In [197]:
movies=df

In [198]:
movies.iloc[0].Genre

'Action, Drama            '

In [199]:
movies.Overview[0]

'    In ancient India, an adventurous and daring man becomes involved in a decades old feud between two warring people.'

In [200]:
movies['tags'] = movies['Genre'] + " " + movies['Overview'] + " " + movies['Certificate']

In [201]:
movies['Genres']=movies['Genre'].apply(lambda x:[i.replace("", " ")for i in x])

In [202]:
movies.head()

Unnamed: 0.1,Unnamed: 0,Movie,Year,Certificate,Genre,Overview,Runtime,Rating,No.of.Ratings,tags,Genres
0,0,Bahubali: The Beginning,2015.0,UA,"Action, Drama","In ancient India, an adventurous and darin...",159,8.1,99114,"Action, Drama In ancient India...","[ A , c , t , i , o , n , , , , D , ..."
1,1,Baahubali 2: The Conclusion,2017.0,UA,"Action, Drama","When Shiva, the son of Bahubali, learns ab...",167,8.2,71458,"Action, Drama When Shiva, the ...","[ A , c , t , i , o , n , , , , D , ..."
2,2,1 - Nenokkadine,2014.0,UA,"Action, Thriller",A rock star must overcome his psychologica...,170,8.1,42372,"Action, Thriller A rock star m...","[ A , c , t , i , o , n , , , , T , ..."
3,3,Dhoom:3,2013.0,UA,"Action, Thriller","When Sahir, a circus entertainer trained i...",172,5.4,42112,"Action, Thriller When Sahir, a...","[ A , c , t , i , o , n , , , , T , ..."
4,4,Ra.One,2011.0,U,"Action, Adventure, Sci-Fi",When the titular antagonist of an action g...,156,4.6,37211,"Action, Adventure, Sci-Fi When...","[ A , c , t , i , o , n , , , , A , ..."


In [203]:
import nltk

In [204]:
from nltk.stem.porter import PorterStemmer
ps=PorterStemmer()

In [205]:
def stem(text):
    y=[]
    for i in text.split():
        y.append(ps.stem(i))
    return " ".join(y)

In [206]:
movies['tags'].apply(stem)

0       action, drama in ancient india, an adventur an...
1       action, drama when shiva, the son of bahubali,...
2       action, thriller a rock star must overcom hi p...
3       action, thriller when sahir, a circu entertain...
4       action, adventure, sci-fi when the titular ant...
                              ...                        
1383    drama an immens success film director is get r...
1386    drama maavichiguru is a 1996 indian telugu fil...
1389    action, crime, famili two local ne'er-do-wel e...
1390    action, crime thi is the stori of a law-abid y...
1399    drama surendra marri savitri, an orphan. in a ...
Name: tags, Length: 840, dtype: object

In [207]:
movies['tags'][300]

'Action, Comedy                 Ravi (Ravi Teja) works as a collection agent of ICICI in Bangalore and leads a happy life along with his friends and his father Mohan Rao (Prakash Raj) who wishes to see Ravi married to a ...                See full summary\xa0» UA'

In [208]:
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(max_features=500,stop_words='english')

In [209]:
vectors=cv.fit_transform(movies['tags']).toarray()

In [210]:
vectors[0]

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [211]:
cv.get_feature_names_out()


array(['accident', 'accidentally', 'acquire', 'action', 'actor',
       'actually', 'adopted', 'adventure', 'age', 'agent', 'aka', 'anand',
       'andhra', 'apart', 'arjun', 'army', 'aspiring', 'attack',
       'attempt', 'attitude', 'avenge', 'away', 'babu', 'baby', 'bad',
       'balu', 'based', 'battle', 'beautiful', 'believes', 'best',
       'bhanu', 'big', 'billionaire', 'biography', 'birth', 'blind',
       'blood', 'body', 'bond', 'boss', 'boy', 'breaks', 'bring',
       'brings', 'brother', 'brothers', 'business', 'businessman',
       'called', 'care', 'career', 'carefree', 'case', 'caste', 'caught',
       'cbi', 'chakravarthy', 'challenges', 'chandra', 'chandu', 'change',
       'changes', 'changing', 'character', 'chief', 'child', 'childhood',
       'children', 'chiranjeevi', 'circumstances', 'city', 'class',
       'classical', 'close', 'college', 'come', 'comedy', 'comes',
       'commit', 'company', 'complicated', 'consequences', 'cop',
       'corrupt', 'corruption',

In [212]:
ps.stem('dancing')

'danc'

In [213]:
!pip install nltk




[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [214]:
from sklearn.metrics.pairwise import cosine_similarity

In [215]:
similarity=cosine_similarity(vectors)

In [216]:
sorted(list(enumerate(similarity[0])),reverse=True,key=lambda x:x[1])[1:6]

[(242, np.float64(0.5773502691896258)),
 (457, np.float64(0.5590169943749475)),
 (111, np.float64(0.5345224838248487)),
 (418, np.float64(0.5345224838248487)),
 (266, np.float64(0.5303300858899106))]

In [None]:
def recommend(movie):
    movie_index=movies[movies['Movie']==movie].index[0]
    distances=similarity[movie_index]
    movies_list=sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:6]
    for i in movies_list:
        movie_id=i[0]
        //fetch poster from Api
        print(movies.iloc[i[0]].Movie)
        print(i[0])
    return 

In [217]:
recommend('Sarileru Neekevvaru')

(647, np.float64(0.6299407883487119))
(701, np.float64(0.6172133998483676))
(405, np.float64(0.5669467095138407))
(102, np.float64(0.5345224838248487))
(140, np.float64(0.5345224838248487))


In [172]:
recommend('Sarileru Neekevvaru')

Thikka
647
Fitting Master
701
Settai
405
Yamadonga
102
Rachcha
140


In [173]:
movies.iloc[13].Movie

'Saaho'

In [174]:
import pickle

In [175]:
pickle.dump(movies,open('movies.pkl','wb'))

In [179]:
pickle.dump(similarity,open('similarity.pkl','wb'))

In [176]:
movies['Movie'].values

array(['Bahubali: The Beginning', 'Baahubali 2: The Conclusion',
       '1 - Nenokkadine', 'Dhoom:3', 'Ra.One', 'Dhoom:2', 'Eega',
       'Krrish 3', 'Arjun Reddy', 'Rangasthalam', 'Magadheera', 'War',
       'Bharat Ane Nenu', 'Saaho', 'Theri', 'Dookudu', 'Pokiri', 'Sarkar',
       'Athadu', 'The Ghazi Attack', 'Kabali',
       'MSG: The Messenger of God', 'Nanban', 'Srimanthudu',
       'Veer - Vivegam', 'Billa 2', '7 Aum Arivu', 'Bigil',
       'Business Man', 'Geetha Govindam', 'Mahanati', 'Spyder',
       'Nannaku Prematho', 'Dabangg 3', 'Race Gurram', 'Okkadu',
       'Bommarillu', 'Atharintiki Daaredi', 'Khaleja', 'Yennai Arindhaal',
       'Thalaivaa', 'Bairavaa', 'Goodachari', 'Puli', 'Pulimurugan',
       'Veeram', 'Vedam', 'Yevadu', 'Aravindha Sametha Veera Raghava',
       'Billa', 'Jersey', 'Sye Raa Narasimha Reddy',
       'Ala Vaikunthapurramuloo', 'Janatha Garage', 'Gabbar Singh',
       'Temper', 'Singam 2', 'Dhruva', 'Jalsa', 'Maharshi',
       'Pelli Choopulu', 'Arya

In [178]:
pickle.dump(movies.to_dict(),open('movie_dict.pkl','wb'))

In [None]:
recommend('Baahubali 2: The Conclusion')

Venky Mama
300
Ala Vaikunthapurramuloo
52
Dabangg 3
33
Baanam
584
Maruthu
502


In [None]:
movies.to_dict()

{'Unnamed: 0': {0: 0,
  1: 1,
  2: 2,
  3: 3,
  4: 4,
  5: 5,
  6: 6,
  7: 7,
  8: 8,
  9: 9,
  10: 10,
  11: 11,
  12: 12,
  13: 13,
  14: 14,
  15: 15,
  16: 16,
  17: 17,
  18: 18,
  19: 19,
  20: 20,
  21: 21,
  22: 22,
  23: 23,
  24: 24,
  25: 25,
  27: 27,
  28: 28,
  29: 29,
  30: 30,
  31: 31,
  32: 32,
  33: 33,
  34: 34,
  37: 37,
  38: 38,
  39: 39,
  40: 40,
  41: 41,
  42: 42,
  43: 43,
  45: 45,
  46: 46,
  47: 47,
  48: 48,
  49: 49,
  50: 50,
  51: 51,
  52: 52,
  53: 53,
  54: 54,
  55: 55,
  56: 56,
  57: 57,
  58: 58,
  59: 59,
  61: 61,
  62: 62,
  63: 63,
  64: 64,
  65: 65,
  66: 66,
  67: 67,
  68: 68,
  69: 69,
  70: 70,
  71: 71,
  72: 72,
  73: 73,
  74: 74,
  75: 75,
  76: 76,
  77: 77,
  78: 78,
  79: 79,
  80: 80,
  81: 81,
  82: 82,
  84: 84,
  85: 85,
  87: 87,
  88: 88,
  89: 89,
  90: 90,
  91: 91,
  92: 92,
  93: 93,
  94: 94,
  95: 95,
  96: 96,
  97: 97,
  99: 99,
  100: 100,
  101: 101,
  102: 102,
  103: 103,
  104: 104,
  105: 105,
  106: 106,
  