In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [72]:
# Genres to load. Each genre is read from '<genre>.csv' in the current working directory.
genres = ['action', 'adventure', 'animation', 'biography', 'crime', 'family', 'fantasy', 'history', 'horror', 'mystery', 'romance', 'scifi', 'sports', 'telugu', 'thriller', 'war']  # Add more genres as needed

# Columns concatenated (in this order, space-separated) into the 'features' text.
# The 'telugu' file uses a different schema from every other genre file.
TELUGU_FEATURE_COLUMNS = ['Overview', 'Rating']
DEFAULT_FEATURE_COLUMNS = ['description', 'rating', 'director', 'votes']

# genre -> DataFrame with an added 'features' column used for TF-IDF vectorisation.
genre_data = {}
for genre in genres:
    genre_frame = pd.read_csv(f'{genre}.csv')
    feature_columns = TELUGU_FEATURE_COLUMNS if genre == 'telugu' else DEFAULT_FEATURE_COLUMNS

    # Validate every column we are about to read, so a schema problem is reported
    # by name instead of surfacing as a bare KeyError halfway through the concat.
    missing_columns = [column for column in feature_columns if column not in genre_frame.columns]
    if missing_columns:
        raise ValueError(f"Required column(s) {missing_columns} not found in {genre} dataset.")

    # Missing values become empty strings; everything is cast to str before joining.
    # NOTE(review): numeric columns (rating/votes) are fed to TF-IDF as text tokens;
    # confirm they contribute useful signal.
    text_parts = [genre_frame[column].fillna('').astype(str) for column in feature_columns]
    genre_frame['features'] = text_parts[0].str.cat(text_parts[1:], sep=' ')
    genre_data[genre] = genre_frame

In [73]:
def _fit_genre_tfidf(corpus):
    """Fit a new TF-IDF vectorizer (English stop words) on ``corpus``.

    Returns the pair ``(fitted_vectorizer, tfidf_matrix)``.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    return vectorizer, vectorizer.fit_transform(corpus)


# genre -> (fitted vectorizer, TF-IDF matrix of that genre's 'features' column).
# Each genre gets its own independently fitted vectorizer.
tfidf_models = {
    genre_key: _fit_genre_tfidf(genre_data[genre_key]['features'])
    for genre_key in genres
}

In [74]:
# genre -> NearestNeighbors model fitted on that genre's TF-IDF matrix
# (element [1] of the tfidf_models tuple). Six neighbours are requested per query.
nn_models = {
    genre_key: NearestNeighbors(n_neighbors=6, algorithm='auto').fit(tfidf_models[genre_key][1])
    for genre_key in genres
}

In [75]:
def get_recommendations_with_overviews(movie_name, genre, tfidf_models=tfidf_models, nn_models=nn_models, genre_data=genre_data):
    """Return the movies in ``genre`` whose TF-IDF features are nearest to ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Title to look up; compared with ``==`` against the genre's title column
        ('Movie' for 'telugu', 'movie_name' otherwise). If several rows share the
        title, the first one is used as the query.
    genre : str
        Key into ``tfidf_models``, ``nn_models`` and ``genre_data``.
    tfidf_models : dict
        genre -> ``(fitted vectorizer, TF-IDF matrix)``.
    nn_models : dict
        genre -> fitted nearest-neighbour model.
    genre_data : dict
        genre -> DataFrame holding the title column, the summary column
        ('Overview' for 'telugu', 'description' otherwise) and 'features'.

    Returns
    -------
    list of dict
        One ``{'name': ..., 'overview': ...}`` entry per neighbour, nearest first,
        with the query row itself excluded. At most ``n_neighbors - 1`` entries.
        Overview values are returned exactly as stored (they may be missing/NaN).

    Raises
    ------
    KeyError
        If ``genre`` is not present in one of the lookup dicts.
    IndexError
        If no row in the genre's dataset has the title ``movie_name``.
    """
    if genre == 'telugu':
        name, summary = 'Movie', 'Overview'
    else:
        name, summary = 'movie_name', 'description'

    tfidf, tfidf_matrix = tfidf_models[genre]
    nn_model = nn_models[genre]
    genre_frame = genre_data[genre]

    # Positional row(s) whose title matches; positions line up with TF-IDF matrix rows.
    match_positions = (genre_frame[name] == movie_name).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        # Same exception type the bare .iloc[0] used to raise, but with a usable message.
        raise IndexError(f"Movie {movie_name!r} not found in {genre!r} dataset.")
    query_position = match_positions[0]

    movie_features = genre_frame['features'].iloc[query_position]
    movie_tfidf = tfidf.transform([movie_features])

    # kneighbors() rejects n_neighbors larger than the number of fitted rows,
    # so cap the request for very small genre datasets.
    neighbor_count = min(nn_model.n_neighbors, tfidf_matrix.shape[0])
    _distances, indices = nn_model.kneighbors(movie_tfidf, n_neighbors=neighbor_count)

    # Drop the query by row position rather than assuming it is the first hit:
    # rows with identical features tie at distance 0 and may be ordered before it.
    neighbor_positions = [idx for idx in indices.flatten() if idx != query_position]
    neighbor_positions = neighbor_positions[:max(neighbor_count - 1, 0)]

    return [
        {
            'name': genre_frame[name].iloc[idx],
            'overview': genre_frame[summary].iloc[idx],
        }
        for idx in neighbor_positions
    ]

In [85]:
# Example query: print the recommendations for one movie in one genre.
genre = 'family'
movie_name = 'Inside Out'
recommendations_with_overviews = get_recommendations_with_overviews(movie_name, genre)

# Overview texts that are treated as "no plot available" and therefore not printed.
placeholder_overviews = ('Add a Plot', 'Plot is unknown.')

for movie in recommendations_with_overviews:
    print(movie['name'])
    overview = movie['overview']
    # Missing check comes first so the placeholder comparison only sees real values.
    has_real_overview = pd.notnull(overview) and overview not in placeholder_overviews
    if has_real_overview:
        print(overview)
    print()



Inside Out 2
Follow Riley, in her teenage years encountering new emotions.

El ruiseñor chino

Rapunsell

Don't Give Up the Ghost
A child's world transforms everything into magic or into fear. Summer vacation. 3 French brothers, Matt (17), Eliot (14) and Lucas (7), follow their American mother to San Francisco where ...                See full summary »

Up
78-year-old Carl Fredricksen travels to Paradise Falls in his house equipped with balloons, inadvertently taking a young stowaway.

