In [21]:
import joblib
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from thefuzz import process

In [22]:
# Load the persisted artifacts in one pass: the fitted vectorizer, the TF-IDF
# matrix, and the movie metadata frame.
# NOTE: joblib.load unpickles its input, so only point it at trusted files.
tfidf, tfidf_matrix, df = (
    joblib.load(artifact_path)
    for artifact_path in (
        '../Models/tfidf_vectorizer.joblib',
        '../Models/tfidf_matrix.joblib',
        '../Models/movie_dataframe.joblib',
    )
)

In [23]:
def get_recommendations(user_input_titles, df, tfidf_matrix, top_n = 10,
                        min_score = 60, candidate_pool = 25):
    """Recommend movies similar to one or more (fuzzily matched) titles.

    Each input title is fuzzy-matched against ``df['original_title']``. The
    TF-IDF rows of the matched movies are compared with every row of
    ``tfidf_matrix`` by cosine similarity, the similarities are averaged
    across the matched movies, and the most similar candidates are re-ranked
    by the ``score`` column.

    Parameters
    ----------
    user_input_titles : str or list of str or None
        Title(s) typed by the user. A single string is treated as a
        one-element list; ``None`` is treated as no titles.
    df : pandas.DataFrame
        Movie table with at least the columns ``original_title``,
        ``vote_average``, ``vote_count`` and ``score``. Row ``i`` of ``df``
        is used together with row ``i`` of ``tfidf_matrix``.
    tfidf_matrix : matrix
        Feature matrix passed to ``cosine_similarity``; indexed by row
        position.
    top_n : int, default 10
        Maximum number of recommendations to return.
    min_score : int, default 60
        A fuzzy match is accepted only when its score is strictly greater
        than this value.
    candidate_pool : int, default 25
        Number of most-similar movies kept before re-ranking by ``score``.
        Raised to ``top_n`` when ``top_n`` is larger, so that ``top_n`` rows
        can actually be returned.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with the columns ``original_title``,
        ``vote_average``, ``vote_count`` and ``score``, sorted by ``score``
        descending. Empty (same columns) when no input title could be
        matched.
    """
    output_columns = ['original_title', 'vote_average', 'vote_count', 'score']

    if user_input_titles is None:
        user_input_titles = []
    elif isinstance(user_input_titles, str):
        user_input_titles = [user_input_titles]

    all_movie_titles = df['original_title'].tolist()

    # Map each title to its row *position*, not its index label: the value is
    # used below to index tfidf_matrix, avg_sim and df.iloc, all positional.
    # For duplicated titles the last occurrence wins.
    title_to_position = {name: pos for pos, name in enumerate(all_movie_titles)}

    matched_indices = []
    for title in user_input_titles:
        best_match = process.extractOne(title, all_movie_titles)

        # extractOne may yield None when there is nothing to compare against.
        if best_match is not None and best_match[1] > min_score:
            matched_movie_name = best_match[0]
            matched_indices.append(title_to_position[matched_movie_name])
            print(f'"{title}" Matched to {matched_movie_name}')
        else:
            print(f'"{title}" had no close match')

    if not matched_indices:
        # Stop here: computing similarities for an empty selection would fail.
        print("Movie Not Found")
        return df[output_columns].head(0)

    sim_matrix = cosine_similarity(tfidf_matrix[matched_indices], tfidf_matrix)
    avg_sim = sim_matrix.mean(axis=0)

    # Push the seed movies to the bottom of the ranking ...
    for idx in matched_indices:
        avg_sim[idx] = -1

    # ... and also filter them out explicitly, so they cannot leak back into
    # the results when the corpus is smaller than the candidate pool.
    seed_positions = set(matched_indices)
    pool_size = max(candidate_pool, top_n)
    ranked_positions = avg_sim.argsort()[::-1]
    top_indices = [
        pos
        for pos in ranked_positions[:pool_size + len(seed_positions)]
        if pos not in seed_positions
    ][:pool_size]

    qualified = df.iloc[top_indices].copy()
    qualified = qualified.sort_values('score', ascending=False)

    return qualified[output_columns].head(top_n)

In [24]:
# Demo: a truncated title still resolves through fuzzy matching.
recommended = get_recommendations(
    user_input_titles=['toy sto'],
    df=df,
    tfidf_matrix=tfidf_matrix,
)
print(recommended)

"toy sto" Matched to toy story 2
                      original_title  vote_average  vote_count     score
0                          toy story           7.7      5415.0  7.685813
14705                    toy story 3           7.6      4710.0  7.584464
20751           toy story of terror!           7.3       246.0  7.076828
24255                partysaurus rex           7.4        81.0  6.831328
23107              hawaiian vacation           6.9       151.0  6.640457
28901  santa claus is comin' to town           6.9        38.0  6.244127
9965          the 40 year old virgin           6.2      2020.0  6.188650
1782                  small soldiers           6.2       522.0  6.158182
30203           a lego brickumentary           6.4        55.0  6.064455
23304             la freccia azzurra           6.8        18.0  5.967878
