In [1]:
import pandas as pd

In [2]:
import pandas as pd
import numpy as np

url = "https://raw.githubusercontent.com/rashida048/Some-NLP-Projects/master/movie_dataset.csv"
df = pd.read_csv(url)

# Fill nulls
df[['genres', 'keywords', 'cast', 'director']] = df[['genres', 'keywords', 'cast', 'director']].fillna('')
df[['budget', 'popularity', 'runtime', 'revenue', 'vote_count']] = df[['budget', 'popularity', 'runtime', 'revenue', 'vote_count']].fillna(0)

# Combine textual features
def combine_features(row):
    return f"{row['genres']} {row['keywords']} {row['cast']} {row['director']}"

df['combined_features'] = df.apply(combine_features, axis=1)


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack

# TF-IDF Vectorizer
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_text = vectorizer.fit_transform(df['combined_features'])

# Numerical features
X_numeric = df[['budget', 'popularity', 'runtime', 'revenue', 'vote_count']].astype(float)
X_full = hstack([X_text, X_numeric.values])

# Target
y = df['vote_average'].astype(float)


In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_full, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [11]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_by_ai_model(custom_input, top_n=10):
    # Step 1: Combine custom text features
    custom_text = f"{custom_input['genres']} {custom_input['keywords']} {custom_input['cast']} {custom_input['director']}"
    custom_text_vector = vectorizer.transform([custom_text])


    # Step 2: Add zero numerical placeholders (not needed in cosine similarity, but optional)
    zero_numerical = np.zeros((1, 5))  # budget, popularity, runtime, revenue, vote_count
    custom_full_vector = hstack([custom_text_vector, zero_numerical])


    # Step 3: Get cosine similarity to all movies
    similarity = cosine_similarity(custom_text_vector, X_text).flatten()
    print(similarity)

    # Step 4: Predict scores of all movies using trained model
    predicted_scores = model.predict(X_full)


    # Step 5: Combine scores and similarity to rank
    df['similarity'] = similarity
    df['predicted_score'] = predicted_scores

    # Weighted scoring: prioritize both predicted rating and similarity
    df['final_score'] = (df['similarity'] * 0.6) + (df['predicted_score'] / 10 * 0.4)  # normalize predicted_score
    # Sort and recommend
    top_movies = df.sort_values(by='final_score', ascending=False).head(top_n)

    print(f"\nTop {top_n} Recommended Movies for Your Input:\n")
    for idx, row in top_movies.iterrows():
        print(f"{row['title']} - Predicted Rating: {round(row['predicted_score'], 2)} - Similarity: {round(row['similarity'], 2)}")


In [18]:
df

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,title,vote_average,vote_count,cast,crew,director,combined_features,similarity,predicted_score,final_score
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron,Action Adventure Fantasy Science Fiction cultu...,0.0,7.239,0.28956
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski,Adventure Fantasy Action ocean drug abuse exot...,0.0,6.946,0.27784
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes,Action Adventure Crime spy based on novel secr...,0.0,6.503,0.26012
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.312950,...,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan,Action Crime Drama Thriller dc comics crime fi...,0.0,7.463,0.29852
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton,Action Adventure Science Fiction based on nove...,0.0,6.038,0.24152
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4798,4798,220000,Action Crime Thriller,,9367,united states\u2013mexico barrier legs arms pa...,es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,...,El Mariachi,6.6,238,Carlos Gallardo Jaime de Hoyos Peter Marquardt...,"[{'name': 'Robert Rodriguez', 'gender': 0, 'de...",Robert Rodriguez,Action Crime Thriller united states\u2013mexic...,0.0,5.853,0.23412
4799,4799,9000,Comedy Romance,,72766,,en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,...,Newlyweds,5.9,5,Edward Burns Kerry Bish\u00e9 Marsha Dietlein ...,"[{'name': 'Edward Burns', 'gender': 2, 'depart...",Edward Burns,Comedy Romance Edward Burns Kerry Bish\u00e9 ...,0.0,5.006,0.20024
4800,4800,0,Comedy Drama Romance TV Movie,http://www.hallmarkchannel.com/signedsealeddel...,231617,date love at first sight narration investigati...,en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,...,"Signed, Sealed, Delivered",7.0,6,Eric Mabius Kristin Booth Crystal Lowe Geoff G...,"[{'name': 'Carla Hetland', 'gender': 0, 'depar...",Scott Smith,Comedy Drama Romance TV Movie date love at fir...,0.0,6.689,0.26756
4801,4801,0,,http://shanghaicalling.com/,126186,,en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,...,Shanghai Calling,5.7,7,Daniel Henney Eliza Coupe Bill Paxton Alan Ruc...,"[{'name': 'Daniel Hsia', 'gender': 2, 'departm...",Daniel Hsia,Daniel Henney Eliza Coupe Bill Paxton Alan R...,0.0,5.365,0.21460


In [20]:

custom_input = {
    "genres": "Action Adventure Sci-Fi",
    "keywords": "space future soldier",
    "cast": "Chris Pratt Zoe Saldana",
    "director": "James Gunn",
    "budget": 150000000,
    "popularity": 80,
    "runtime": 121,
    "revenue": 700000000,
    "vote_count": 8000
}


recommend_by_ai_model(custom_input)


[0.3853106  0.02412369 0.03086257 ... 0.         0.         0.        ]
[7.239 6.946 6.503 ... 6.689 5.365 6.434]

Top 10 Recommended Movies for Your Input:

Guardians of the Galaxy - Predicted Rating: 7.59 - Similarity: 0.54
Avatar - Predicted Rating: 7.24 - Similarity: 0.39
Star Trek Into Darkness - Predicted Rating: 6.96 - Similarity: 0.3
Center Stage - Predicted Rating: 6.58 - Similarity: 0.3
Star Trek Beyond - Predicted Rating: 6.29 - Similarity: 0.3
The Lego Movie - Predicted Rating: 7.32 - Similarity: 0.17
Alien - Predicted Rating: 7.82 - Similarity: 0.12
Gravity - Predicted Rating: 7.21 - Similarity: 0.16
The Book of Life - Predicted Rating: 6.83 - Similarity: 0.18
Blood Ties - Predicted Rating: 6.62 - Similarity: 0.19
