In [24]:
import pandas as pd

# Load the dataset
df = pd.read_csv(r"E:\TakeYourFilm\Cleaned_dataset\merged_movies.csv")

# Define features and target variable
features = ['genre', 'rating', 'votes']
target = ['normalized_rating']

# Separate features (X) and target variable (y)
X = df[features]
y = df[target]

# Display initial data
print("Features (X):")
print(X.head())

print("\nTarget Variable (Y):")
print(y.head())


Features (X):
    genre  rating     votes
0  Action     6.9  204835.0
1  Action     7.8  295119.0
2  Action     6.5   26220.0
3  Action     8.0  327858.0
4  Action     0.0       0.0

Target Variable (Y):
   normalized_rating
0               0.69
1               0.78
2               0.65
3               0.80
4               0.00


In [25]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# One-Hot Encoding for Genre
encoder = OneHotEncoder(sparse_output=False)
genre_encoded = encoder.fit_transform(df[['genre']])

# Normalize rating and votes
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[['rating', 'votes']])

# Combine encoded genre with scaled numerical features
import numpy as np
X_processed = np.hstack((genre_encoded, scaled_features))

print("Processed Features Shape:", X_processed.shape)


Processed Features Shape: (238230, 10)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train.values.ravel())
# how ravel() works =  [[[10]]] -> [10]
# Predict and evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")


Mean Squared Error: 0.00


In [None]:
import numpy as np

def recommend_movies():
    
    genre_name = input("Enter a movie genre: ").strip().lower()

    genre_movies = df[df['genre'].str.contains(genre_name, case=False, na=False)]
    
    if genre_movies.empty:
        print(f"No movies found for genre: {genre_name}")
        return None
    
    X_genre = genre_movies[features]
    
    genre_encoded = encoder.transform(X_genre[['genre']])
    scaled_features = scaler.transform(X_genre[['rating', 'votes']])
    X_genre_processed = np.hstack((genre_encoded, scaled_features))

    predicted_scores = model.predict(X_genre_processed)
    
    genre_movies = genre_movies.copy()
    genre_movies['predicted_score'] = predicted_scores
    top_movies = genre_movies.sort_values(by='predicted_score', ascending=False).head(10)

    return top_movies[['movie_name', 'rating', 'votes', 'predicted_score']]

recommended_movies = recommend_movies()

# Display results if movies are found
if recommended_movies is not None:
    print(recommended_movies)


                                             movie_name  rating  votes  \
108707           Nerazumevalica (Understandinglessness)     9.9   52.0   
108444                                      Mrugtrushna     9.8  419.0   
107537                      Unbounded - Animated Series     9.7    7.0   
111398                                     Pullu Rising     9.6   29.0   
108576                                 Adam & the Water     9.5   11.0   
109378                                              7 7     9.4   11.0   
108412  Kamen Rider Geats × Revice: Movie Battle Royale     9.4   11.0   
106764                                         Spamalot     9.4   12.0   
108300                                    La otra magia     9.4    5.0   
112230                              The World of Kapata     9.4   77.0   

        predicted_score  
108707             0.99  
108444             0.98  
107537             0.97  
111398             0.96  
108576             0.95  
109378             0.94  
108

In [None]:
import joblib

joblib.dump(model, "film_model.pkl")
print("Model saved successfully!")


Model saved successfully!


In [None]:
import joblib

joblib.dump(encoder, "encoder.pkl")
print("Encoder saved successfully!")


Encoder saved successfully!


In [None]:
import joblib

joblib.dump(scaler, "scaler.pkl")
print("Scaler saved successfully!")


Scaler saved successfully!
