In [1]:
# Movie Rating Prediction Model
# Coded and Crafted by Sujal Ganvir

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [3]:
# Load the movie dataset (replace 'your_movie_dataset.csv' with your actual dataset)
movie_data = pd.read_csv('/content/New Text Document.txt')

In [4]:
# Display the initial rows of the dataset for a quick overview
print("Dataset Preview:")
print(movie_data.head())

Dataset Preview:
                                 Name    Year Duration            Genre  \
0                                         NaN      NaN            Drama   
1  #Gadhvi (He thought he was Gandhi)  (2019)  109 min            Drama   
2                         #Homecoming  (2021)   90 min   Drama, Musical   
3                             #Yaaram  (2019)  110 min  Comedy, Romance   
4                   ...And Once Again  (2010)  105 min            Drama   

   Rating Votes            Director       Actor 1             Actor 2  \
0     NaN   NaN       J.S. Randhawa      Manmauji              Birbal   
1     7.0     8       Gaurav Bakshi  Rasika Dugal      Vivek Ghamande   
2     NaN   NaN  Soumyajit Majumdar  Sayani Gupta   Plabita Borthakur   
3     4.4    35          Ovais Khan       Prateik          Ishita Raj   
4     NaN   NaN        Amol Palekar  Rajat Kapoor  Rituparna Sengupta   

           Actor 3  
0  Rajendra Bhatia  
1    Arvind Jangid  
2       Roy Angana  
3  Siddha

In [5]:
# Data Analysis and Preprocessing
# (Handle missing values, convert categorical variables into numerical representations, etc.)

In [6]:
# Assuming 'Name', 'Director', and 'Actor 1' are relevant features for prediction
features = movie_data[['Name', 'Director', 'Actor 1']]
target = movie_data['Rating']


In [7]:
# Handle missing values in the target variable (using mean imputation)
target = target.fillna(target.mean())

In [8]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)


In [9]:
# Use one-hot encoding to convert categorical features into numerical format
preprocessor = ColumnTransformer(
    transformers=[
        ('name_director_actors', OneHotEncoder(handle_unknown='ignore'), ['Name', 'Director', 'Actor 1'])
    ])


In [10]:

# Build a Linear Regression model
rating_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [11]:
# Train the model
rating_model.fit(X_train, y_train)

In [12]:
# Make predictions on the test set
try:
    predictions = rating_model.predict(X_test)
except ValueError as e:
    print(f"Error during predictions: {e}")
    print("Debugging information:")
    print("X_test dtypes:", X_test.dtypes)
    print("X_train dtypes:", X_train.dtypes)
    print("X_test columns:", X_test.columns)
    print("X_train columns:", X_train.columns)

In [13]:
# Evaluate the model performance
mse = mean_squared_error(y_test, predictions)
print(f'\nModel Evaluation:')
print(f'Mean Squared Error: {mse:.4f}')




Model Evaluation:
Mean Squared Error: 2.6689


In [14]:
# Display a crafted completion message
print("\nMovie Rating Prediction Model successfully coded and crafted by Sujal Ganvir.")


Movie Rating Prediction Model successfully coded and crafted by Sujal Ganvir.
