# Movie Rating Prediction Model
# Coded and Crafted by Sujal Ganvir

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


# Location of the movie dataset; point this at your own file when running
# outside the original environment.
DATASET_PATH = '/content/New Text Document.txt'

# read_csv is called with its defaults, so the file is parsed as
# comma-separated text with a header row, regardless of its extension.
movie_data = pd.read_csv(DATASET_PATH)

# Show the first rows as a quick sanity check of what was loaded.
print("Dataset Preview:", movie_data.head(), sep="\n")

# Data Analysis and Preprocessing
# Select the predictor columns and the target, keeping only rows whose rating
# is actually known.

# Assuming 'Name', 'Director', and 'Actor 1' are relevant features for prediction
FEATURE_COLUMNS = ['Name', 'Director', 'Actor 1']
TARGET_COLUMN = 'Rating'

# Rows without a rating are dropped rather than filled with the mean rating:
# an imputed target is a fabricated label, and computing the mean over the
# whole dataset before the train/test split would leak information from the
# test rows into training and make the reported error misleading.
labelled_movies = movie_data.dropna(subset=[TARGET_COLUMN])
if labelled_movies.empty:
    raise ValueError(
        f"No rows with a non-missing '{TARGET_COLUMN}' value; cannot train a model."
    )

# NOTE(review): missing values in the feature columns are passed through
# unchanged; confirm the installed scikit-learn OneHotEncoder accepts NaN.
features = labelled_movies[FEATURE_COLUMNS]
target = labelled_movies[TARGET_COLUMN]


# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
split_parts = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts


# Categorical columns that are expanded into one-hot indicator columns.
categorical_columns = ['Name', 'Director', 'Actor 1']

# handle_unknown='ignore': a category that was not seen while fitting is
# encoded as all zeros at prediction time instead of raising an error.
category_encoder = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[
        ('name_director_actors', category_encoder, categorical_columns),
    ],
)

# Chain the encoding and the linear regression into a single estimator so the
# encoder is fitted only on whatever data is passed to fit().
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
rating_model = Pipeline(steps=pipeline_steps)


# Train the pipeline on the training split, then predict ratings for the
# held-out rows. fit() returns the fitted pipeline itself, so the two calls
# can be chained; rating_model is left fitted for later use.
predictions = rating_model.fit(X_train, y_train).predict(X_test)



# Score the predictions against the true held-out ratings (lower is better).
mse = mean_squared_error(y_true=y_test, y_pred=predictions)

# Report the metric rounded to four decimal places.
print('\nModel Evaluation:', f'Mean Squared Error: {mse:.4f}', sep='\n')


# Closing banner printed once the whole script has run.
print("\nMovie Rating Prediction Model successfully coded and crafted by Sujal Ganvir.")