<a href="https://colab.research.google.com/github/AkashB-13/NaanMudhalvan/blob/main/Theaterical%20Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
# The dataset is read from a fixed absolute path.
movie_df = pd.read_csv("/content/movie_reviews_10000.csv")

# Text-valued inputs; these are the columns handed to the one-hot encoder.
categorical_features = ["Genre", "Director", "Main Actors"]

# Model inputs: the categorical columns first, then the remaining columns,
# which the preprocessor passes through untouched.
feature_columns = [
    *categorical_features,
    "Release Year",
    "RottenTomatoes (%)",
    "Award Nominations",
]

# Model outputs: one regression fit predicts all four columns at once.
target_columns = [
    "IMDb Rating",
    "Budget (USD M)",
    "BoxOffice (USD M)",
    "User Sentiment Score (0–1)",
]

X = movie_df[feature_columns]
Y = movie_df[target_columns]

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# One-hot encode the categorical columns and keep every other column as-is.
# handle_unknown='ignore' lets categories unseen during fit be transformed
# without raising.
preprocessor = ColumnTransformer(
    [
        (
            'cat',
            OneHotEncoder(handle_unknown='ignore'),
            categorical_features,
        ),
    ],
    remainder='passthrough',
)

# Encoding and regression are chained so predict() accepts raw feature rows.
reg_model = Pipeline(
    [
        ('preprocessor', preprocessor),
        ('regressor', LinearRegression()),
    ]
)

# Hold out 20% of the rows (fixed seed) and fit on the remaining 80%.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)
reg_model.fit(X_train, Y_train)

# Function to fetch actual and predicted movie details
def get_movie_analysis(movie_title):
    """Return the recorded and model-predicted figures for one movie.

    The title is first matched exactly against the "Movie Title" column.
    If that finds nothing and the title is a string, a second, forgiving
    pass ignores letter case and leading/trailing/repeated whitespace, so
    input such as " movie 100 " still resolves to "Movie 100".

    Args:
        movie_title: Title to look up in ``movie_df``.

    Returns:
        A dict with two sections, "🎬 Actual Movie Details" and
        "📈 Predicted Values by Model", each mapping a label to its value;
        or a "not found" message string when no row matches. When several
        rows share the title, the first one is used.
    """
    titles = movie_df["Movie Title"]
    movie = movie_df[titles == movie_title]

    # Fallback: exact matching is unforgiving for typed input, so retry with
    # case and whitespace normalised. Running it only after the exact match
    # fails keeps every previously successful lookup unchanged.
    if movie.empty and isinstance(movie_title, str):
        wanted = " ".join(movie_title.split()).casefold()
        normalized = titles.astype(str).str.split().str.join(" ").str.casefold()
        # notna() stops a missing title (stringified as "nan") from matching.
        movie = movie_df[titles.notna() & (normalized == wanted)]

    if movie.empty:
        return f"❌ Movie '{movie_title}' not found in the dataset."

    # Only the first match is reported, so only that row is sent to the model.
    movie = movie.iloc[[0]]
    input_features = movie[feature_columns]
    predictions = reg_model.predict(input_features)[0]

    # Positions follow the order of target_columns used when fitting.
    predicted_data = {
        "Predicted IMDb Rating": round(predictions[0], 2),
        "Predicted Budget (USD M)": round(predictions[1], 2),
        "Predicted BoxOffice (USD M)": round(predictions[2], 2),
        "Predicted User Sentiment Score": round(predictions[3], 2),
    }

    actual_info = {
        "Director": movie["Director"].values[0],
        "Genre": movie["Genre"].values[0],
        "Main Actors": movie["Main Actors"].values[0],
        "Release Year": movie["Release Year"].values[0],
        "Actual IMDb Rating": movie["IMDb Rating"].values[0],
        "Actual Budget (USD M)": movie["Budget (USD M)"].values[0],
        "Actual BoxOffice (USD M)": movie["BoxOffice (USD M)"].values[0],
        "Actual User Sentiment Score": movie["User Sentiment Score (0–1)"].values[0],
    }

    return {
        "🎬 Actual Movie Details": actual_info,
        "📈 Predicted Values by Model": predicted_data,
    }

# Repeating prompt loop
# Keep prompting for titles until the user types 'exit' or input ends.
while True:
    try:
        user_input = input("Enter the movie name (e.g., Movie 100) or type 'exit' to quit: ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted the prompt: stop cleanly
        # instead of ending the session with a traceback.
        print("\n✅ Exiting movie analysis tool. Goodbye!")
        break

    # Surrounding whitespace is never part of the intended command or title.
    title = user_input.strip()

    if title.lower() == 'exit':
        print("✅ Exiting movie analysis tool. Goodbye!")
        break

    if not title:
        # Nothing to look up; ask again rather than reporting "'' not found".
        print("Please enter a movie name.")
        continue

    result = get_movie_analysis(title)

    print("\nMovie Analysis:")
    if isinstance(result, dict):
        # Successful lookup: one heading per section, then its label/value pairs.
        for section, data in result.items():
            print(f"\n{section}")
            for key, value in data.items():
                print(f"{key}: {value}")
    else:
        # A non-dict result is the "not found" message string.
        print(result)

Enter the movie name (e.g., Movie 100) or type 'exit' to quit: Movie 100

Movie Analysis:

🎬 Actual Movie Details
Director: Greta Gerwig
Genre: Romance
Main Actors: Tom Hanks, Chris Hemsworth
Release Year: 1997
Actual IMDb Rating: 5.7
Actual Budget (USD M): 67.72
Actual BoxOffice (USD M): 1637.41
Actual User Sentiment Score: 0.88

📈 Predicted Values by Model
Predicted IMDb Rating: 5.37
Predicted Budget (USD M): 169.08
Predicted BoxOffice (USD M): 1005.84
Predicted User Sentiment Score: 0.56
Enter the movie name (e.g., Movie 100) or type 'exit' to quit: Movie100

Movie Analysis:
❌ Movie 'Movie100' not found in the dataset.
