In [1]:
#Welcome to movie Recomendation system

In [20]:
import pandas as pd
import numpy as np
import random
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Define a dictionary of 20 movies and their corresponding genres
movies = {
    "Movie1": "Action", "Movie2": "Action", "Movie3": "Action", "Movie4": "Action", "Movie5": "Action",
    "Movie6": "Adventure", "Movie7": "Adventure", "Movie8": "Adventure", "Movie9": "Adventure", "Movie10": "Adventure",
    "Movie11": "Drama", "Movie12": "Drama", "Movie13": "Drama", "Movie14": "Drama", "Movie15": "Drama",
    "Movie16": "Sci-Fi", "Movie17": "Sci-Fi", "Movie18": "Sci-Fi", "Movie19": "Sci-Fi", "Movie20": "Sci-Fi"
}

# Define the total number of users
num_users = 50_000

# Generate a dataset where each user rates every movie
ratings_data = []
for user_id in range(1, num_users + 1):  # Iterate through all users
    for movie, genre in movies.items():  # Iterate through all movies
        rating = random.randint(1, 5)  # Assign a random rating between 1 and 5
        ratings_data.append([user_id, movie, genre, rating])  # Store user, movie, genre, and rating

# Create a DataFrame to store the generated data
df = pd.DataFrame(ratings_data, columns=["User_ID", "Movie_Name", "Genre", "Rating"])

# Compute the average rating for each movie
average_ratings = df.groupby("Movie_Name")["Rating"].mean().reset_index()

# Prepare data for Linear Regression
label_encoder = LabelEncoder()
df["Movie_ID"] = label_encoder.fit_transform(df["Movie_Name"])  # Convert movie names to numerical values
X = df[["User_ID", "Movie_ID"]]
y = df["Rating"]

# Split the dataset for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Recommend one movie from each genre based on average ratings
print("Recommended Movies:")
for genre in set(movies.values()):
    genre_movies = df[df["Genre"] == genre]
    
    # Merge with average ratings
    genre_movies = genre_movies.merge(average_ratings, on="Movie_Name", suffixes=("", "_Avg"))
    
    # Drop duplicate movie entries
    genre_movies = genre_movies.drop_duplicates(subset=["Movie_Name"])
    
    # Sort movies by average rating
    genre_movies = genre_movies.sort_values(by="Rating_Avg", ascending=False)
    
    if not genre_movies.empty:
        top_movie = genre_movies.iloc[0]
        print(f"{genre}: {top_movie['Movie_Name']} (Average Rating: {top_movie['Rating_Avg']:.2f})")

# Save the generated dataset to a CSV file for future analysis
df.to_csv("movie_ratings.csv", index=False)

# Display the first few rows of the dataset to verify data structure
df.head()


Recommended Movies:
Action: Movie3 (Average Rating: 3.00)
Drama: Movie12 (Average Rating: 3.00)
Sci-Fi: Movie19 (Average Rating: 3.01)
Adventure: Movie9 (Average Rating: 3.01)


Unnamed: 0,User_ID,Movie_Name,Genre,Rating,Movie_ID
0,1,Movie1,Action,3,0
1,1,Movie2,Action,1,11
2,1,Movie3,Action,2,13
3,1,Movie4,Action,3,14
4,1,Movie5,Action,5,15
