<a href="https://colab.research.google.com/github/Lisha0128/Python_AI_projects_-Basics-/blob/main/netflix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Step 1: Load the dataset
# The CSV is looked up relative to the working directory; change the path
# below if the file lives somewhere else.
ratings_csv = 'Netflix_Dataset_Rating.csv'
df = pd.read_csv(ratings_csv)

# Step 2: Handle missing values
# Numeric columns: replace gaps with the column mean.
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())

# Object (text) columns: replace gaps with the most frequent value.
categorical_columns = df.select_dtypes(include=['object']).columns
for col in categorical_columns:
    modes = df[col].mode()
    if modes.empty:
        # Every value in this column is missing, so there is no mode to fill
        # with. Leave it as-is; the null report below will surface it.
        continue
    # Assign the filled column back instead of calling
    # `df[col].fillna(..., inplace=True)`: that form is chained assignment,
    # which pandas 2.x warns about and which, under Copy-on-Write, only
    # modifies a temporary and leaves `df` untouched.
    df[col] = df[col].fillna(modes.iloc[0])

# Check if missing values are handled (prints the remaining null count per column)
print(df.isnull().sum())

# Step 3: Feature Engineering
# Convert categorical variables into numeric using one-hot encoding
# (drop_first=True drops the first level of each encoded column).
df = pd.get_dummies(df, drop_first=True)

# Step 4: Define Features and Target Variable
X = df.drop(columns=['Rating'])  # All columns except 'Rating'
y = df['Rating']  # Target variable is 'Rating' (kept in its original units)

# Step 5: Split the Data into Training and Testing Sets
# The split happens BEFORE any scaling so that statistics of the held-out
# rows cannot influence preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Target Scaling
# Standardize the target using the mean/std of the training rows only, then
# apply that same transform to the test rows. Fitting the scaler on the full
# 'Rating' column (as was done previously) leaks test-set statistics.
# Note: metrics computed on y_train / y_test are therefore in standardized
# units, not raw rating units; use scaler.inverse_transform to map back.
scaler = StandardScaler()
y_train = pd.Series(
    scaler.fit_transform(y_train.to_frame()).ravel(),
    index=y_train.index,
    name=y_train.name,
)
y_test = pd.Series(
    scaler.transform(y_test.to_frame()).ravel(),
    index=y_test.index,
    name=y_test.name,
)

# Step 7: Build and train the Random Forest model
# Hyperparameters are gathered in one place; random_state is fixed so repeated
# runs produce the same forest.
forest_params = {'n_estimators': 100, 'max_depth': 10, 'random_state': 42}
# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(**forest_params).fit(X_train, y_train)

# Step 8: Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Step 9: Model Evaluation
# Both metrics compare the held-out targets with the model's predictions.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-Squared: {r2}")

# # Step 10: Cross-Validation (Optional for better results)
# cv_scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')
# print(f"Cross-Validation MSE: {-cv_scores.mean()}")

# # Step 11: Feature Importance (Optional)
# importances = model.feature_importances_
# print("Feature Importances:", importances)

# Example: Predict Ratings for New Data (if applicable)
# X_new = pd.DataFrame({'Feature1': [value1], 'Feature2': [value2], ...})
# y_new_pred = model.predict(X_new)
# print(y_new_pred)


User_ID     0
Rating      0
Movie_ID    0
dtype: int64
Mean Squared Error: 0.9208048916310466
R-Squared: 0.07140918301688448
