In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Baseline: fit a random forest that predicts "Avg. Gross USD" from the other
# numeric columns of the combined dataset and report hold-out error metrics.

file_path = '/content/drive/MyDrive/A Round Ent/combined_df.csv'
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk; chunked inference is what triggers pandas'
# mixed-type DtypeWarning on read_csv.
data = pd.read_csv(file_path, low_memory=False)

# Keep numeric columns only, then discard every row that has a missing value in
# any of them. dropna() runs before the excluded columns are removed below, so
# a gap in one of those columns also drops the row.
numerical_data = data.select_dtypes(['number']).dropna()

# The target and two related columns are withheld from the features.
# NOTE(review): presumably because gross can be derived from ticket price and
# tickets sold, which would leak the target — confirm.
y = numerical_data['Avg. Gross USD']
X = numerical_data.drop(
    columns=['Avg. Gross USD', 'Ticket Price Avg. USD', 'Avg. Tickets Sold']
)

# Every feature is already numeric, so no categorical encoding is required, and
# X and y are slices of the same frame, so they share an index and need no
# realignment before splitting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fixed random_state on both the split and the forest keeps the run repeatable.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Hold-out metrics; RMSE is the square root of the mean squared error.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R^2 Score: {r2:.4f}")

  data = pd.read_csv(file_path)


MAE: 46284.30
RMSE: 171395.39
R^2 Score: 0.9220


In [19]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import Ridge
import xgboost as xgb

# Stacked ensemble: a random forest and an XGBoost regressor feed a Ridge
# meta-learner that predicts "Avg. Gross USD"; hold-out error metrics are
# printed at the end. Uses the same 80/20 split settings (test_size=0.2,
# random_state=42) as the random-forest cell earlier in the notebook.

file_path = '/content/drive/MyDrive/A Round Ent/combined_df.csv'
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk; chunked inference is what triggers pandas'
# mixed-type DtypeWarning on read_csv.
data = pd.read_csv(file_path, low_memory=False)

# Numeric columns only; rows with a missing value in any numeric column are
# discarded (this happens before the excluded columns are removed below).
numerical_data = data.select_dtypes(['number']).dropna()

# The target and two related columns are withheld from the features.
# NOTE(review): presumably to avoid leaking the target — confirm.
y = numerical_data['Avg. Gross USD']
X = numerical_data.drop(
    columns=['Avg. Gross USD', 'Ticket Price Avg. USD', 'Avg. Tickets Sold']
)

# X and y are slices of the same frame, so their indexes already match and no
# alignment step is needed before splitting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base learners.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
xgb_model = xgb.XGBRegressor(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=6,
    random_state=42,
)

# passthrough=True hands the original features to the Ridge meta-learner in
# addition to the base learners' predictions.
stack_model = StackingRegressor(
    estimators=[('rf', rf), ('xgb', xgb_model)],
    final_estimator=Ridge(alpha=1.0),
    passthrough=True,
)

stack_model.fit(X_train, y_train)

y_pred = stack_model.predict(X_test)

# Hold-out metrics; RMSE is the square root of the mean squared error.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R^2 Score: {r2:.4f}")