In [9]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import lime
import lime.lime_tabular

In [10]:
# Load and preprocess the data
df = pd.read_csv('data/Advertising Budget and Sales.csv')
df.drop(columns=['Unnamed: 0'], inplace=True)
df.rename(columns={
    'TV Ad Budget ($)': 'TV_Budget',
    'Radio Ad Budget ($)': 'Radio_Budget',
    'Newspaper Ad Budget ($)': 'Newspaper_Budget',
    'Sales ($)': 'Sales'
}, inplace=True)

In [11]:
# Use basic features for this ensemble
X = df[['TV_Budget', 'Radio_Budget', 'Newspaper_Budget']]
y = df['Sales']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# Initialize base models (excluding XGBoost here)
model_lr = LinearRegression()
model_rf = RandomForestRegressor(random_state=42)
model_gb = GradientBoostingRegressor(random_state=42)

# Build the ensemble using VotingRegressor
ensemble = VotingRegressor(estimators=[
    ('lr', model_lr),
    ('rf', model_rf),
    ('gb', model_gb)
])
ensemble.fit(X_train, y_train)

In [13]:
# Make predictions and evaluate
y_pred = ensemble.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

In [14]:
print("Ensemble Model Performance:")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²: {r2:.2f}")

Ensemble Model Performance:
MAE: 0.62
RMSE: 0.79
R²: 0.98


In [15]:
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    feature_names=X_train.columns,
    mode='regression',
    discretize_continuous=True
)

# Pick an instance from the test set to explain
instance_idx = 0
instance = X_test.iloc[instance_idx].values

# Generate an explanation for the selected instance using our ensemble model
exp = explainer.explain_instance(
    instance,
    lambda x: ensemble.predict(pd.DataFrame(x, columns=X_train.columns)),
    num_features=10
)

# Display the explanation within the notebook
exp.show_in_notebook(show_table=True)

ImportError: cannot import name 'display' from 'IPython.core.display' (c:\Users\Singapore Athletic\Desktop\Alph\Sem2\BT5102\FinalProject\env\Lib\site-packages\IPython\core\display.py)