### Imports ###

In [None]:
!pip install pandas
!pip install scikit-learn
!pip install shap

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
import shap
import numpy as np

# Load the advertising dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="Social_Media_Advertising.csv")

### Creating Column Duration_Days ###

This new column stores the number extracted from each Duration string as an integer.

In [None]:
# Pull the first run of digits out of each Duration string and store it as an
# integer. expand=False makes str.extract return a Series for the single capture
# group instead of a one-column DataFrame, so the column assignment is direct.
df["Duration_Days"] = df["Duration"].str.extract(r"(\d+)", expand=False).astype(int)
df.columns

In [None]:
# Preview the first five values of the new integer column.
df["Duration_Days"].head(n=5)

### Creating Column Acquisition_Cost_Num ###

This new column converts the Acquisition_Cost strings into floats.

In [None]:
# Remove "$" and "," characters from the cost strings, then parse them as floats.
df["Acquisition_Cost_Num"] = (
    df["Acquisition_Cost"]
    .replace(to_replace="[$,]", value="", regex=True)
    .astype(float)
)
df.columns

In [None]:
# Preview the first five values of the new float column.
df["Acquisition_Cost_Num"].head(n=5)

### Creating Column Season ###

This new column maps the month of each Date to a season.

In [None]:
# Convert the Date column to datetime values in place.
df["Date"] = pd.to_datetime(arg=df["Date"])

# Derive the calendar month number from each parsed date.
df["Month"] = df["Date"].dt.month

# Preview the parsed dates next to their month numbers.
df.loc[:, ["Date", "Month"]].head(n=5)

In [None]:
def get_season(month):
    """Return the season name for a calendar month number.

    Months 12, 1 and 2 map to "Winter", 3-5 to "Spring" and 6-8 to "Summer".
    Every other value maps to "Fall".
    """
    season_months = (
        ("Winter", (12, 1, 2)),
        ("Spring", (3, 4, 5)),
        ("Summer", (6, 7, 8)),
    )
    for season_name, months in season_months:
        if month in months:
            return season_name
    # Anything not matched above is labelled "Fall".
    return "Fall"

# Label every row with the season that corresponds to its month number.
df["Season"] = df["Month"].apply(func=get_season)

In [None]:
# Show the column index of df.
df.columns

In [None]:
# Preview the first four rows of the date, month and season columns.
df.loc[:, ["Date", "Month", "Season"]].head(n=4)

### Feature Selection ###

In [None]:
# Predictor columns: the two numeric columns derived above, followed by the
# categorical campaign attributes.
features = [
    "Duration_Days",
    "Channel_Used",
    "Acquisition_Cost_Num",
    "Target_Audience",
    "Location",
    "Language",
    "Campaign_Goal",
    "Customer_Segment",
    "Season",
]

# Column the models will predict.
target = "ROI"

# Restrict the frame to the predictors plus the target, in that order.
model_df = df[[*features, target]]

### One-Hot Encoding ###

In [None]:
# One-hot encode every categorical predictor. drop_first=True omits the first
# level of each column, which avoids perfectly collinear dummy columns in the
# linear model.
# dtype=float is set explicitly because the default dummy dtype differs across
# pandas versions (bool since pandas 2.0); float dummies keep the encoded frame
# purely numeric for the regression and SHAP steps that consume it.
model_df_encoded = pd.get_dummies(
    model_df,
    columns=[
        "Channel_Used",
        "Target_Audience",
        "Location",
        "Language",
        "Campaign_Goal",
        "Customer_Segment",
        "Season",
    ],
    drop_first=True,
    dtype=float,
)

### LR Model ###

In [None]:
# Separate the ROI target from the predictor columns.
y = model_df_encoded["ROI"]
X = model_df_encoded.drop(columns="ROI")

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit an ordinary linear regression on the training split; fit() returns the
# estimator itself, so construction and fitting can be chained.
lr_model = LinearRegression().fit(X_train, y_train)
lr_model

In [None]:
# Score the fitted linear model on the held-out test split.
y_pred = lr_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# The label was previously mis-encoded as "RÂ²"; print the intended "R²".
print("R²:", r2)
print("MAE:", mae)

In [None]:
# Pair each encoded feature name with its fitted coefficient and list the
# largest coefficients first.
coef_df = (
    pd.DataFrame({"Feature": X.columns, "Coefficient": lr_model.coef_})
    .sort_values("Coefficient", ascending=False)
)

coef_df


### SHAP ###

In [None]:
# Build a SHAP explainer for the fitted linear model; X_train is passed as the
# second argument (presumably used by SHAP as background/reference data — confirm).
explainer_lr = shap.Explainer(lr_model, X_train)
# Compute SHAP values for the rows of the test split.
shap_values_lr = explainer_lr(X_test)
# Bar plot of the SHAP values, limited to 10 displayed entries.
shap.plots.bar(shap_values_lr, max_display=10)