In [18]:
import pandas as pd
import kagglehub
import os

 
# Download the advertising dataset via kagglehub; the returned value is used
# below as a local directory that contains the dataset files.
path = kagglehub.dataset_download("ashydv/advertising-dataset")

# os.listdir() returns entries in arbitrary order, so sort the candidates to
# make the choice of CSV deterministic across machines and re-runs.
csv_files = sorted(f for f in os.listdir(path) if f.endswith('.csv'))
if not csv_files:
    # Fail with an explicit message instead of a bare IndexError on csv_files[0].
    raise FileNotFoundError(f"No .csv file found in downloaded dataset directory: {path}")
csv_path = os.path.join(path, csv_files[0])
df = pd.read_csv(csv_path)

# Quick sanity check of what was loaded: shape, first rows, target summary.
print("OFFICIAL KAGGLE ADVERTISING DATASET LOADED!")
print(f"Shape: {df.shape}")
print(df.head(3))
print("\nSales stats:", df['Sales'].describe())


OFFICIAL KAGGLE ADVERTISING DATASET LOADED!
Shape: (200, 4)
      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0

Sales stats: count    200.000000
mean      15.130500
std        5.283892
min        1.600000
25%       11.000000
50%       16.000000
75%       19.050000
max       27.000000
Name: Sales, dtype: float64


In [21]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import math

# Predictors are the three ad-spend columns; the target is Sales.
X = df[['TV', 'Radio', 'Newspaper']]
y = df['Sales']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit both models on the same training split and score the same test split.
lr = LinearRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)

rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# One shared report: R² and RMSE (square root of the mean squared error).
for heading, predictions in (
    ("Linear Regression:", lr_pred),
    ("\nRandom Forest:", rf_pred),
):
    rmse = math.sqrt(mean_squared_error(y_test, predictions))
    print(heading)
    print(f"  R²: {r2_score(y_test, predictions):.3f}")
    print(f"  RMSE: {rmse:.2f}")


Linear Regression:
  R²: 0.906
  RMSE: 1.71

Random Forest:
  R²: 0.953
  RMSE: 1.20


In [20]:
# Label the importances with the feature names the forest was actually fitted
# on, so the labels cannot drift out of order relative to
# rf.feature_importances_. feature_names_in_ is only set when the model was
# fitted on a DataFrame (scikit-learn >= 1.0); otherwise fall back to the
# column order used when building the training features.
feature_names = list(getattr(rf, 'feature_names_in_', ['TV', 'Radio', 'Newspaper']))

importances = pd.DataFrame({
    'Channel': feature_names,
    'Importance': rf.feature_importances_
}).sort_values('Importance', ascending=False)

print("Best Ad Channels:")
print(importances.round(3))

# Predict on a DataFrame with named columns rather than a bare nested list:
# the values are then matched to features by name, and scikit-learn does not
# warn about missing feature names for a model fitted on a DataFrame.
# NOTE(review): the scenario also includes a Newspaper spend of 10, which the
# printed message below does not mention.
new_campaign = pd.DataFrame([[200, 50, 10]], columns=feature_names)
pred_sales = rf.predict(new_campaign)[0]
print(f"\nPrediction: TV $200K + Radio $50K → ${pred_sales:.1f}K sales")


Best Ad Channels:
     Channel  Importance
0         TV       0.845
1      Radio       0.137
2  Newspaper       0.018

Prediction: TV $200K + Radio $50K → $22.8K sales
