In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

In [5]:
# Load the advertising dataset; the path is relative, so the CSV must sit in
# the notebook's working directory.
df = pd.read_csv('advertising.csv')
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Count missing values per column (isna is the canonical name; isnull is its alias).
df.isna().sum()

In [None]:
# Pairwise correlation matrix between columns; Pearson is the pandas default,
# spelled out here so the reader knows which coefficient is shown.
df.corr(method='pearson')

In [None]:
# Predictors are the three advertising-channel columns; the target is Sales.
X = df.loc[:, ['TV', 'Radio', 'Newspaper']]
y = df.loc[:, 'Sales']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train.head()

In [None]:
# Fit a linear regression on the training split (fit() returns the estimator
# itself, so construction and fitting can be chained).
lr = LinearRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# Evaluate on the held-out split: R^2 and mean absolute error.
lr_r2 = r2_score(y_test, lr_pred)
lr_mae = mean_absolute_error(y_test, lr_pred)

print("LR R2:", lr_r2)
print("LR MAE:", lr_mae)

In [None]:
# Fit a 100-tree random forest on the same training split; the fixed seed
# keeps the fitted forest reproducible between runs.
rf = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# Evaluate on the held-out split with the same metrics used for the linear model.
rf_r2 = r2_score(y_test, rf_pred)
rf_mae = mean_absolute_error(y_test, rf_pred)

print("RF R2:", rf_r2)
print("RF MAE:", rf_mae)

In [None]:
# Overlay actual test-set sales and both models' predictions on one axes.
fig, ax = plt.subplots(figsize=(12, 6))

# All three series are plain arrays, so the x-axis is the position within the
# test split (0..n-1), not the original DataFrame row label.
ax.plot(y_test.values, label='Actual', color='black', lw=2)
ax.plot(lr_pred, label='Linear Regression', color='blue', linestyle='--')
ax.plot(rf_pred, label='Random Forest', color='red', linestyle=':')

ax.set_title('Sales Prediction Comparison: LR vs RF')
ax.set_xlabel('Test Sample Index')
ax.set_ylabel('Sales')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Side-by-side table of actual sales and each model's prediction for the test
# split. y_test.values drops the original row labels, so rows are numbered
# 0..n-1 in test-split order.
comp_df = pd.DataFrame(
    dict(
        Actual=y_test.values,
        LR_Predicted=lr_pred,
        RF_Predicted=rf_pred,
    )
)
comp_df.head(15)