# Implementation: Linear Regression

Predicting Sales based on Advertising Spend.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Use seaborn's default theme for every plot drawn after this point.
sns.set_theme()

# 1. Generate Data (Advertising)
# A fixed seed makes the synthetic dataset identical on every run.
np.random.seed(42)
n_samples = 200

# The draws share one global random stream, so their order
# (TV, Radio, Newspaper, then noise) determines the exact values produced.
tv = np.random.normal(loc=150, scale=50, size=n_samples)
radio = np.random.normal(loc=30, scale=10, size=n_samples)
newspaper = np.random.normal(loc=40, scale=20, size=n_samples)
noise = np.random.normal(loc=0, scale=2, size=n_samples)

# Ground-truth relationship used to synthesize the target:
# Sales = 2.5 + 0.05*TV + 0.18*Radio + 0.002*Newspaper + Noise
sales = 2.5 + 0.05 * tv + 0.18 * radio + 0.002 * newspaper + noise

# Collect the three predictors first, then append the target as the last column.
df = pd.DataFrame({'TV': tv, 'Radio': radio, 'Newspaper': newspaper})
df['Sales'] = sales

# 2. Visualize
# One panel per predictor against Sales; kind='reg' overlays a fitted
# regression line on each scatter.
sns.pairplot(
    df,
    x_vars=['TV', 'Radio', 'Newspaper'],
    y_vars='Sales',
    height=4,
    aspect=0.8,
    kind='reg',
)
plt.show()

## 3. Train Model

In [None]:
# Separate the predictor columns from the response column.
feature_names = ['TV', 'Radio', 'Newspaper']
X = df[feature_names]
y = df['Sales']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)

# Coefficients
# Pair each fitted coefficient with the name of the column it belongs to.
coefficients = {name: weight for name, weight in zip(X.columns, model.coef_)}
print("Intercept:", model.intercept_)
print("Coefficients:", coefficients)
# NOTE: the next line is a fixed message, not a value computed from the fit.
print("Notice Newspaper coeff is effectively zero (no impact).")

# Evaluate
# Score the model on the held-out rows only.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"MSE: {mse:.2f}")
print(f"R2 Score: {r2:.2f}")