In [3]:
# Importing necessary libraries for numerical operations, data manipulation, and visualization
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split  

# Read the advertising CSV from the relative Data/ folder into a DataFrame.
# NOTE(review): the preview below shows an "Unnamed: 0" column holding 1, 2, 3, ...;
# it looks like a row index that was saved into the CSV — confirm, and consider
# passing index_col=0 if that is the case.
ADVERTISING_CSV = "Data/Advertising.csv"
df = pd.read_csv(ADVERTISING_CSV)

# Preview the first five rows as a quick sanity check
df.head()


Unnamed: 0.1,Unnamed: 0,TV,radio,newspaper,sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [4]:
# Feature matrix: the TV, radio and newspaper columns
predictors = ["TV", "radio", "newspaper"]
X = df.loc[:, predictors]

# Response vector: the sales column
y = df.loc[:, "sales"]

# Preview the head of both objects together (rendered as a tuple)
(X.head(), y.head())


(      TV  radio  newspaper
 0  230.1   37.8       69.2
 1   44.5   39.3       45.1
 2   17.2   45.9       69.3
 3  151.5   41.3       58.5
 4  180.8   10.8       58.4,
 0    22.1
 1    10.4
 2     9.3
 3    18.5
 4    12.9
 Name: sales, dtype: float64)

In [5]:
# Splitting the dataset into training and testing sets (75% training, 25% testing).
# test_size=0.25 is what train_test_split falls back to when neither test_size nor
# train_size is given; it is written out here so the ratio is explicit and agrees
# with the 150/50 row counts displayed by this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Display the dimensions of training and testing sets
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((150, 3), (50, 3), (150,), (50,))

In [6]:
# Adding an intercept (bias) term to feature matrices.
# This cell overwrites X_train / X_test, so it is guarded: the column of ones is
# prepended only while a matrix still has exactly len(predictors) columns.
# Re-running the cell therefore cannot stack a second intercept column.
if X_train.shape[1] == len(predictors):
    X_train = np.column_stack((np.ones(X_train.shape[0]), X_train))
if X_test.shape[1] == len(predictors):
    X_test = np.column_stack((np.ones(X_test.shape[0]), X_test))

# Converting target variable to NumPy arrays.
# np.asarray accepts a pandas Series as well as an already-converted ndarray,
# so this step is also safe to re-run.
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)


In [7]:
# Applying the Normal Equation (X^T X) beta = X^T y to calculate the regression coefficients.
# np.linalg.solve solves this linear system directly instead of forming the explicit
# inverse of X^T X and multiplying by it; it evaluates the same equation with fewer
# floating-point operations and less round-off.
coefficients = np.linalg.solve(X_train.T @ X_train, X_train.T @ y_train)

# Display the computed coefficients (intercept first, then one per predictor column)
print("Regression Coefficients:", coefficients)


Regression Coefficients: [2.77830346e+00 4.54335586e-02 1.91456536e-01 2.56809082e-03]


In [8]:
# In-sample fitted values from the estimated coefficients
y_pred_train = X_train @ coefficients

# RSS: squared residuals summed over the training rows
residuals = y_train - y_pred_train
RSS = np.square(residuals).sum()

# TSS: squared deviations of the target from its own mean
centered = y_train - y_train.mean()
TSS = np.square(centered).sum()

# R²: one minus the unexplained share of the total variation
R_squared = 1 - RSS / TSS

# Shape of the design matrix; note that n_features counts every column of
# X_train, including the column of ones added for the intercept
n_samples, n_features = X_train.shape
residual_dof = n_samples - n_features

# RSE: square root of RSS per residual degree of freedom
RSE = np.sqrt(RSS / residual_dof)

# F-statistic: explained variation per non-intercept column divided by
# residual variation per residual degree of freedom
F_stat = ((TSS - RSS) / (n_features - 1)) / (RSS / residual_dof)

# Report the three statistics, four decimals each
print(f"R² Score: {R_squared:.4f}")
print(f"Residual Standard Error: {RSE:.4f}")
print(f"F-Statistic: {F_stat:.4f}")


R² Score: 0.8966
Residual Standard Error: 1.6863
F-Statistic: 422.2003


In [9]:
# Using only 'TV' as the predictor
X_tv = df[["TV"]]
y_tv = df["sales"]

# Splitting into training and testing sets (random_state fixed for reproducibility)
X_tv_train, X_tv_test, y_tv_train, y_tv_test = train_test_split(X_tv, y_tv, random_state=42)

# Converting to NumPy for matrix operations. df[["TV"]] is a one-column DataFrame,
# so to_numpy() already yields a 2-D (n, 1) array and no reshape is needed.
X_tv_train = X_tv_train.to_numpy()
X_tv_test = X_tv_test.to_numpy()

# Adding bias term (intercept)
X_train_tv = np.column_stack((np.ones(X_tv_train.shape[0]), X_tv_train))

# Calculating regression coefficients from the Normal Equation (X^T X) beta = X^T y.
# np.linalg.solve solves the system directly rather than forming the explicit inverse
# of X^T X; the target is converted to NumPy so the algebra runs on plain arrays.
beta_tv = np.linalg.solve(X_train_tv.T @ X_train_tv, X_train_tv.T @ y_tv_train.to_numpy())

# Displaying results (intercept, then the TV slope)
print("TV Model Coefficients:", beta_tv)


TV Model Coefficients: [7.13178451 0.04689446]


In [10]:
# Using 'radio' as the single predictor
X_radio = df[["radio"]]
y_radio = df["sales"]

# Splitting into training and testing sets (random_state fixed for reproducibility)
X_radio_train, X_radio_test, y_radio_train, y_radio_test = train_test_split(X_radio, y_radio, random_state=42)

# Converting to NumPy for matrix operations. df[["radio"]] is a one-column DataFrame,
# so to_numpy() already yields a 2-D (n, 1) array and no reshape is needed.
X_radio_train = X_radio_train.to_numpy()
X_radio_test = X_radio_test.to_numpy()

# Adding bias term
X_train_radio = np.column_stack((np.ones(X_radio_train.shape[0]), X_radio_train))

# Computing regression coefficients from the Normal Equation (X^T X) beta = X^T y.
# np.linalg.solve solves the system directly rather than forming the explicit inverse
# of X^T X; the target is converted to NumPy so the algebra runs on plain arrays.
beta_radio = np.linalg.solve(
    X_train_radio.T @ X_train_radio, X_train_radio.T @ y_radio_train.to_numpy()
)

# Displaying results (intercept, then the radio slope)
print("Radio Model Coefficients:", beta_radio)


Radio Model Coefficients: [9.51020268 0.2040429 ]


In [11]:
# Using 'newspaper' as the single predictor
X_news = df[["newspaper"]]
y_news = df["sales"]

# Splitting into training and testing sets (random_state fixed for reproducibility)
X_news_train, X_news_test, y_news_train, y_news_test = train_test_split(X_news, y_news, random_state=42)

# Converting to NumPy for matrix operations. df[["newspaper"]] is a one-column DataFrame,
# so to_numpy() already yields a 2-D (n, 1) array and no reshape is needed.
X_news_train = X_news_train.to_numpy()
X_news_test = X_news_test.to_numpy()

# Adding bias term
X_train_news = np.column_stack((np.ones(X_news_train.shape[0]), X_news_train))

# Computing regression coefficients from the Normal Equation (X^T X) beta = X^T y.
# np.linalg.solve solves the system directly rather than forming the explicit inverse
# of X^T X; the target is converted to NumPy so the algebra runs on plain arrays.
beta_news = np.linalg.solve(
    X_train_news.T @ X_train_news, X_train_news.T @ y_news_train.to_numpy()
)

# Displaying results (intercept, then the newspaper slope)
print("Newspaper Model Coefficients:", beta_news)


Newspaper Model Coefficients: [12.53433474  0.05812371]
