In [1]:
# Suppress Warnings
import warnings
warnings.filterwarnings('ignore')

# Import essential libraries
import numpy as np
import pandas as pd

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Linear Regression model and metrics
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [2]:
# Load the dataset (path is relative to the notebook's working directory)
df = pd.read_csv("advertising.csv")

# Fail early, with a readable message, if a column used by later cells is absent
required_columns = {'TV', 'Radio', 'Newspaper', 'Sales'}
missing_columns = required_columns - set(df.columns)
assert not missing_columns, f"advertising.csv is missing columns: {sorted(missing_columns)}"

# Preview the first rows instead of rendering the whole frame
df.head()

In [3]:
# Dataset dimensions as a (row count, column count) tuple
(len(df.index), len(df.columns))

In [4]:
# Summary statistics per column, with the quartiles spelled out explicitly
df.describe(percentiles=[0.25, 0.5, 0.75])

In [5]:
# Percentage of missing values in each column:
# count the nulls per column, scale to 100, divide by the number of rows
df.isna().sum().mul(100).div(len(df))

In [6]:
# Visualize outliers using boxplots: one horizontal boxplot per channel,
# stacked vertically in a single figure.
channels = ['TV', 'Newspaper', 'Radio']
fig, axs = plt.subplots(len(channels), figsize=(5, 5))
for ax, channel in zip(axs, channels):
    # Pass the column by keyword: the first positional parameter of
    # sns.boxplot is `data` in current seaborn releases (and a positional
    # `x` was deprecated before that), so `x=` keeps the intended
    # horizontal orientation across versions.
    sns.boxplot(x=df[channel], ax=ax)
    # Label each panel so the three plots can be told apart
    ax.set_xlabel(channel)
plt.tight_layout()
plt.show()

In [7]:
# Overlay Sales against the spend of each advertising channel on shared axes;
# each channel is drawn as its own series so the legend can distinguish them
for medium in ('TV', 'Radio', 'Newspaper'):
    plt.scatter(df[medium], df['Sales'], label=medium)
plt.title('Advertising Mediums vs Sales')
plt.xlabel('Advertising Mediums')
plt.ylabel('Sales')
plt.legend()
plt.show()

In [8]:
# Single-row pairplot: Sales on the y-axis against each channel on the x-axis
sns.pairplot(
    df,
    x_vars=['TV', 'Radio', 'Newspaper'],
    y_vars='Sales',
    kind='scatter',
    height=4,
)
plt.show()

In [9]:
# Annotated heatmap of the pairwise correlations between the frame's columns
sns.heatmap(data=df.corr(), cmap='YlGnBu', annot=True)
plt.show()

In [10]:
# Build the single-feature design matrix and the target, both kept as
# one-column DataFrames, then fit an ordinary least-squares model.
x = df.loc[:, ['TV']]      # Feature
s = df.loc[:, ['Sales']]   # Target

reg = LinearRegression()
reg.fit(X=x, y=s)

In [11]:
# Pull the fitted slope (coefficient) and intercept off the estimator
m, c = reg.coef_, reg.intercept_
print(f'Slope: {m}, Intercept: {c}')

In [12]:
# Predict sales for a TV spend of 177
# `reg` was fitted on a DataFrame whose only column is 'TV', so predict from a
# one-row frame with the same column name. A bare nested list ([[177]]) makes
# scikit-learn emit a feature-name mismatch warning, which the global
# warnings filter at the top of the notebook would silently hide.
prediction = reg.predict(pd.DataFrame({'TV': [177]}))
print(f'Predicted Sales: {prediction}')

In [13]:
# Add predictions to the DataFrame
# The model was fitted against a one-column DataFrame target, so predict()
# returns an (n, 1) array; flatten it explicitly so a plain 1-D column is
# stored rather than relying on pandas to squeeze the 2-D array on assignment.
df['Predicted_Y'] = reg.predict(x).ravel()
df.head()

In [14]:
# Residual per row: actual Sales minus the model's prediction
df['lose'] = df['Sales'].sub(df['Predicted_Y'])
df.head(5)

In [15]:
# Score the in-sample predictions with mean squared error and mean absolute
# error, applying each metric to the same (actual, predicted) pair
mse, mae = (
    metric(df['Sales'], df['Predicted_Y'])
    for metric in (mean_squared_error, mean_absolute_error)
)
print(f'MSE: {mse}, MAE: {mae}')

In [16]:
# Draw the fitted line over the observed TV/Sales points
plt.plot(x, reg.predict(x), label='Regression Line', color='red')
plt.scatter(x=df['TV'], y=df['Sales'], label='Actual Data')
plt.title('TV vs Sales Regression')
plt.xlabel('TV')
plt.ylabel('Sales')
plt.legend()
plt.show()

In [17]:
# Calculate R² score on the same actual/predicted columns used for MSE and MAE
# (r2_score is imported with the other metrics in the imports cell at the top,
# so every dependency of the notebook is declared in one place)
r2 = r2_score(df['Sales'], df['Predicted_Y'])
print(f'R² Score: {r2}')