# Sales Dataset

In [22]:
import numpy as pd
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [23]:
data_url = "/content/advertising.csv"

In [24]:
df = pd.read_csv(data_url)

In [25]:
print('Dataset Preview')
print(df.head())

Dataset Preview
      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9


In [26]:
df = df.drop(columns=[df.columns[0]])

In [27]:
print('\nCorrelation Matrix:')
corr_matrix = df.corr()
print(corr_matrix)


Correlation Matrix:
              Radio  Newspaper     Sales
Radio      1.000000   0.354104  0.349631
Newspaper  0.354104   1.000000  0.157960
Sales      0.349631   0.157960  1.000000


In [28]:
target = 'Sales'
X = df.drop(columns=[target])
y = df[target]

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [30]:
mlr_model = LinearRegression()
mlr_model.fit(X_train, y_train)

In [31]:
mlr_predictions = mlr_model.predict(X_test)
mlr_mse = mean_squared_error(y_test, mlr_predictions)
mlr_r2 = r2_score(y_test, mlr_predictions)

In [32]:
coefficients = ' + '.join([f"({coef:.4f} * {col})" for coef, col in zip(mlr_model.coef_, X.columns)])

In [33]:
mlr_equation = f"sales = {coefficients} + {mlr_model.intercept_:.4f}"

In [34]:
print('\nMultiple Linear Regression:')
print(f"Eqauation : {mlr_equation}")
print(f"MSE: {mlr_mse:.4f}")
print(f"R-squared : {mlr_r2:.4f}")


Multiple Linear Regression:
Eqauation : sales = (0.1179 * Radio) + (0.0039 * Newspaper) + 12.5172
MSE: 27.5006
R-squared : 0.1100
