In [5]:
# Import necessary libraries\
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [6]:
# Load the data
file_path = 'advertising.csv'  # Replace with your file path
advertising_data = pd.read_csv(file_path)

In [7]:
# Check for outliers (using Z-score for example)
z_scores = stats.zscore(advertising_data)
abs_z_scores = np.abs(z_scores)
filtered_entries = (abs_z_scores < 3).all(axis=1)
advertising_data = advertising_data[filtered_entries]

In [8]:
# Check for multicollinearity
vif_data = pd.DataFrame()
vif_data["feature"] = advertising_data.columns
vif_data["VIF"] = [variance_inflation_factor(advertising_data.values, i) for i in range(len(advertising_data.columns))]
print(vif_data)


     feature        VIF
0         TV  24.945471
1      Radio   6.519384
2  Newspaper   3.368552
3      Sales  44.403915


In [9]:
# Feature Scaling - Standardization
scaler = StandardScaler()
scaled_features = scaler.fit_transform(advertising_data.drop('Sales', axis=1))

In [10]:
# Preparing the data
X = scaled_features
y = advertising_data['Sales'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
# Linear Regression Model
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)
mse_linear = mean_squared_error(y_test, y_pred_linear)
r2_linear = r2_score(y_test, y_pred_linear)

In [12]:
# Cross-validation for Linear Regression
cv_scores = cross_val_score(linear_model, X, y, cv=5)
print("Cross-validated scores for Linear Regression:", cv_scores)

Cross-validated scores for Linear Regression: [0.87197347 0.93419089 0.92100991 0.84281448 0.90232299]


In [13]:
# Decision Tree Regressor
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)
mse_tree = mean_squared_error(y_test, y_pred_tree)
r2_tree = r2_score(y_test, y_pred_tree)

In [14]:
# Random Forest Regressor
forest_model = RandomForestRegressor(n_estimators=100, random_state=42)
forest_model.fit(X_train, y_train)
y_pred_forest = forest_model.predict(X_test)
mse_forest = mean_squared_error(y_test, y_pred_forest)
r2_forest = r2_score(y_test, y_pred_forest)

In [15]:
# Displaying the results
print("Linear Regression - MSE:", mse_linear, "R2:", r2_linear)
print("Decision Tree - MSE:", mse_tree, "R2:", r2_tree)
print("Random Forest - MSE:", mse_forest, "R2:", r2_forest)

Linear Regression - MSE: 2.4157289455935094 R2: 0.9130003152798272
Decision Tree - MSE: 1.9524999999999992 R2: 0.9296829701337195
Random Forest - MSE: 1.0095954250000028 R2: 0.9636405881420818


In [16]:
# User input for prediction (with scaling)
tv_budget = float(input("Enter TV advertising budget: "))
radio_budget = float(input("Enter Radio advertising budget: "))
newspaper_budget = float(input("Enter Newspaper advertising budget: "))
user_input_scaled = scaler.transform([[tv_budget, radio_budget, newspaper_budget]])

Enter TV advertising budget: 80
Enter Radio advertising budget: 20
Enter Newspaper advertising budget: 42




In [17]:
# Making predictions
predicted_sales_linear = linear_model.predict(user_input_scaled)[0]
predicted_sales_tree = tree_model.predict(user_input_scaled)[0]
predicted_sales_forest = forest_model.predict(user_input_scaled)[0]


In [18]:
# Displaying the predicted sales
print("\nPredicted Sales")
print("Linear Regression:", predicted_sales_linear)
print("Decision Tree:", predicted_sales_tree)
print("Random Forest:", predicted_sales_forest)


Predicted Sales
Linear Regression: 11.274451077379908
Decision Tree: 10.5
Random Forest: 11.772999999999985
