In [None]:
# Importing the necessary libraries for data analysis and modeling
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, KFold,cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [None]:
# Read the advertising data from 'Advertising.csv' (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='Advertising.csv')

In [None]:
# Summarising the dataset: row and column counts first, then the per-column
# dtype / non-null overview printed by DataFrame.info()
num_records = df.shape[0]
num_features = df.shape[1]
print(f"Number of records: {num_records}")
print(f"Number of features: {num_features}")
df.info()

In [None]:
# Exploring the dataset's summary statistics (count, mean, std, min, quartiles, max
# for each numeric column)
df.describe()

In [None]:
# Counting the missing (NaN/None) entries in each column
df.isnull().sum()

In [None]:
# Visualizing the distribution and potential outliers using box plots.
# 'Unnamed: 0' is left out of the figure: df still contains it at this point (it is
# only dropped in a later cell) and it would otherwise be drawn as one more box.
# errors='ignore' keeps this cell working when that column is absent.
plot_df = df.drop(columns='Unnamed: 0', errors='ignore')
fig, ax = plt.subplots()
# Each column of the 2-D array becomes one horizontal box, labelled with its column name.
ax.boxplot(plot_df.to_numpy(), vert=False, labels=list(plot_df.columns))
ax.set_title('Distribution of dataset columns')
ax.set_xlabel('Value')
plt.show()

In [None]:
# Remove the 'Unnamed: 0' column when the frame has one; otherwise just report
# that there was nothing to remove. The drop mutates df in place.
if 'Unnamed: 0' not in df.columns:
    print("'Unnamed: 0' column not found.")
else:
    df.drop(columns=['Unnamed: 0'], inplace=True)
    print("'Unnamed: 0' column has been removed.")

In [None]:
# Separating the features (X) and the target variable (y): the last column of df
# is taken as the target and every column before it as a feature.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
# Only the last bare expression of a cell is rendered, so display() is used to
# preview both objects (first rows only) from this single cell.
display(X.head(), y.head())

In [None]:
# Hold out 30% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=43,
)

In [None]:
# Previewing the training split. display() is used because only the last bare
# expression of a cell is rendered; this shows the features and the target together.
display(X_train.head(), y_train.head())

In [None]:
# Previewing the test split. display() is used because only the last bare
# expression of a cell is rendered; this shows the features and the target together.
display(X_test.head(), y_test.head())

In [None]:
# Build the linear regression estimator and fit it on the training split in one
# step; fit() returns the estimator itself, so linear_model is the fitted model.
linear_model = LinearRegression().fit(X_train, y_train)
linear_model

In [None]:
# Predicting the target for the held-out test features with the fitted linear model
# (the metrics themselves are computed in the following cells)
y_pred = linear_model.predict(X_test)
y_pred

In [None]:
# Score of the fitted model on the training split, scaled to a percentage
train_score = 100 * linear_model.score(X_train, y_train)
train_score

In [None]:
# Score of the fitted model on the held-out test split, scaled to a percentage
test_score = 100 * linear_model.score(X_test, y_test)
test_score

In [None]:
# Mean squared error between the test targets and the model's predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse

In [None]:
# Mean absolute error between the test targets and the model's predictions
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mae

In [None]:
# R^2 of the predictions on the test split, scaled to a percentage
r2 = 100 * r2_score(y_true=y_test, y_pred=y_pred)
r2

In [None]:
# 5-fold cross-validation splitter; rows are shuffled before splitting and the
# fixed random_state keeps the fold assignment reproducible.
cv = KFold(
    n_splits=5,
    shuffle=True,
    random_state=0,
)
cv

In [None]:
# Score the linear model on every fold produced by `cv` over the full data (X, y),
# using all available cores, then average the per-fold scores.
cross_val_scores = cross_val_score(
    estimator=linear_model,
    X=X,
    y=y,
    cv=cv,
    n_jobs=-1,
)
avg_cross_val_score = cross_val_scores.mean()
avg_cross_val_score

In [None]:
# Setting up a Ridge Regression model and the hyperparameter grid that
# GridSearchCV will search over.
ridge_model = Ridge()
param_grid = {
    # Candidate regularization strengths, in ascending order and each listed once
    # so that no identical configuration is fitted twice during the search.
    'alpha': [0.01, 0.05, 0.07, 0.09, 0.1, 0.2, 0.3, 0.6, 0.9, 1.0],
    # The remaining entries hold a single value each: they pin the setting
    # rather than search over it.
    'fit_intercept': [True],
    'solver': ['auto'],
    'positive': [False],
    'random_state': [0]
}


In [None]:
# Exhaustive search over param_grid for the Ridge model with 5-fold cross-validation
grid_search_ridge = GridSearchCV(
    estimator=ridge_model,
    param_grid=param_grid,
    cv=5,
)
grid_search_ridge

In [None]:
# Run the Ridge grid search on the training split only
grid_search_ridge.fit(X=X_train, y=y_train)

In [None]:
# Parameter combination from param_grid that the Ridge search ranked best
best_ridge_params = grid_search_ridge.best_params_
best_ridge_params

In [None]:
# Mean cross-validated score of the best Ridge parameter combination. This is
# measured on the folds of the training split, not on the held-out test set.
best_ridge_score = grid_search_ridge.best_score_
best_ridge_score

In [None]:
# Setting up a Lasso Regression model and the hyperparameter grid that
# GridSearchCV will search over.
lasso_model = Lasso()
param_grid_lasso = {
    # Candidate regularization strengths, in ascending order and each listed once
    # so that no identical configuration is fitted twice during the search.
    'alpha': [0.01, 0.02, 0.2, 0.3, 1.0],
    # The remaining entries hold a single value each: they pin the setting
    # rather than search over it.
    'fit_intercept': [True],
    'selection': ['cyclic'],
    'random_state': [0]
}


In [None]:
# Exhaustive search over param_grid_lasso for the Lasso model with 5-fold
# cross-validation, using all available cores
grid_search_lasso = GridSearchCV(
    estimator=lasso_model,
    param_grid=param_grid_lasso,
    cv=5,
    n_jobs=-1,
)
grid_search_lasso

In [None]:
# Run the Lasso grid search on the training split only
grid_search_lasso.fit(X=X_train, y=y_train)

In [None]:
# Parameter combination from param_grid_lasso that the Lasso search ranked best
best_lasso_params = grid_search_lasso.best_params_
best_lasso_params

In [None]:
# Mean cross-validated score of the best Lasso parameter combination. This is
# measured on the folds of the training split, not on the held-out test set.
best_lasso_score = grid_search_lasso.best_score_
best_lasso_score