In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Location of the advertising CSV. Setting the ADVERTISING_CSV environment
# variable overrides the default, so the notebook is not tied to a single
# machine's directory layout; when it is unset the path below is used.
DATA_PATH = os.environ.get(
    "ADVERTISING_CSV",
    "C:/Users/nellu/OneDrive/Pictures/Desktop/advertising.csv",
)
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Data-quality overview: per-column null counts first, then the summary
# statistics that DataFrame.describe() produces.
missing_per_column = df.isnull().sum()
print("Missing Values:\n", missing_per_column)

summary_stats = df.describe()
print("\nBasic Statistics:\n", summary_stats)


In [None]:
# One histogram (with a KDE overlay) per DataFrame column, side by side.
# The number of panels is taken from the frame rather than fixed at 4, so a
# CSV with a different column count does not push the subplot index out of
# range. Width scales at 3 inches per panel (12 x 4 for four columns).
n_panels = len(df.columns)
fig, axes = plt.subplots(1, n_panels, figsize=(3 * n_panels, 4), squeeze=False)
# squeeze=False keeps `axes` two-dimensional even for a single column, so
# axes[0] is always the row of panels.
for ax, col in zip(axes[0], df.columns):
    sns.histplot(df[col], kde=True, ax=ax)
    ax.set_title(f"{col} Distribution")
fig.tight_layout()
plt.show()

In [None]:
# (train fraction, test fraction) pairs that the rest of the notebook iterates
# over. Only the test fraction is passed to train_test_split; the train
# fraction is kept for display.
splits = [(0.8, 0.2), (0.7, 0.3), (0.6, 0.4)]

# Feature and target are identical for every split, so select them once.
X = df[["TV"]]
y = df["Sales"]

# Collect (train fraction, test fraction, train rows, test rows) per split.
split_sizes = []
for train_ratio, test_ratio in splits:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=42
    )
    split_sizes.append((train_ratio, test_ratio, len(X_train), len(X_test)))

# Print the header once, followed by one line per split.
print("\nTrain/Test Sizes:")
for s in split_sizes:
    print(f"Train:Test = {int(s[0]*100)}:{int(s[1]*100)} -> Train size: {s[2]}, Test size: {s[3]}")

In [None]:
# Simple linear regression (Sales predicted from TV alone), fitted and scored
# once for every train/test ratio in `splits`.
X = df[["TV"]]
y = df["Sales"]

for train_ratio, test_ratio in splits:
    train_pct = int(train_ratio * 100)
    test_pct = int(test_ratio * 100)
    print(f"\n--- Simple Linear Regression (Train {train_pct}%, Test {test_pct}%) ---")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=42
    )

    # fit() returns the estimator itself, so construction and training chain.
    lr = LinearRegression().fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    # Scores on the held-out portion of this split.
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"R²: {r2:.4f}, MSE: {mse:.4f}, MAE: {mae:.4f}")

In [None]:
# Actual vs. predicted Sales against TV spend for the simple (TV-only) model.
# X_test, y_test and y_pred are whatever the last iteration of the preceding
# regression loop left behind, so the figure reflects the final entry of
# `splits` only.
# Every call sits in this one cell so that they all draw on the same figure.
plt.scatter(X_test, y_test, color='blue', label="Actual")
plt.scatter(X_test, y_pred, color='red', label="Predicted")
plt.plot(X_test, y_pred, color='green')  # fitted line through the predictions
plt.xlabel("TV Spend")
plt.ylabel("Sales")
plt.legend()  # display the "Actual" / "Predicted" labels set above
plt.show()

In [None]:
# Histogram (with KDE overlay) of the test-set residuals, i.e. actual minus
# predicted Sales, using the y_test / y_pred currently in the namespace.
residuals = y_test - y_pred
sns.histplot(residuals, kde=True)
plt.title("Residual Distribution")
plt.show()

In [None]:
# Multiple linear regression (Sales predicted from TV, Radio and Newspaper),
# fitted and scored once for every train/test ratio in `splits`.
X = df[["TV", "Radio", "Newspaper"]]
y = df["Sales"]

for train_ratio, test_ratio in splits:
    print(f"\n--- Multiple Linear Regression (Train {int(train_ratio*100)}%, Test {int(test_ratio*100)}%) ---")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=42
    )

    lr = LinearRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    # Metrics are computed inside the loop so that each header line above is
    # followed by the scores for that split.
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    print(f"R²: {r2:.4f}, MSE: {mse:.4f}, MAE: {mae:.4f}")

In [None]:
# Predicted vs. actual Sales for the multiple-regression model.
# y_test, y_pred, train_ratio and test_ratio are the values left by the last
# iteration of the preceding loop, so the figure and its title describe the
# final entry of `splits` only.
# Every call sits in this one cell so that they all draw on the same figure.
plt.scatter(y_test, y_pred)
plt.xlabel("Actual Sales")
plt.ylabel("Predicted Sales")
plt.title(f"Multiple LR (Train {int(train_ratio*100)}%, Test {int(test_ratio*100)}%)")
plt.show()

In [None]:
 residuals = y_test - y_pred
    sns.histplot(residuals, kde=True)
    plt.title("Residual Distribution")
    plt.show()