# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from mlxtend.evaluate import bias_variance_decomp


# Synthetic regression problem with a fixed seed so the experiment is repeatable.
X, y = make_regression(n_samples=1000, n_features=5, noise=0.5, random_state=42)

# Hold out 20% of the samples; the decomposition below is evaluated on this split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the test split, so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# "Complexity" k means the model is given only the first k feature columns.
# Derive the sweep from the actual column count instead of hard-coding
# [1, 2, 3, 4, 5]: slicing past the last column silently clamps, so a stale
# list would produce duplicated or missing points if n_features ever changes.
complexities = list(range(1, X_train_scaled.shape[1] + 1))

# Per-complexity results, appended in the same order as `complexities`.
bias_values = []
variance_values = []

# Sweep model complexity and record the bias/variance decomposition at each step.
for complexity in complexities:
    # Restrict both splits to the first `complexity` feature columns.
    X_train_subset = X_train_scaled[:, :complexity]
    X_test_subset = X_test_scaled[:, :complexity]

    # No explicit .fit() here: bias_variance_decomp refits the estimator on
    # every bootstrap resample of the training data, so a prior fit would be
    # overwritten without affecting the result.
    lr_model = LinearRegression()

    # random_seed pins the bootstrap draws so every complexity level is
    # evaluated reproducibly.
    # NOTE(review): with loss='mse', mlxtend's avg_bias appears to be the mean
    # squared difference between the averaged prediction and y_test (i.e. a
    # squared-bias term) -- confirm against the mlxtend docs before interpreting.
    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
        lr_model, X_train_subset, y_train, X_test_subset, y_test, loss='mse', random_seed=42
    )

    bias_values.append(avg_bias)
    variance_values.append(avg_var)

    print(f"Complexity={complexity}: Bias={avg_bias:.4f}, Variance={avg_var:.4f}")


# Plot both components of the decomposition against model complexity.
plt.figure(figsize=(10, 6))
plt.plot(complexities, bias_values, marker='o', label='Bias')
# The variance series was collected and is named in the title, so draw it too.
plt.plot(complexities, variance_values, marker='o', label='Variance')

plt.xlabel('Complexity (Number of Features)')
plt.ylabel('Bias / Variance')
plt.title('Bias and Variance vs. Complexity (Linear Regression)')
# Complexity is a feature count, so show one integer tick per evaluated value.
plt.xticks(complexities)
plt.legend()
plt.grid(True)
plt.show()