In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report
)
from sklearn.datasets import make_classification

# Seed NumPy's global random state so code that draws from np.random
# produces the same numbers on every fresh top-to-bottom run.
np.random.seed(42)

ModuleNotFoundError: No module named 'pandas'

In [None]:


# REGRESSION ANALYSIS
#
# Synthetic one-feature regression problem: y = 4 + 3x + noise, where x is
# drawn with rand() and scaled by 2, and the noise is drawn with randn().

# A cell-local generator makes this cell idempotent: re-running it (or running
# it out of order) always rebuilds the same dataset instead of consuming
# whatever position the global np.random stream happens to be at.
# RandomState(42) yields the same stream as np.random.seed(42), so a fresh
# top-to-bottom run still produces the data it produced before.
n_samples = 200
rng = np.random.RandomState(42)

X = 2 * rng.rand(n_samples, 1)
y = 4 + 3 * X + rng.randn(n_samples, 1)

# Train-test split: hold out 20% of the rows, with a fixed shuffle seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:

# Linear Regression
#
# Fit on the training split, then score predictions on both splits.

lin_reg = LinearRegression().fit(X_train, y_train)

y_pred_train, y_pred_test = lin_reg.predict(X_train), lin_reg.predict(X_test)

# The test MSE feeds two report lines (MSE and its square root, RMSE).
test_mse = mean_squared_error(y_test, y_pred_test)

print("\n===== Linear Regression =====")
for label, value in (
    ("Train R2:", r2_score(y_train, y_pred_train)),
    ("Test R2 :", r2_score(y_test, y_pred_test)),
    ("Test MSE:", test_mse),
    ("Test RMSE:", np.sqrt(test_mse)),
    ("Test MAE:", mean_absolute_error(y_test, y_pred_test)),
):
    print(label, value)


In [None]:
# Decision Tree (Overfitting Example)
#
# max_depth=None places no cap on how deep the tree may grow.

tree = DecisionTreeRegressor(max_depth=None).fit(X_train, y_train)

y_tree_train = tree.predict(X_train)
y_tree_test = tree.predict(X_test)

print("\n===== Decision Tree Regressor =====")
tree_scores = {
    "Train R2:": r2_score(y_train, y_tree_train),
    "Test R2 :": r2_score(y_test, y_tree_test),
}
for label, score in tree_scores.items():
    print(label, score)

In [None]:
# Polynomial Regression
#
# Degree-10 feature expansion, deliberately high to show overfitting.
poly = PolynomialFeatures(degree=10)
# Fit the expansion on the training split, then reuse it unchanged on the test split.
X_poly_train, X_poly_test = poly.fit_transform(X_train), poly.transform(X_test)

poly_reg = LinearRegression().fit(X_poly_train, y_train)

y_poly_train = poly_reg.predict(X_poly_train)
y_poly_test = poly_reg.predict(X_poly_test)

print("\n===== Polynomial Regression (degree=10) =====")
for label, truth, predicted in (
    ("Train R2:", y_train, y_poly_train),
    ("Test R2 :", y_test, y_poly_test),
):
    print(label, r2_score(truth, predicted))


# Ridge Regression (Reducing Overfitting)
#
# Trained on the same polynomial features as the unregularised fit above,
# with regularisation strength alpha=1.0.

ridge = Ridge(alpha=1.0)
ridge.fit(X_poly_train, y_train)

# Score both splits, as the other regression cells do: overfitting shows up
# as the gap between train and test R2, which a test score alone cannot reveal.
y_ridge_train = ridge.predict(X_poly_train)
y_ridge_test = ridge.predict(X_poly_test)

print("\n===== Ridge Regression =====")
print("Train R2:", r2_score(y_train, y_ridge_train))
print("Test R2:", r2_score(y_test, y_ridge_test))


In [None]:

# VISUALIZE UNDERFITTING VS OVERFITTING
#
# Overlay the linear fit and the polynomial fit on the full dataset.

fig, ax = plt.subplots()
ax.scatter(X, y, alpha=0.4, label="Data")

# 100 evenly spaced points over [0, 2], shaped as a single-column feature
# matrix so they can be passed straight to predict().
X_plot = np.linspace(0, 2, 100).reshape(-1, 1)
ax.plot(X_plot, lin_reg.predict(X_plot), label="Linear", color="green")

# The polynomial model must see the same feature expansion it was trained on.
X_plot_poly = poly.transform(X_plot)
ax.plot(X_plot, poly_reg.predict(X_plot_poly),
        label="Polynomial (Overfit)", color="red")

# Label the axes and the raw points so the figure can be read on its own.
ax.set_xlabel("X")
ax.set_ylabel("y")
ax.set_title("Underfitting vs Overfitting")
ax.legend()
plt.show()


In [None]:
# CLASSIFICATION ANALYSIS

# Synthetic two-class dataset: 500 samples, 2 features, no redundant features,
# generated with a fixed seed.
dataset_spec = dict(
    n_samples=500,
    n_features=2,
    n_classes=2,
    n_redundant=0,
    random_state=42,
)
X_cls, y_cls = make_classification(**dataset_spec)

# Hold out 20% of the rows for evaluation, with a fixed shuffle seed.
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X_cls, y_cls, random_state=42, test_size=0.2
)


In [None]:
# Logistic Regression
clf = LogisticRegression().fit(X_train_c, y_train_c)

y_pred_cls = clf.predict(X_test_c)

print("\n===== Logistic Regression (Classification) =====")
# Each scalar metric is called the same way, (y_true, y_pred), so report them from one table.
for label, metric in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(label, metric(y_test_c, y_pred_cls))

cls_confusion = confusion_matrix(y_test_c, y_pred_cls)
print("\nConfusion Matrix:\n", cls_confusion)

cls_report = classification_report(y_test_c, y_pred_cls)
print("\nClassification Report:\n", cls_report)

In [None]:

# BIAS-VARIANCE INTERPRETATION
#
# Fixed summary text; nothing here is computed from the scores printed earlier.

bias_variance_notes = (
    "\n===== Bias-Variance Insight =====",
    "Linear Regression: Balanced bias & variance.",
    "Decision Tree: Low bias, High variance (overfitting risk).",
    "Polynomial degree=10: Very low bias, High variance.",
    "Ridge: Reduces variance using regularization.",
)
# One line per note, exactly as separate print() calls would emit them.
print(*bias_variance_notes, sep="\n")