## Imports


In [None]:
import sys
import os
sys.path.append('.')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide plotting defaults: reset matplotlib to its stock style, pick the
# seaborn colour cycle, then set the default figure size used by later cells.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 8)})

##  Data Exploration and Understanding

In [None]:

from regression import load_california_houses_data

# The project loader returns the feature matrix, the target vector and the
# column names, in that order.
X, y, feature_names = load_california_houses_data()

# Headline numbers about the dataset, printed as a single text block.
overview_lines = [
    "California Housing Dataset Overview:",
    "=" * 50,
    f"Dataset shape: {X.shape}",
    f"Number of features: {X.shape[1]}",
    f"Number of samples: {X.shape[0]}",
    f"Target range: ${y.min():.0f} - ${y.max():.0f}",
]
print("\n".join(overview_lines))

# One frame holding the features plus the target; later cells plot from it.
df = pd.DataFrame(X, columns=feature_names).assign(Median_House_Value=y)

print("\nFeature Statistics:")
print(df.describe().round(2))

In [None]:
# Histogram grid with one panel per feature. The grid is sized from the number
# of features, so no feature is silently dropped and no blank panel is left
# behind (the previous fixed 4x4 grid with a hard-coded 13-feature slice could
# do both).
n_features = len(feature_names)
n_cols = 4
n_rows = max(1, (n_features + n_cols - 1) // n_cols)  # ceiling division

# squeeze=False always returns a 2-D array of Axes, so ravel() is valid for
# every grid shape, including a single row.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 3 * n_rows), squeeze=False)
axes = axes.ravel()

for ax, feature in zip(axes, feature_names):
    ax.hist(df[feature], bins=30, alpha=0.7, edgecolor='black')
    ax.set_title(f'Distribution of {feature}')
    ax.set_xlabel(feature)
    ax.set_ylabel('Frequency')

# Drop the unused trailing panels of the last row.
for ax in axes[n_features:]:
    fig.delaxes(ax)

plt.tight_layout()
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between the columns of df.
plt.figure(figsize=(12, 10))
correlation_matrix = df.corr()

heatmap_style = {
    'annot': True,
    'cmap': 'coolwarm',
    'center': 0,
    'square': True,
    'linewidths': 0.5,
    'fmt': '.2f',
}
sns.heatmap(correlation_matrix, **heatmap_style)
plt.title('Feature Correlation Matrix')
plt.tight_layout()
plt.show()

# Rank every column by its correlation with the target, largest value first.
# The target column itself is part of the ranking.
target_column = 'Median_House_Value'
corr_with_target = correlation_matrix[target_column].sort_values(ascending=False)
print("\nTop correlations with Median House Value:")
print(corr_with_target)

##  Run  Regression 

In [None]:
from regression import run_regression

# Banner first, then the project pipeline. Its three return values are the
# manual results, the scikit-learn results and the feature names; note that
# feature_names from the data-exploration cell is rebound here.
# Later cells index both result objects by 'linear' / 'ridge' / 'lasso' and
# read the 'coefficients', 'mse', 'mae', 'r2' (and, for ridge/lasso,
# 'best_alpha') entries.
print("Running Complete Regression Analysis...\n" + "=" * 60)
manual_results, sklearn_results, feature_names = run_regression()

## Model Analysis

In [None]:
# Side-by-side table of the linear-regression coefficients from both
# implementations, plus their absolute difference.
print("\nCoefficient Comparison: Manual vs Scikit-Learn")
print("=" * 55)

# list(...) keeps the concatenation valid when feature_names is an ndarray or
# a pandas Index rather than a plain list.
feature_names_with_bias = ['Bias'] + list(feature_names)

# np.asarray makes the subtraction below element-wise even if the results hold
# plain Python lists. The first entry is labelled as the bias term here.
# NOTE(review): assumes both vectors are 1-D with the bias at index 0 followed
# by one weight per feature -- confirm against run_regression.
manual_linear_coef = np.asarray(manual_results['linear']['coefficients'])
sklearn_linear_coef = np.asarray(sklearn_results['linear']['coefficients'])

coef_comparison = pd.DataFrame({
    'Feature': feature_names_with_bias,
    'Manual_Coefficient': manual_linear_coef,
    'Sklearn_Coefficient': sklearn_linear_coef,
    'Difference': np.abs(manual_linear_coef - sklearn_linear_coef)
})
print("\nLinear Regression Coefficients Comparison:")
print(coef_comparison.round(6))

In [None]:
# Grouped bar chart comparing the per-feature linear-regression weights of the
# manual and scikit-learn implementations (the bias at index 0 is skipped).
plt.figure(figsize=(14, 8))

# Plot at most 8 features; fewer when the dataset has fewer columns.
features_to_plot = min(8, len(feature_names))
x_pos = np.arange(features_to_plot)
width = 0.35

# Shift each series by half a bar width so the two bars sit side by side.
plt.bar(x_pos - width/2, manual_linear_coef[1:features_to_plot+1], width,
        label='Manual Implementation', alpha=0.7)
plt.bar(x_pos + width/2, sklearn_linear_coef[1:features_to_plot+1], width,
        label='Scikit-Learn', alpha=0.7)

plt.xlabel('Features')
plt.ylabel('Coefficient Value')
# The title reports the number of features actually drawn instead of a
# hard-coded 8, so it stays correct for smaller feature sets.
plt.title(
    'Linear Regression Coefficients: Manual vs Scikit-Learn '
    f'(First {features_to_plot} Features)'
)
plt.xticks(x_pos, feature_names[:features_to_plot], rotation=45, ha='right')
plt.legend()
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

##  Regularization  Analysis

In [None]:
# Table of the first seven feature weights under no regularization, ridge and
# lasso, all taken from the manual implementation's results.
print("\nRegularization Effects on Coefficients")
print("=" * 45)

# Entries 1..7 of each coefficient vector: index 0 (treated as the bias
# elsewhere in this notebook) is skipped.
coef_window = slice(1, 8)
linear_coef = manual_results['linear']['coefficients'][coef_window]
ridge_coef = manual_results['ridge']['coefficients'][coef_window]
lasso_coef = manual_results['lasso']['coefficients'][coef_window]

# The selected regularization strengths become part of the column headers.
ridge_alpha = manual_results['ridge']['best_alpha']
lasso_alpha = manual_results['lasso']['best_alpha']

reg_columns = {
    'Feature': feature_names[:7],
    'Linear': linear_coef,
    f"Ridge (α={ridge_alpha})": ridge_coef,
    f"Lasso (α={lasso_alpha})": lasso_coef,
}
reg_comparison = pd.DataFrame(reg_columns)

print("\nCoefficient values across different regularization types (first 7 features):")
print(reg_comparison.round(6))

In [None]:
# Grouped bar chart of the linear / ridge / lasso weights prepared in the
# previous cell (linear_coef, ridge_coef, lasso_coef).
plt.figure(figsize=(12, 6))
features_to_plot = min(7, len(feature_names))
x_pos = np.arange(features_to_plot)
width = 0.25

# Each series: horizontal offset of its bars, bar heights, legend label.
bar_series = [
    (-width, linear_coef, 'Linear (No Reg)'),
    (0, ridge_coef, f'Ridge (α={manual_results["ridge"]["best_alpha"]})'),
    (width, lasso_coef, f'Lasso (α={manual_results["lasso"]["best_alpha"]})'),
]
for bar_offset, bar_heights, bar_label in bar_series:
    plt.bar(x_pos + bar_offset, bar_heights, width, label=bar_label, alpha=0.8)

plt.xlabel('Features')
plt.ylabel('Coefficient Value')
plt.title('Regularization Effects: Coefficient Comparison (First 7 Features)')
plt.xticks(x_pos, feature_names[:features_to_plot], rotation=45, ha='right')
plt.legend()
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

##  Performance Analysis 

In [None]:
# Long-format table of MSE / MAE / R² with one row per (model, implementation).
# Rows are emitted in the order Manual, Scikit-Learn for each model type.
# The loop temporaries use neutral names: binding a kernel-global called
# `sklearn` would shadow the library's module name for the rest of the session.
performance_data = []
implementations = (
    ('Manual', manual_results),
    ('Scikit-Learn', sklearn_results),
)
for model_type in ['linear', 'ridge', 'lasso']:
    for implementation_label, results_by_model in implementations:
        metrics = results_by_model[model_type]
        performance_data.append({
            'Model': model_type.upper(),
            'Implementation': implementation_label,
            'MSE': metrics['mse'],
            'MAE': metrics['mae'],
            'R²': metrics['r2']
        })

performance_df = pd.DataFrame(performance_data)
print("Comprehensive Performance Comparison")
print("=" * 50)
print(performance_df.round(4))

##  Key Findings 

In [None]:
# Text summary of the results. Every verdict below is derived from
# manual_results / sklearn_results instead of being hard-coded, so the wording
# cannot contradict the numbers when the data or the models change.
print("KEY FINDINGS AND INSIGHTS")
print("=" * 40)

# 1. Relative MSE gap (in percent) between the two implementations, per model.
mse_differences = []
for model_type in ['linear', 'ridge', 'lasso']:
    manual_mse = manual_results[model_type]['mse']
    sklearn_mse = sklearn_results[model_type]['mse']
    diff_percent = (abs(manual_mse - sklearn_mse) / sklearn_mse) * 100
    mse_differences.append(diff_percent)
avg_difference = np.mean(mse_differences)

# Rule-of-thumb thresholds for the verdict; a NaN average falls through to the
# last branch.
if avg_difference < 1.0:
    accuracy_verdict = "This indicates excellent implementation accuracy."
elif avg_difference < 5.0:
    accuracy_verdict = "This indicates reasonable agreement with small numerical differences."
else:
    accuracy_verdict = "This indicates a noticeable disagreement; review the manual implementation."

print("\n1. IMPLEMENTATION ACCURACY:")
print(f"   Average MSE difference between manual and scikit-learn: {avg_difference:.4f}%")
print(f"   {accuracy_verdict}")

# 2. Model with the lowest MSE in each implementation.
print("\n2. REGULARIZATION EFFECTIVENESS:")
best_manual_model = min(manual_results.keys(), key=lambda x: manual_results[x]['mse'])
best_sklearn_model = min(sklearn_results.keys(), key=lambda x: sklearn_results[x]['mse'])
print(f"   Best performing manual model: {best_manual_model.upper()}")
print(f"   Best performing scikit-learn model: {best_sklearn_model.upper()}")

# 3. Qualitative label for each manual model's R².
print("\n3. MODEL PERFORMANCE INSIGHTS:")
for model_type in ['linear', 'ridge', 'lasso']:
    r2 = manual_results[model_type]['r2']
    if r2 > 0.7:
        performance_level = 'Excellent fit'
    elif r2 > 0.5:
        performance_level = 'Good fit'
    elif r2 > 0.3:
        performance_level = 'Moderate fit'
    else:
        performance_level = 'Poor fit'
    print(f"   {model_type.upper():6} - R² = {r2:.4f}: {performance_level}")

# 4. Effect of regularization, measured on the manual models.
print("\n4. REGULARIZATION ANALYSIS:")
print(f"   Ridge optimal alpha: {manual_results['ridge']['best_alpha']}")
print(f"   Lasso optimal alpha: {manual_results['lasso']['best_alpha']}")

ridge_mse_delta = manual_results['ridge']['mse'] - manual_results['linear']['mse']
if ridge_mse_delta < 0:
    print(f"   Ridge improves on linear regression (MSE lower by {-ridge_mse_delta:.6f})")
elif ridge_mse_delta > 0:
    print(f"   Ridge does not improve on linear regression (MSE higher by {ridge_mse_delta:.6f})")
else:
    print("   Ridge matches linear regression (identical MSE)")

# Index 0 is treated as the bias throughout this notebook, so it is excluded
# when counting the feature weights that lasso shrank to zero.
lasso_weights = np.asarray(manual_results['lasso']['coefficients'])[1:]
zeroed_weights = int(np.sum(np.isclose(lasso_weights, 0.0)))
if zeroed_weights == 0:
    print(f"   Lasso keeps all {len(lasso_weights)} feature coefficients non-zero (no feature discarded)")
else:
    print(f"   Lasso sets {zeroed_weights} of {len(lasso_weights)} feature coefficients to zero (feature selection)")

# 5. Error of the best manual model.
# NOTE(review): the "$" formatting assumes the metrics are expressed in dollars
# -- confirm the target units used by run_regression.
print("\n5. PRACTICAL IMPLICATIONS:")
best_mse = manual_results[best_manual_model]['mse']
best_mae = manual_results[best_manual_model]['mae']
print(f"   Best model prediction error: ±${best_mae:.0f} (MAE)")
print(f"   Best model mean squared error: {best_mse:.4f} (MSE)")
print("   Compare these errors with the target range in the dataset overview to judge practical accuracy.")

##  Algorithm  Demonstration

In [None]:
# Static study notes on the algorithms used by the regression module.
# Two statements were corrected relative to the earlier text:
#   * the cubic cost of the normal equations is in the number of features d
#     (inverting the d x d matrix X^T X), not in the dataset size;
#   * the linear / ridge / lasso losses are convex, so gradient descent has no
#     spurious local minima to get trapped in.
print("ALGORITHM UNDERSTANDING DEMONSTRATION")
print("=" * 50)
print("""
Normal Equations vs Gradient Descent:
• Normal Equations: Direct solution using matrix operations
  w = (X^T X)^(-1) X^T y
  - Pros: Exact solution, fast for small datasets
  - Cons: Computationally expensive with many features (O(d³) to invert X^T X for d features)
• Gradient Descent: Iterative optimization
  w = w - α * ∇J(w)
  - Pros: Scalable to large datasets, can handle non-closed-form solutions
  - Cons: Requires learning rate tuning and may converge slowly (these losses are convex, so there are no spurious local minima)
Regularization Techniques:
• L2 Regularization (Ridge): Adds penalty λ||w||² to loss function
  - Shrinks coefficients but doesn't set them to zero
  - Helps with multicollinearity and overfitting
• L1 Regularization (Lasso): Adds penalty λ||w||₁ to loss function
  - Can set coefficients to zero, performing feature selection
  - Useful for high-dimensional datasets
Key Implementation Challenges Solved:
1. Numerical stability in matrix inversion
2. Proper handling of bias term in regularization
3. Efficient gradient computation
4. Appropriate learning rate selection
5. Convergence criteria for iterative methods
""")