# Regression Models and Regression Metrics

In [4]:
!pip install lightgbm




### 1) Importing the Libraries and Regression Models

In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

### 2) Synthetic Dataset

In [6]:
# Generate a reproducible synthetic regression problem: 1000 samples with
# 10 features each. `noise` is the standard deviation of the Gaussian noise
# added to the target, and `random_state` fixes the generated values.
# The train/test split is done once, in the "Training and Test Datasets" section.
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)


### 3) Training and Test Datasets

In [7]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### 4) Regression Models

In [8]:
# Candidate regressors, keyed by the display name used when reporting metrics.
# The ensemble/boosting estimators are seeded explicitly so that a fresh
# "Restart & Run All" reproduces the same scores.
models = {
    'Linear Regression': LinearRegression(),
    'Support Vector Machine': SVR(),
    'Random Forest': RandomForestRegressor(random_state=42),
    'K-Nearest Neighbors': KNeighborsRegressor(),
    'XGBoost': XGBRegressor(random_state=42),
    # verbose=-1 silences LightGBM's informational log lines during fit.
    'LightGBM': LGBMRegressor(random_state=42, verbose=-1)
}

### 5) Training and Evaluating the Regression Models

In [9]:
# Fit every candidate on the training split, score it on the held-out split,
# and keep the scores so the models can be compared side by side afterwards.
results = []
for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate regression metrics on the test split
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    # RMSE is in the same units as the target, unlike MSE (squared units).
    rmse = np.sqrt(mse)

    print(f"Metrics for {name}:")
    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae}")
    print(f"R^2 Score: {r2}")
    print("==========================================")

    results.append({'Model': name, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'R2': r2})

# One row per model, best R^2 first, shown as the cell's rich output.
metrics_df = (
    pd.DataFrame(results)
    .set_index('Model')
    .sort_values('R2', ascending=False)
)
metrics_df

Training Linear Regression...
Metrics for Linear Regression:
Mean Squared Error: 0.009511914910420909
Mean Absolute Error: 0.07773328130098563
R^2 Score: 0.9999994362016347
Training Support Vector Machine...
Metrics for Support Vector Machine:
Mean Squared Error: 12758.146129860857
Mean Absolute Error: 89.70507889979308
R^2 Score: 0.24378823816737083
Training Random Forest...
Metrics for Random Forest:
Mean Squared Error: 2539.1602140973346
Mean Absolute Error: 39.88197789542273
R^2 Score: 0.8494967215821658
Training K-Nearest Neighbors...
Metrics for K-Nearest Neighbors:
Mean Squared Error: 3728.3442939729885
Mean Absolute Error: 48.86073406351671
R^2 Score: 0.77901038453659
Training XGBoost...
Metrics for XGBoost:
Mean Squared Error: 1709.6566968047127
Mean Absolute Error: 33.272145604976856
R^2 Score: 0.8986637643384835
Training LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000533 seconds.
You can set `force_col_wise=true` to rem