In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from xgboost import XGBClassifier, XGBRegressor
from sklearn.metrics import accuracy_score, mean_squared_error


# Load or generate dataset (classification & regression)

In [2]:
np.random.seed(42)
X = np.random.rand(500, 5)
y_reg = np.random.rand(500)  # Regression target
y_clf = np.random.randint(0, 2, 500)  # Classification target
print(X)

[[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]
 [0.15599452 0.05808361 0.86617615 0.60111501 0.70807258]
 [0.02058449 0.96990985 0.83244264 0.21233911 0.18182497]
 ...
 [0.98098738 0.48340744 0.86354752 0.58873152 0.37532991]
 [0.28578356 0.20322309 0.76179819 0.38654051 0.51127546]
 [0.49232548 0.57727903 0.86557715 0.98073934 0.40758421]]


# Split dataset

In [3]:
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X, y_reg, test_size=0.2, random_state=42)
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X, y_clf, test_size=0.2, random_state=42)
print(X)

[[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]
 [0.15599452 0.05808361 0.86617615 0.60111501 0.70807258]
 [0.02058449 0.96990985 0.83244264 0.21233911 0.18182497]
 ...
 [0.98098738 0.48340744 0.86354752 0.58873152 0.37532991]
 [0.28578356 0.20322309 0.76179819 0.38654051 0.51127546]
 [0.49232548 0.57727903 0.86557715 0.98073934 0.40758421]]


# Regression Models

In [4]:
models_reg = {
    'Simple Linear Regression': LinearRegression(),
    'Multiple Linear Regression': LinearRegression(),
    'Decision Tree Regressor': DecisionTreeRegressor(),
    'Random Forest Regressor': RandomForestRegressor(),
    'XGBoost Regressor': XGBRegressor()
}
print(models_reg)

{'Simple Linear Regression': LinearRegression(), 'Multiple Linear Regression': LinearRegression(), 'Decision Tree Regressor': DecisionTreeRegressor(), 'Random Forest Regressor': RandomForestRegressor(), 'XGBoost Regressor': XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=None, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...)}


# Classification Models

In [5]:
models_clf = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree Classifier': DecisionTreeClassifier(),
    'Random Forest Classifier': RandomForestClassifier(),
    'Naive Bayes': GaussianNB(),
    'SVC': SVC(),
    'XGBoost Classifier': XGBClassifier()
}
print(models_clf)


{'Logistic Regression': LogisticRegression(), 'Decision Tree Classifier': DecisionTreeClassifier(), 'Random Forest Classifier': RandomForestClassifier(), 'Naive Bayes': GaussianNB(), 'SVC': SVC(), 'XGBoost Classifier': XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)}


# Train & Evaluate Regression Models

In [6]:
for name, model in models_reg.items():
    model.fit(X_train_r, y_train_r)
    y_pred = model.predict(X_test_r)
    mse = mean_squared_error(y_test_r, y_pred)
    print(f'{name} - MSE: {mse:.4f}')


Simple Linear Regression - MSE: 0.0971
Multiple Linear Regression - MSE: 0.0971
Decision Tree Regressor - MSE: 0.1862
Random Forest Regressor - MSE: 0.1013
XGBoost Regressor - MSE: 0.1142


# Train & Evaluate Classification Models

In [7]:
for name, model in models_clf.items():
    model.fit(X_train_c, y_train_c)
    y_pred = model.predict(X_test_c)
    accuracy = accuracy_score(y_test_c, y_pred)
    print(f'{name} - Accuracy: {accuracy:.4f}')

Logistic Regression - Accuracy: 0.5300
Decision Tree Classifier - Accuracy: 0.5400
Random Forest Classifier - Accuracy: 0.5400
Naive Bayes - Accuracy: 0.5200
SVC - Accuracy: 0.5300
XGBoost Classifier - Accuracy: 0.5700
