# Model Comparison

Model comparison evaluates multiple models to determine which one performs best on a given task. It involves:

1) Training multiple models on the same dataset. Start with default hyperparameters for all models, identify the top-performing models, and then tune only those.
2) Evaluating their performance using appropriate metrics.
3) Comparing the results to select the best model.

Some examples:

### Comparing Classification Models

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load dataset
data = load_iris()
X, y = data.data, data.target

# Split data
# The split is seeded so every model is trained and scored on the same rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
# The ensemble estimators are stochastic (bootstrap / feature sub-sampling), so
# they are seeded as well; otherwise the reported accuracies, and possibly the
# ranking, change from one run to the next even though the split is fixed.
models = {
    'RandomForest': RandomForestClassifier(random_state=42),
    'GradientBoosting': GradientBoostingClassifier(random_state=42),
    'SVM': SVC()
}

# Train and evaluate models
# Each model is fitted on the training split and scored once on the held-out
# test split; results maps model name -> test accuracy.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy

# Compare results
for name, accuracy in results.items():
    print(f"{name}: Accuracy = {accuracy:.4f}")

### Comparing Regression Models

In [None]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# Generate synthetic regression dataset
# Seeded so the generated data (and therefore every score below) is the same
# on each run; without it the dataset itself changes between executions.
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
# The ensemble regressors are stochastic, so they are seeded for a repeatable
# comparison; LinearRegression is deterministic and needs no seed.
models = {
    'LinearRegression': LinearRegression(),
    'RandomForest': RandomForestRegressor(random_state=42),
    'GradientBoosting': GradientBoostingRegressor(random_state=42)
}

# Train and evaluate models
# results maps model name -> mean squared error on the held-out test split
# (lower is better).
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    results[name] = mse

# Compare results
for name, mse in results.items():
    print(f"{name}: MSE = {mse:.4f}")

### Comparing Clustering Models

In [None]:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import silhouette_score

# Generate synthetic clustering dataset
# Seeded so the blobs are identical on every run; the true labels are
# discarded because the models are compared with an unsupervised metric.
X, _ = make_blobs(n_samples=500, centers=4, cluster_std=1.0, random_state=42)

# Define models
# KMeans starts from random centroids, so it is seeded, and n_init is set
# explicitly because its default has changed across scikit-learn releases.
# AgglomerativeClustering is deterministic.
models = {
    'KMeans': KMeans(n_clusters=4, n_init=10, random_state=42),
    'AgglomerativeClustering': AgglomerativeClustering(n_clusters=4)
}

# Train and evaluate models
# results maps model name -> silhouette score of its cluster assignment
# (higher is better).
results = {}
for name, model in models.items():
    model.fit(X)
    labels = model.labels_
    silhouette = silhouette_score(X, labels)
    results[name] = silhouette

# Compare results
for name, silhouette in results.items():
    print(f"{name}: Silhouette Score = {silhouette:.4f}")

### Comparing Time Series Models

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Example time series data (synthetic)
# A noisy sine wave sampled daily. The noise is drawn from a dedicated, seeded
# generator so the series is identical on every run and the global NumPy
# random state is left untouched.
rng = np.random.default_rng(42)
dates = pd.date_range(start='2020-01-01', periods=100, freq='D')
values = np.sin(np.linspace(0, 10, 100)) + rng.normal(0, 0.1, 100)
# Column names 'ds' (timestamp) and 'y' (value) are used throughout this cell.
data = pd.DataFrame({'ds': dates, 'y': values})

# Split data into train and test
# Chronological split, no shuffling: the first 80 observations are used for
# training and the last 20 are held out for evaluation.
train = data.iloc[:80]
test = data.iloc[80:]

# Function to evaluate models
def evaluate_model(model, train, test):
    """Fit ``model`` and return its mean absolute error on ``test``.

    Two model families are supported:

    * Prophet: fitted on ``train`` and asked to predict the timestamps in
      ``test['ds']``; the point forecast is read from the ``'yhat'`` column.
    * statsmodels models (ARIMA, SARIMAX): these already hold their training
      series from construction, so ``train`` is not passed to ``fit()``. The
      fitted results object forecasts ``len(test)`` steps past the end of
      that series.

    Args:
        model: An unfitted Prophet instance, or a statsmodels model that was
            constructed on the training series.
        train: Training DataFrame with ``'ds'`` and ``'y'`` columns. Only the
            Prophet branch uses it.
        test: Held-out DataFrame with ``'ds'`` and ``'y'`` columns that
            immediately follows ``train`` in time.

    Returns:
        The mean absolute error between ``test['y']`` and the forecasts.
    """
    if isinstance(model, Prophet):
        model.fit(train)
        # predict() returns a DataFrame of forecast components, not an array.
        forecast = model.predict(test[['ds']])
        predictions = forecast['yhat'].to_numpy()
    else:
        # fit() takes no dataset here (its first positional argument is
        # start_params) and returns the results object that does forecasting.
        fitted = model.fit()
        predictions = np.asarray(fitted.forecast(steps=len(test)))
    mae = mean_absolute_error(test['y'], predictions)
    return mae

# Define models
# The statsmodels models are bound to the training series at construction;
# Prophet and the LSTM receive their data when they are fitted.
models = {
    'ARIMA': ARIMA(train['y'], order=(1, 1, 1)),
    'SARIMA': SARIMAX(train['y'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)),
    'Prophet': Prophet(),
    'LSTM': Sequential([
        LSTM(50, activation='relu', input_shape=(1, 1)),
        Dense(1)
    ])
}

# Train and evaluate models
# results maps model name -> mean absolute error on the held-out test window.
results = {}
for name, model in models.items():
    if name == 'LSTM':
        # Frame the series as a supervised problem on lagged values: the input
        # is y[t-1] and the target is y[t]. Using y[t] as both input and
        # target would only teach the network the identity function.
        series = train['y'].values
        X_train = series[:-1].reshape(-1, 1, 1)
        y_train = series[1:]
        y_test = test['y'].values

        # Train LSTM
        model.compile(optimizer='adam', loss='mse')
        model.fit(X_train, y_train, epochs=50, verbose=0)

        # Recursive multi-step forecast: start from the last training value
        # and feed each prediction back in as the next input. No test value is
        # ever shown to the network, so the score is comparable with the
        # multi-step forecasts produced by the other models.
        last_value = series[-1]
        forecast = []
        for _ in range(len(y_test)):
            step_input = np.reshape(last_value, (1, 1, 1))
            last_value = float(model.predict(step_input, verbose=0)[0, 0])
            forecast.append(last_value)
        predictions = np.array(forecast)
        mae = mean_absolute_error(y_test, predictions)
    else:
        mae = evaluate_model(model, train, test)
    results[name] = mae

# Compare results
for name, mae in results.items():
    print(f"{name}: MAE = {mae:.4f}")

# Select the best model
# Lower MAE is better, so the best model is the one with the smallest score.
best_model = min(results, key=results.get)
print(f"Best Model: {best_model}")