In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [2]:
# Base Class for Dataset Handling
class Dataset:
    """Holds the Iris data together with its train/test partition.

    Every attribute starts as ``None``. ``load_data`` fills ``data`` and
    ``target``; ``preprocess`` fills ``X_train``, ``X_test``, ``y_train``
    and ``y_test``.
    """

    def __init__(self):
        # Full feature matrix and labels; populated by load_data().
        self.data = None
        self.target = None
        # Train/test partitions; populated by preprocess().
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

    def load_data(self):
        """Load the Iris dataset into ``self.data`` and ``self.target``."""
        iris = load_iris()
        self.data = iris.data
        self.target = iris.target

    def preprocess(self, test_size=0.2, random_state=42, stratify=False):
        """Split the loaded data into train and test partitions.

        Args:
            test_size: Forwarded to ``train_test_split``; defaults to 0.2.
            random_state: Seed forwarded to ``train_test_split`` so the split
                is repeatable; defaults to 42.
            stratify: When true, ``self.target`` is passed as the ``stratify``
                argument of ``train_test_split``. Off by default, which keeps
                the plain shuffled split.

        Raises:
            RuntimeError: If ``load_data`` has not populated ``data`` and
                ``target`` yet.
        """
        # Fail early with an actionable message instead of handing None to
        # train_test_split.
        if self.data is None or self.target is None:
            raise RuntimeError(
                "No data to split: call load_data() before preprocess()."
            )

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.data,
            self.target,
            test_size=test_size,
            random_state=random_state,
            stratify=self.target if stratify else None,
        )

In [3]:
# Base Class for Model Selection and Tuning
class ModelSelector:
    """Tunes a set of candidate classifiers and keeps the best one.

    ``models`` maps a display name to an unfitted estimator. ``select_model``
    grid-searches each candidate and stores the winner in ``best_model``.
    """

    # Default search space per candidate; keys must match those of ``models``.
    _DEFAULT_PARAM_GRIDS = {
        'RandomForest': {'n_estimators': [10, 50, 100], 'max_depth': [3, 5, 7]},
        'SVM': {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']},
        'LogisticRegression': {'C': [0.01, 0.1, 1]},
    }

    def __init__(self, random_state=42):
        """Create the candidate estimators.

        Args:
            random_state: Seed given to the random forest so repeated runs
                produce the same tuned model. Pass ``None`` for an unseeded
                forest.
        """
        self.models = {
            'RandomForest': RandomForestClassifier(random_state=random_state),
            'SVM': SVC(),
            'LogisticRegression': LogisticRegression(max_iter=200)
        }
        self.best_model = None

    def _fit_grid_search(self, model, param_grid, X_train, y_train):
        """Run a 5-fold grid search and return the fitted ``GridSearchCV``."""
        grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=1)
        grid_search.fit(X_train, y_train)
        return grid_search

    def hyperparameter_tuning(self, model, param_grid, X_train, y_train):
        """Grid-search ``model`` over ``param_grid`` and return the best estimator.

        Args:
            model: Unfitted estimator to tune.
            param_grid: Parameter grid handed to ``GridSearchCV``.
            X_train: Training features.
            y_train: Training labels.

        Returns:
            The ``best_estimator_`` of the fitted grid search.
        """
        return self._fit_grid_search(model, param_grid, X_train, y_train).best_estimator_

    def select_model(self, X_train, y_train, param_grids=None):
        """Tune every candidate and keep the one with the best CV score.

        Candidates are ranked by ``GridSearchCV.best_score_`` (the mean
        cross-validated score of the best parameter setting). Scoring the
        tuned estimator on the very data it was fitted on would favour
        whichever model overfits the most, so it is not used for ranking.

        Args:
            X_train: Training features.
            y_train: Training labels.
            param_grids: Optional mapping from model name to parameter grid.
                Defaults to the built-in grids for the three candidates.

        Returns:
            The winning fitted estimator, also stored in ``self.best_model``.
            If ``self.models`` is empty, ``self.best_model`` is returned
            unchanged.

        Raises:
            KeyError: If a model name has no entry in the parameter grids.
        """
        grids = self._DEFAULT_PARAM_GRIDS if param_grids is None else param_grids

        # Start below any achievable score so the first candidate is always
        # accepted, even when its score is 0.
        best_score = float("-inf")
        for model_name, model in self.models.items():
            print(f"Tuning {model_name}...")
            search = self._fit_grid_search(model, grids[model_name], X_train, y_train)
            score = search.best_score_
            print(f"{model_name} score: {score}")

            # Strict comparison: on a tie the earlier candidate is kept.
            if score > best_score:
                self.best_model = search.best_estimator_
                best_score = score

        print(f"Best Model: {self.best_model}")
        return self.best_model
