# In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

class DataPreprocessor:
    """Read a CSV file, separate the target column, then split and scale it.

    The instance owns a single ``StandardScaler``; it is fitted by
    ``split_and_scale_data`` and remains available afterwards as
    ``self.scaler``.
    """

    def __init__(self, filepath, target_column):
        """Remember where the data lives and which column is the target.

        Args:
            filepath: Anything ``pandas.read_csv`` accepts as its first argument.
            target_column: Label of the column to predict.
        """
        self.scaler = StandardScaler()
        self.target_column = target_column
        self.filepath = filepath

    def load_data(self):
        """Load the CSV and return ``(features, target)``.

        Returns:
            A 2-tuple: the frame without the target column, and the target
            column on its own.
        """
        frame = pd.read_csv(self.filepath)
        target = frame[self.target_column]
        features = frame.drop(columns=self.target_column)
        return features, target

    def split_and_scale_data(self, X, y, test_size=0.2, random_state=42):
        """Split into train/test partitions and standardise the features.

        The scaler is fitted on the training partition only and then applied
        to the test partition, so the test rows do not influence the fitted
        scaling parameters.

        Args:
            X: Feature table.
            y: Target values aligned with ``X``.
            test_size: Forwarded to ``train_test_split``.
            random_state: Forwarded to ``train_test_split``.

        Returns:
            ``(X_train_scaled, X_test_scaled, y_train, y_test)``.
        """
        train_features, test_features, train_target, test_target = train_test_split(
            X,
            y,
            test_size=test_size,
            random_state=random_state,
        )
        scaled_train = self.scaler.fit_transform(train_features)
        scaled_test = self.scaler.transform(test_features)
        return scaled_train, scaled_test, train_target, test_target
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

class ModelTrainer:
    """Create, fit and score one scikit-learn estimator.

    Supported ``model_type`` values:

    * classification (``regression=False``): ``'logistic'``, ``'svm'``,
      ``'decision_tree'``, ``'random_forest'``
    * regression (``regression=True``): ``'linear'``, ``'decision_tree'``,
      ``'random_forest'``

    Note that the default ``model_type`` (``'logistic'``) is not a regression
    model, so ``regression=True`` requires an explicit ``model_type``.
    """

    def __init__(self, model_type='logistic', regression=False):
        """Instantiate the estimator selected by ``model_type``.

        Args:
            model_type: Name of the estimator (see the class docstring).
            regression: Choose from the regression estimators when true,
                otherwise from the classifiers.

        Raises:
            ValueError: If ``model_type`` is not supported for the chosen task.
        """
        # Kept so evaluate() can pick matching metrics without being told again.
        self.regression = regression

        # Lambdas keep construction lazy: only the chosen estimator is built.
        if regression:
            factories = {
                'linear': lambda: LinearRegression(),
                'decision_tree': lambda: DecisionTreeRegressor(random_state=42),
                'random_forest': lambda: RandomForestRegressor(random_state=42),
            }
            error_message = 'Unsupported regression model type.'
        else:
            factories = {
                'logistic': lambda: LogisticRegression(random_state=42),
                # probability=True is what makes predict_proba available on SVC.
                'svm': lambda: SVC(kernel='linear', random_state=42, probability=True),
                'decision_tree': lambda: DecisionTreeClassifier(random_state=42),
                'random_forest': lambda: RandomForestClassifier(random_state=42),
            }
            error_message = 'Unsupported model type.'

        try:
            factory = factories[model_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are unsupported as well.
            raise ValueError(error_message) from None
        self.model = factory()

    def train(self, X_train, y_train):
        """Fit the wrapped estimator on the training data."""
        self.model.fit(X_train, y_train)

    def evaluate(self, X_test, y_test, regression=False):
        """Score the fitted model on held-out data and print the results.

        Args:
            X_test: Test features.
            y_test: True targets for ``X_test``.
            regression: Use regression metrics. The flag given to the
                constructor is honoured too, so a regression trainer is never
                scored with classification metrics by accident.

        Returns:
            The mean squared error for regression, otherwise the accuracy as
            a fraction.
        """
        y_pred = self.model.predict(X_test)
        if regression or self.regression:
            mse = mean_squared_error(y_test, y_pred)
            print(f"Mean Squared Error: {mse:.2f}")
            return mse

        accuracy = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {accuracy * 100:.2f}%")
        self.plot_confusion_matrix(y_test, y_pred)
        # The single-curve ROC plot below only applies to two-class problems;
        # for more classes roc_curve would raise before the report is printed.
        if len(self.model.classes_) == 2:
            self.plot_roc_curve(y_test, y_pred, X_test)
        else:
            print("ROC curve skipped: it is only plotted for binary targets.")
        print(classification_report(y_test, y_pred))
        return accuracy

    def plot_confusion_matrix(self, y_test, y_pred):
        """Show the confusion matrix of ``y_pred`` against ``y_test`` as a heatmap."""
        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.show()

    def plot_roc_curve(self, y_test, y_pred, X_test=None):
        """Plot the ROC curve and its AUC for a binary classifier.

        Args:
            y_test: True labels.
            y_pred: Used as the score vector only when ``X_test`` is omitted;
                it must then be numeric.
            X_test: Test features. When given, the predicted probability of
                the second class in ``self.model.classes_`` is used as the
                score, and that class is treated as the positive label.
        """
        if X_test is not None:
            # Column 1 of predict_proba belongs to classes_[1]; pass that label
            # explicitly so targets that are not coded as 0/1 also work.
            scores = self.model.predict_proba(X_test)[:, 1]
            pos_label = self.model.classes_[1]
        else:
            scores = y_pred
            pos_label = None

        fpr, tpr, _ = roc_curve(y_test, scores, pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC Curve (area = {roc_auc:.2f})')
        # Diagonal reference line: the expected curve of a random classifier.
        plt.plot([0, 1], [0, 1], color='red', lw=2, linestyle='--')
        plt.title('Receiver Operating Characteristic')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right')
        plt.show()
def main(filepath, target_column, model_type='logistic', regression=False):
    """Run the whole pipeline: load, split and scale, train, evaluate.

    Args:
        filepath: CSV file handed to ``DataPreprocessor``.
        target_column: Name of the column to predict.
        model_type: Estimator name handed to ``ModelTrainer``.
        regression: Treat the task as regression instead of classification.
    """
    # Stage 1: prepare the data.
    prep = DataPreprocessor(filepath, target_column)
    features, target = prep.load_data()
    train_X, test_X, train_y, test_y = prep.split_and_scale_data(features, target)

    # Stage 2: fit the requested model and report its score.
    model_trainer = ModelTrainer(model_type, regression)
    model_trainer.train(train_X, train_y)
    model_trainer.evaluate(test_X, test_y, regression)


# Example Usage for Classification:
# main('your_classification_dataset.csv', 'target', model_type='svm')

# Example Usage for Regression:
# main('your_regression_dataset.csv', 'target', model_type='linear', regression=True)