# In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Preprocessing Function
def preprocess_data(X, categorical_features=None, numerical_features=None):
    """
    Builds an unfitted preprocessor that imputes missing values, one-hot encodes
    categorical features, and scales numerical features.

    Args:
        X: Not used by this function; accepted only to keep the existing
            call signature.
        categorical_features: Column selection handed to the categorical
            pipeline (most-frequent imputation, then one-hot encoding).
            None (the default) means no categorical columns.
        numerical_features: Column selection handed to the numerical
            pipeline (mean imputation, then standard scaling).
            None (the default) means no numerical columns.

    Returns:
        An unfitted ColumnTransformer combining both pipelines. The caller is
        responsible for calling fit / fit_transform on it.
    """
    # Use None sentinels instead of mutable `[]` defaults: the selection is
    # stored on the returned ColumnTransformer, so a shared default list would
    # be aliased by every transformer built with the defaults.
    if categorical_features is None:
        categorical_features = []
    if numerical_features is None:
        numerical_features = []

    # Handling numerical features (imputing, then scaling)
    numerical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),  # Fill missing numerical data with mean
        ('scaler', StandardScaler())  # Scale numerical features
    ])

    # Handling categorical features (imputing, then one-hot encoding)
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),  # Fill missing categorical data with most frequent
        # handle_unknown='ignore' keeps transform() from failing on categories
        # that were not present when the encoder was fitted.
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    # Combining both transformers
    preprocessor = ColumnTransformer(transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

    return preprocessor

# Dataset Preparation Function
def prepare_data(X, y, test_size=0.2, random_state=42, categorical_features=None, numerical_features=None):
    """
    Splits the dataset into train/test parts and preprocesses the features.

    Args:
        X: Feature data to split and transform.
        y: Target values, split alongside X.
        test_size: Passed to train_test_split as `test_size`.
        random_state: Passed to train_test_split as `random_state`.
        categorical_features: Column selection for the categorical pipeline.
            None (the default) means no categorical columns.
        numerical_features: Column selection for the numerical pipeline.
            None (the default) means no numerical columns.

    Returns:
        (X_train, X_test, y_train, y_test), where X_train and X_test are the
        preprocessed feature splits and y_train / y_test are the untouched
        target splits.
    """
    # None sentinels replace the former mutable `[]` defaults; normalise here so
    # preprocess_data always receives a concrete list.
    if categorical_features is None:
        categorical_features = []
    if numerical_features is None:
        numerical_features = []

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    preprocessor = preprocess_data(X, categorical_features, numerical_features)

    # Fit on the training split only, then reuse the fitted transformer on the
    # test split, so imputation/scaling statistics come from training data alone.
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)

    return X_train, X_test, y_train, y_test

# Linear Models Class
class LinearModels:
    """
    Holds a LinearRegression and an SGDRegressor behind one train/predict API.

    The `method` argument of `train` and `predict` picks the estimator:
    'normal' selects LinearRegression, 'sgd' selects SGDRegressor.
    """

    def __init__(self):
        self.lin_reg = LinearRegression()
        self.sgd_reg = SGDRegressor()

    def _estimator_for(self, method):
        """
        Returns the estimator selected by `method`.

        Raises:
            ValueError: if `method` is neither 'normal' nor 'sgd'.
        """
        if method == 'normal':
            return self.lin_reg
        if method == 'sgd':
            return self.sgd_reg
        # Fail loudly: silently ignoring an unknown method would leave the model
        # untrained (train) or hand back None as predictions (predict).
        raise ValueError(f"Unknown method {method!r}; expected 'normal' or 'sgd'.")

    def train(self, X_train, y_train, method='normal'):
        """
        Trains the model using 'normal' (Linear Regression) or 'sgd' (SGDRegressor).

        Raises:
            ValueError: if `method` is not one of the supported names.
        """
        self._estimator_for(method).fit(X_train, y_train)

    def predict(self, X_test, method='normal'):
        """
        Makes predictions using 'normal' (Linear Regression) or 'sgd' (SGDRegressor).

        Returns:
            The selected estimator's predictions for X_test.

        Raises:
            ValueError: if `method` is not one of the supported names.
        """
        return self._estimator_for(method).predict(X_test)

    def evaluate(self, y_test, y_pred):
        """
        Evaluates predictions against the true values.

        Returns:
            (mse, rmse): the mean squared error and its square root.
        """
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        return mse, rmse

    def plot_predictions(self, y_test, y_pred):
        """
        Plots actual vs predicted values for visual comparison.

        A dashed red diagonal spanning the range of y_test marks where perfect
        predictions would fall.
        """
        # Convert once so plain Python sequences (which lack .min()/.max())
        # are accepted as well as arrays and Series.
        y_true = np.asarray(y_test)
        lower, upper = y_true.min(), y_true.max()

        plt.scatter(y_test, y_pred)
        plt.plot([lower, upper], [lower, upper], 'r--')
        plt.xlabel('True Values')
        plt.ylabel('Predictions')
        plt.title('True vs Predicted')
        plt.show()

# Usage Example Function
def run_model(X, y, categorical_features=None, numerical_features=None, method='normal'):
    """
    Prepares data, trains the model, evaluates and plots predictions.

    Args:
        X: Feature data.
        y: Target values.
        categorical_features: Column selection for the categorical pipeline.
            None (the default) means no categorical columns.
        numerical_features: Column selection for the numerical pipeline.
            None (the default) means no numerical columns.
        method: 'normal' (LinearRegression) or 'sgd' (SGDRegressor).

    Returns:
        None. The MSE and RMSE are printed and a true-vs-predicted plot is shown.
    """
    # None sentinels replace the former mutable `[]` defaults; normalise here so
    # prepare_data always receives concrete lists.
    if categorical_features is None:
        categorical_features = []
    if numerical_features is None:
        numerical_features = []

    X_train, X_test, y_train, y_test = prepare_data(
        X,
        y,
        categorical_features=categorical_features,
        numerical_features=numerical_features,
    )

    model = LinearModels()
    model.train(X_train, y_train, method=method)

    y_pred = model.predict(X_test, method=method)

    mse, rmse = model.evaluate(y_test, y_pred)
    print(f'MSE: {mse}, RMSE: {rmse}')

    model.plot_predictions(y_test, y_pred)

# Sample usage with custom dataset
# df = pd.read_csv('your_dataset.csv')  # Replace this with your actual dataset
# X = df.drop('target_column', axis=1)  # Replace 'target_column' with the actual target column name
# y = df['target_column']
# categorical_features = ['col1', 'col2']  # Replace with your actual categorical columns
# numerical_features = ['col3', 'col4']    # Replace with your actual numerical columns
# run_model(X, y, categorical_features, numerical_features, method='normal')  # Or method='sgd'
