In [3]:
import array
from enum import Enum
from sklearn.base import RegressorMixin
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [4]:
class ModelType(Enum):
    """Regression model types that can be selected when building a model.

    Each member's value is the scikit-learn estimator class imported at the
    top of this file.
    """

    # Value is sklearn.linear_model.LinearRegression.
    LINEAR_REGRESSION = LinearRegression
    # Value is sklearn.ensemble.RandomForestRegressor.
    RANDOM_FOREST_REGRESSION = RandomForestRegressor

In [8]:
class AiModel:
    """
    Encapsulates a scikit-learn regression model together with its train/test split.

    On construction the data is split into training and testing sets, the estimator
    selected by ``model_type`` is created and fitted on the training part, and the
    predictions for the testing part are stored on the instance.

    Attributes:
        model_type (ModelType): The type of regression model that was requested.
        model (RegressorMixin): The fitted regression model instance.
        training_features (array): The features used for training the model.
        training_target (array): The target values used for training the model.
        testin_features (array): The features used for testing the model
            (attribute name kept as-is for backward compatibility).
        testing_target (array): The target values used for testing the model.
        test_data_target_predictions (array): Predictions of the fitted model for
            ``testin_features``.
        meta_data (dict): The keyword arguments that were passed to the constructor.

    Args:
        model_type (ModelType): The type of model to be created
            (LINEAR_REGRESSION or RANDOM_FOREST_REGRESSION).
        target (array): See the note on argument order in ``__init__``.
        features (array): See the note on argument order in ``__init__``.
        **kwargs: Additional keyword arguments for model initialization
            (``n_estimators``, ``random_state``).

    Methods:
        splitt_training_testing_data(features: array, target: array) -> tuple:
            Splits the given features and target into training and testing datasets.
        print_meta_data():
            Prints the keyword arguments the model was created with.

    Raises:
        ValueError: If an unsupported model_type is provided.
    """

    model_type: ModelType
    model: RegressorMixin
    training_features: array
    training_target: array
    testin_features: array
    testing_target: array
    test_data_target_predictions: array
    meta_data: dict

    def __init__(self, model_type: ModelType, target: array, features: array, **kwargs):
        """
        Initializes the AiModel, splits the data, and trains the model.

        Args:
            model_type (ModelType): The type of model to be used.
            target (array): Second positional argument. It is forwarded as the first
                argument of ``splitt_training_testing_data`` (see note below).
            features (array): Third positional argument. It is forwarded as the second
                argument of ``splitt_training_testing_data`` (see note below).
            **kwargs: Additional keyword arguments for model initialization.
                - 'n_estimators': Number of trees in the RandomForestRegressor (default: 100).
                - 'random_state': Random seed of the RandomForestRegressor (default: None).
                They are ignored for LINEAR_REGRESSION but always stored in ``meta_data``.

        Raises:
            ValueError: If an unsupported model_type is provided.

        Note:
            NOTE(review): ``splitt_training_testing_data`` is declared as
            ``(features, target)`` but receives ``(target, features)`` positionally.
            The value bound to ``target`` is therefore used as the feature matrix and
            the value bound to ``features`` as the target values. This data flow is
            kept unchanged because existing positional callers rely on it; the
            parameter names should be revisited together with those callers.
        """
        self.model_type = model_type
        self.meta_data = kwargs

        # Create the estimator first so an unsupported model_type fails before any
        # work is done on the data.
        self.model = self._build_model(model_type, kwargs)

        # Positional forwarding is intentional here; see the note in the docstring.
        (
            self.training_features,
            self.testin_features,
            self.training_target,
            self.testing_target,
        ) = self.splitt_training_testing_data(target, features)

        # Fitting and predicting are identical for every supported estimator.
        self.model.fit(self.training_features, self.training_target)
        self.test_data_target_predictions = self.model.predict(self.testin_features)

    @staticmethod
    def _build_model(model_type: ModelType, params: dict) -> RegressorMixin:
        """
        Creates the (unfitted) estimator for the requested model type.

        Args:
            model_type (ModelType): The type of model to create.
            params (dict): Keyword arguments given to the constructor; only
                'n_estimators' and 'random_state' are read, and only for the
                random forest.

        Returns:
            RegressorMixin: A new, unfitted estimator instance.

        Raises:
            ValueError: If an unsupported model_type is provided.
        """
        if model_type == ModelType.LINEAR_REGRESSION:
            return LinearRegression()

        if model_type == ModelType.RANDOM_FOREST_REGRESSION:
            # Hyper-parameters belong to the constructor; fit() does not accept them.
            return RandomForestRegressor(
                n_estimators=params.get('n_estimators', 100),
                random_state=params.get('random_state', None),
            )

        raise ValueError(f"Unsupported model_type: {model_type}")

    def print_meta_data(self):
        """Prints the keyword arguments that were passed to the constructor."""
        print(self.meta_data)

    @staticmethod
    def splitt_training_testing_data(features: array, target: array,
                                     test_size: float = 0.2, random_state: int = 42) -> tuple:
        """
        Splits the provided features and target into training and testing sets.

        Args:
            features (array): The features to split.
            target (array): The target values to split.
            test_size (float): Fraction of the data reserved for testing (default: 0.2).
            random_state (int): Seed for the shuffling applied before the split
                (default: 42).

        Returns:
            tuple: A tuple containing the training features, testing features,
            training target, and testing target.
        """
        features_train, features_test, target_train, target_test = train_test_split(
            features, target, test_size=test_size, random_state=random_state
        )
        return features_train, features_test, target_train, target_test
    



In [9]:
import pandas as pd
# Load the Ames housing data set from a path relative to this notebook.
housing_path = '../data/raw/AmesHousing.csv'
housing = pd.read_csv(housing_path)

# Fit a linear regression on the two columns 'Overall Qual' and 'Yr Sold' with 'SalePrice'.
# NOTE(review): AiModel.__init__ names its 2nd/3rd parameters (target, features) but forwards
# them positionally to splitt_training_testing_data(features, target). Passing the two feature
# columns first and 'SalePrice' second is therefore what makes the columns end up as features
# and 'SalePrice' as the target; keep this order in sync with AiModel if its signature changes.
model = AiModel(ModelType.LINEAR_REGRESSION, housing[['Overall Qual', 'Yr Sold']].values, housing['SalePrice'].values)