In [None]:
!pip install scikit-learn
!pip install tensorflow

In [None]:
import csv

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [None]:
# Load a CSV file into a list of row dicts
def load_csv(file_name):
    """Read a CSV file and return its rows as a list of dictionaries.

    The first line of the file is treated as the header; every following line
    becomes one dict mapping header name -> cell text (values are not
    converted from strings).

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        list[dict]: One dict per data row, in file order. Empty if the file
        has no data rows.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # newline='' hands raw line endings to the csv module, which is what the
    # csv documentation requires for correct handling of quoted newlines.
    with open(file_name, mode='r', newline='') as file:
        return list(csv.DictReader(file))

In [None]:


class create_model_suite:
    """Train, persist, load and query a RandomForest up/down classifier.

    The suite reads a CSV of historical price data into ``self.raw_data`` and
    learns to predict, from the ``open``, ``high``, ``low`` and ``volume``
    columns of a row, whether the *next* row's ``close`` is higher than that
    row's ``close`` (label 1) or not (label 0).

    Attributes:
        model: The fitted classifier, or None until one is trained or loaded.
        scaler: The StandardScaler fitted during training when normalization
            was requested, otherwise None. It is applied again at prediction
            time so the model always sees features in the space it was
            trained on.
        raw_data: DataFrame loaded from the CSV file.
    """

    # Feature columns shared by training and prediction so they cannot drift apart.
    FEATURE_COLUMNS = ['open', 'high', 'low', 'volume']

    def __init__(self, data_path: str = '../finance_historical.csv'):
        """Create the suite and load the dataset.

        Args:
            data_path: CSV file to read. Defaults to the path the notebook
                has always used.
        """
        self.model = None
        self.scaler = None

        # Load the dataset
        self.raw_data = pd.read_csv(data_path)

    ########################################################################################################
    ########################################################################################################

    def train_model(self, model_name: str, model_params: dict) -> None:
        """Fit a RandomForestClassifier, report test accuracy and save it.

        Args:
            model_name: Base name of the output files. The model is written to
                ``{model_name}.joblib`` and the scaler (or None) to
                ``{model_name}_scaler.joblib``.
            model_params: Optional settings read with ``.get``:
                ``n_estimators`` (default 100), ``max_depth`` (default None),
                ``random_state`` (default 42) and ``normalize`` (default
                False; standardize features before fitting).

        Raises:
            Exception: Any error raised while training or saving is printed
                and re-raised unchanged.
        """
        try:
            # Ensure the dataset is not empty
            if self.raw_data.empty:
                print("No data available to train the model.")
                return

            params = model_params or {}

            # The final row has no "next close", so its label is undefined.
            # Comparing against the NaN produced by shift(-1) would silently
            # label it 0, so that row is excluded from both X and y.
            close = self.raw_data['close']
            features = self.raw_data[self.FEATURE_COLUMNS].iloc[:-1]
            target = (close.shift(-1) > close).astype(int).iloc[:-1]

            # Split data into training and testing sets.
            # NOTE(review): rows are shuffled before splitting; if the rows are
            # time-ordered a chronological split may be more appropriate.
            X_train, X_test, y_train, y_test = train_test_split(
                features, target, test_size=0.2, random_state=42
            )

            # Normalize features if specified. The scaler is fitted on the
            # training split only so test statistics do not leak into training.
            scaler = None
            if params.get("normalize", False):
                scaler = StandardScaler()
                X_train = scaler.fit_transform(X_train)
                X_test = scaler.transform(X_test)

            # Initialize and train the RandomForestClassifier with parameters
            model = RandomForestClassifier(
                n_estimators=params.get('n_estimators', 100),  # Number of trees
                max_depth=params.get('max_depth', None),       # Max depth of trees
                random_state=params.get('random_state', 42)    # Random state for reproducibility
            )
            model.fit(X_train, y_train)

            # Evaluate the model's performance on the held-out split
            accuracy = accuracy_score(y_test, model.predict(X_test))
            print(f"Model trained with accuracy: {accuracy}")

            # Publish model and scaler together, only after a successful fit,
            # so the suite never holds a model without its matching scaler.
            self.model = model
            self.scaler = scaler

            # Save the model using joblib. The scaler goes to a companion file
            # (None is stored when normalization was off) so the model file
            # keeps containing just the estimator.
            joblib.dump(self.model, f'{model_name}.joblib')
            joblib.dump(self.scaler, f'{model_name}_scaler.joblib')

        except Exception as e:
            print(f"Error during model training: {e}")
            raise

    ########################################################################################################
    ########################################################################################################

    def load_model(self, model_name: str) -> None:
        """Load a previously saved model (and its scaler, if one was saved).

        Args:
            model_name: Base name used when the model was saved; the model is
                read from ``{model_name}.joblib``.

        Raises:
            RuntimeError: If this suite already holds a model.
            Exception: Any error raised while loading is printed and re-raised.
        """
        if self.model is not None:
            raise RuntimeError('LOAD ERROR: A MODEL HAS ALREADY BEEN LOADED')
        try:
            # Load the model using joblib.
            # SECURITY: joblib files are pickle-based; only load files you trust.
            self.model = joblib.load(f'{model_name}.joblib')
            try:
                self.scaler = joblib.load(f'{model_name}_scaler.joblib')
            except FileNotFoundError:
                # No companion scaler file was saved for this model.
                self.scaler = None
            print(f"Model loaded from {model_name}")
        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    ########################################################################################################
    ########################################################################################################

    def predict_outcome(self, row_index: int):
        """Predict the label for one row of ``self.raw_data``.

        Args:
            row_index: Positional index of the row; negative values count
                from the end.

        Returns:
            The result of ``self.model.predict`` for that single row.

        Raises:
            RuntimeError: If no model has been trained or loaded.
            Exception: Any error raised while predicting (including an
                out-of-range ``row_index``) is printed and re-raised.
        """
        if self.model is None:
            raise RuntimeError('PREDICT ERROR: LOAD A MODEL FIRST')
        try:
            # Select the features for the specified row. A one-element list
            # keeps the result two-dimensional and, unlike the slice
            # row_index:row_index+1, is not empty for row_index == -1.
            input_data = self.raw_data.iloc[[row_index]][self.FEATURE_COLUMNS]

            # Apply the same scaling the model was trained with, if any
            if self.scaler is not None:
                input_data = self.scaler.transform(input_data)

            # Make the prediction
            return self.model.predict(input_data)
        except Exception as e:
            print(f"Error during prediction: {e}")
            raise


In [None]:
# Build the suite; its constructor reads the historical dataset
suite = create_model_suite()

# RandomForest hyper-parameters plus the feature-normalization switch
model_params = dict(
    n_estimators=100,   # number of trees in the forest
    max_depth=None,     # no limit on tree depth
    random_state=42,    # fixed seed for reproducibility
    normalize=True,     # standardize the features
)

# Fit the classifier and save it under the name 'RandomForest'
suite.train_model('RandomForest', model_params)

# Predict the outcome for the row at positional index 10
prediction = suite.predict_outcome(10)
print(f"Prediction for row 10: {prediction}")


In [None]:
# Load the trained model from disk into a fresh suite: load_model() refuses to
# run on a suite that already holds a model, and `suite` was trained above.
suite = create_model_suite()
suite.load_model('RandomForest')

In [None]:
# Predict the outcome for a single row, chosen by positional index
row_index = 15
prediction = suite.predict_outcome(row_index)
print(prediction)