# **Setup**

In [1]:
# Mount Google Drive at /content/drive so the notebook can read files stored
# there (this cell relies on the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# **Load Data**

In [4]:
def load_data(csv_file_path):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    csv_file_path : str
        File path of the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The loaded data, or ``None`` when reading fails for any reason. In
        the failure case a short notice and the exception are printed.
    """
    try:
        return pd.read_csv(csv_file_path)
    except Exception as error:
        # Best-effort loader: report the problem and let the caller decide
        # what to do with the missing frame.
        for message in ("Error occurred while loading the CSV file.", error):
            print(message)
    return None

# **Preprocess Data**

In [5]:
def preprocess_data(df):
    """Return the DataFrame as-is; placeholder for a preprocessing stage.

    The file in the "csv_file_path" has already been pre-processed, so no
    transformation is applied here.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing the data.

    Returns
    -------
    pandas.DataFrame
        The very same object that was passed in, ready for training.
    """
    return df

# **Train the Model (Random Forest with K-Fold Cross-Validation)**

In [6]:
def train_model(df, num_folds=10, train_split=0.75, target_col='estimated_stock_pct'):
    """Cross-validate a RandomForestRegressor and report its mean absolute error.

    The rows are split into ``num_folds`` shuffled folds (seed 42). For every
    fold the features are standardised with statistics taken from the training
    part only, the forest is fitted on that part, and the MAE on the held-out
    part is printed and recorded.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``target_col``; every other column is used as a feature.
    num_folds : int, default 10
        Number of folds for K-fold cross-validation.
    train_split : float, default 0.75
        Ignored. Retained only so existing calls keep working: with K-fold
        the training share of each fold is determined by ``num_folds``
        (``(num_folds - 1) / num_folds``), not by this value.
    target_col : str, default 'estimated_stock_pct'
        Name of the column to predict.

    Returns
    -------
    float
        Average MAE over all folds.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``df``.
    """
    # Fail early with a readable message instead of a pandas-internal one.
    if target_col not in df.columns:
        raise KeyError(f"Target column '{target_col}' not found in DataFrame.")

    # Separate features and target variable
    features = df.drop(columns=[target_col]).values
    target = df[target_col].values

    # Fixed seeds keep the fold assignment and the forest reproducible.
    model = RandomForestRegressor(random_state=42)
    scaler = StandardScaler()
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    # MAE of each fold, in fold order
    mae_results = []

    for fold, (train_index, test_index) in enumerate(kf.split(features, target), 1):
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = target[train_index], target[test_index]

        # Fit the scaler on the training part only so no statistics from the
        # held-out part leak into training. Tree ensembles are largely
        # insensitive to feature scale; the step is kept so results stay
        # comparable with earlier runs.
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Calling fit again on the same estimator discards the previous fold's trees.
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        # Compute Mean Absolute Error (MAE) to assess accuracy for this fold
        mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
        mae_results.append(mae)
        print(f"Fold {fold}: MAE = {mae:.3f}")

    # Compute the average MAE over all folds
    average_mae = sum(mae_results) / len(mae_results)
    print(f"Average MAE: {average_mae:.2f}")

    return average_mae

In [7]:
def main(csv_file_path="/content/drive/MyDrive/AI Forage/merged_df.csv"):
    """Run the pipeline end to end: load the CSV, preprocess it, train the model.

    Parameters
    ----------
    csv_file_path : str, optional
        Path to the CSV file containing the data. Defaults to the merged
        dataset on the mounted Google Drive.

    Returns
    -------
    float or None
        Average MAE returned by ``train_model``, or ``None`` when the CSV
        could not be loaded (``load_data`` prints the error in that case).
    """
    # Step 1: Load data from CSV file into a DataFrame
    raw_df = load_data(csv_file_path)
    if raw_df is None:
        return None

    # Step 2: Preprocess the data (if required)
    prepared_df = preprocess_data(raw_df)

    # Step 3: Train the model; per-fold and average MAE are printed by
    # train_model, and the average is handed back for further reporting.
    return train_model(prepared_df)

# Run the pipeline only when this code is executed directly (not when imported).
if __name__ == "__main__":
    main()

Fold 1: MAE = 0.236
Fold 2: MAE = 0.235
Fold 3: MAE = 0.239
Fold 4: MAE = 0.235
Fold 5: MAE = 0.242
Fold 6: MAE = 0.234
Fold 7: MAE = 0.240
Fold 8: MAE = 0.234
Fold 9: MAE = 0.238
Fold 10: MAE = 0.231
Average MAE: 0.24
