In [None]:
# Import Libraries
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
import joblib

# Define constants
# K: number of train/evaluate iterations performed by the training loop.
K = 10
# split: fraction of the rows given to the training set (passed as train_size).
split = 0.75

# Load a CSV file into a DataFrame
def load_csv_dataset(csv_file_path, save_model_path=None):
    """
    Load a CSV file into a Pandas DataFrame.

    Args:
        csv_file_path (str): The path to the CSV file.
        save_model_path (str, optional): Unused by this function. It is kept
            (now optional) only so existing two-argument calls keep working;
            loading a dataset does not need a model path.

    Returns:
        pd.DataFrame: The loaded dataset.
    """
    # Loading is the only responsibility here; nothing is trained or saved.
    return pd.read_csv(csv_file_path)

def create_target_predictors(data, target="estimated_stock_pct"):
    """
    Split a dataset into predictor variables (X) and the target variable (y).

    Args:
        data (pd.DataFrame): The input dataset.
        target (str): The name of the target column.

    Returns:
        pd.DataFrame: Predictor variables (X), i.e. every column except ``target``.
        pd.Series: Target variable (y).

    Raises:
        KeyError: If ``target`` is not a column of ``data``.
    """
    # Use the function's own arguments: the frame passed in as ``data`` and
    # the column named by ``target`` (not a global frame or a fixed name).
    y = data[target]
    X = data.drop(columns=[target])
    return X, y

def train_algorithm_with_cross_validation(X, y, save_model_path=None):
    """
    Train a Random Forest Regressor on K random train/test splits and report MAE.

    Each iteration draws a fresh random split (``split`` of the rows for
    training), standardizes the features using statistics from the training
    rows only, fits a new model, and prints the Mean Absolute Error on the
    held-out rows. The average MAE is printed at the end.

    Args:
        X (pd.DataFrame): Predictor variables.
        y (pd.Series): Target variable.
        save_model_path (str, optional): If given, the model fitted in the
            last iteration is written to this path with joblib.

    Returns:
        float: Average MAE across the K iterations.
    """
    # MAE of each iteration (lower is better).
    fold_maes = []
    trained_model = None

    for fold in range(K):
        # Fresh estimator and scaler for every iteration
        model = RandomForestRegressor()
        scaler = StandardScaler()

        # Offset the seed by the fold index: a constant seed would produce
        # the exact same split K times, so the "average" would cover a single
        # split. Fold 0 keeps the original seed of 42.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=split, random_state=42 + fold
        )

        # fit_transform both fits and transforms, so no separate fit call is
        # needed; the test rows are only transformed to avoid leakage.
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Train the model and predict on the held-out rows
        trained_model = model.fit(X_train, y_train)
        y_pred = trained_model.predict(X_test)

        # Calculate Mean Absolute Error (MAE)
        mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
        fold_maes.append(mae)
        print(f"Fold {fold + 1}: MAE = {mae:.3f}")

    # Calculate the average MAE across all iterations
    avg_mae = sum(fold_maes) / len(fold_maes)
    print(f"Average MAE: {avg_mae:.2f}")

    # Save before returning (a dump placed after the return never runs).
    # NOTE: the model was fitted on standardized features and the scaler is
    # not persisted with it; callers must apply equivalent scaling.
    if save_model_path is not None:
        joblib.dump(trained_model, save_model_path)

    return avg_mae

if __name__ == "__main__":
    # Specify the path to the CSV file containing the dataset
    csv_file_path = 'csv_file_path'

    # Path handed to load_csv_dataset, whose signature in this cell takes a
    # model path as its second argument; omitting it raises a TypeError.
    save_model_path = 'save_model_path.pkl'

    # Load the dataset
    dataset = load_csv_dataset(csv_file_path, save_model_path)

    # Create target and predictor variables
    X, y = create_target_predictors(dataset)

    # Train the algorithm with cross-validation
    avg_mae = train_algorithm_with_cross_validation(X, y)


In [None]:
# Import Libraries
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
import joblib

# Define constants
# K: number of train/evaluate iterations performed by the training loop.
K = 10
# split: fraction of the rows given to the training set (passed as train_size).
split = 0.75

# Load a CSV file into a DataFrame
def load_csv_dataset(csv_file_path):
    """
    Read the CSV file at the given location and return its contents.

    Args:
        csv_file_path (str): Location of the CSV file to read.

    Returns:
        pd.DataFrame: The parsed contents of the file.
    """
    # Parsing is delegated entirely to pandas with its default options.
    return pd.read_csv(csv_file_path)

def create_target_predictors(data, target="estimated_stock_pct"):
    """
    Split a dataset into predictor variables (X) and the target variable (y).

    Args:
        data (pd.DataFrame): The input dataset.
        target (str): The name of the target column.

    Returns:
        pd.DataFrame: Predictor variables (X), i.e. every column except ``target``.
        pd.Series: Target variable (y).

    Raises:
        KeyError: If ``target`` is not a column of ``data``.
    """
    # Honour the ``target`` argument rather than a fixed column name, so
    # callers can select a different target column.
    y = data[target]
    X = data.drop(columns=[target])
    return X, y

# Instantiate the algorithm: a module-level Random Forest regressor created
# with default hyperparameters (no scaler is created here).
model = RandomForestRegressor()

def train_algorithm_with_cross_validation(X, y, save_model_path):
    """
    Train a Random Forest Regressor on K random train/test splits and report MAE.

    Each iteration draws a fresh random split (``split`` of the rows for
    training), standardizes the features using statistics from the training
    rows only, refits the module-level ``model``, and prints the Mean Absolute
    Error on the held-out rows. The average MAE is printed at the end and the
    model fitted in the last iteration is saved.

    Args:
        X (pd.DataFrame): Predictor variables.
        y (pd.Series): Target variable.
        save_model_path (str): Path to save the trained model.

    Returns:
        float: Average MAE across the K iterations.
    """
    # MAE of each iteration (lower is better).
    fold_maes = []

    for fold in range(K):
        # Instantiate the scaler
        scaler = StandardScaler()

        # Offset the seed by the fold index: a constant seed would produce
        # the exact same split K times, so the "average" would cover a single
        # split. Fold 0 keeps the original seed of 42.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=split, random_state=42 + fold
        )

        # fit_transform both fits and transforms, so no separate fit call is
        # needed; the test rows are only transformed to avoid leakage.
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Train the model and predict on the held-out rows
        trained_model = model.fit(X_train, y_train)
        y_pred = trained_model.predict(X_test)

        # Calculate Mean Absolute Error (MAE)
        mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
        fold_maes.append(mae)
        print(f"Fold {fold + 1}: MAE = {mae:.3f}")

    # Calculate the average MAE across all iterations
    avg_mae = sum(fold_maes) / len(fold_maes)
    print(f"Average MAE: {avg_mae:.2f}")

    # Save the trained model.
    # NOTE: the model was fitted on standardized features and the scaler is
    # not persisted with it; callers must apply equivalent scaling.
    joblib.dump(trained_model, save_model_path)

    return avg_mae

if __name__ == "__main__":
    # Where the fitted model will be written, and where the dataset is read from
    save_model_path = 'save_model_path.pkl'
    csv_file_path = 'csv_file_path'

    # Read the data, then separate the predictors from the target column
    dataset = load_csv_dataset(csv_file_path)
    X, y = create_target_predictors(dataset)

    # Run the training loop; it prints per-iteration MAE and saves the model
    avg_mae = train_algorithm_with_cross_validation(X, y, save_model_path)
