In [1]:
# Imports used for ML Pipeline
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout

In [12]:
import mlflow
# Displays the shell command for opening the MLflow UI as a reminder only;
# this cell prints the text and does not launch anything itself.
print("mlflow ui --port 5000")

mlflow ui --port 5000


In [2]:
def load_data(file_name, folder="datasets"):
    '''
    Load data from a csv file into a pandas dataframe.

    Args:
    file_name: The name of the csv file to load, relative to `folder`
    folder: Directory containing the csv file. Defaults to "datasets",
        which is the location this function always read from before the
        parameter existed, so existing one-argument calls are unaffected.

    Returns:
    df: A pandas dataframe containing the data from the csv file

    Raises:
    Any error raised by pd.read_csv (for example when the file is missing)
    propagates to the caller unchanged.
    '''
    # Keep the original "<folder>/<file_name>" path shape so default calls
    # resolve to exactly the same location as before.
    df = pd.read_csv(f"{folder}/{file_name}")
    return df

In [4]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score  # Change this based on your model type

def train_model(model, dataset_name, feature_columns=None, target_column="fall_binary", experiment_name="default_experiment"):
    """
    Loads the dataset, trains the model, and logs everything to MLflow.

    The dataset is read with load_data(), split 80/20 into train and test
    parts (random_state=42), the model is fitted on the training part and
    scored with accuracy_score on the test part. The dataset name, model
    class name, feature list, accuracy and the fitted model are recorded in
    a single MLflow run under the given experiment.

    Args:
        model: The machine learning model (already defined in the notebook).
            It is used through fit(X, y) and predict(X), and is logged with
            mlflow.sklearn.log_model.
        dataset_name: The name of the dataset CSV file.
        feature_columns: List of feature column names (overwritten if 'value' columns exist).
        target_column: The name of the target column. It is never selected as
            an auto-detected feature, even if its name starts with "value".
        experiment_name: Name of the MLflow experiment (default: "default_experiment").

    Returns:
        The trained model.

    Raises:
        ValueError: If no 'value' feature columns are found and feature_columns
            is None, or if target_column is not a column of the dataset.
    """

    # Load dataset
    df = load_data(dataset_name)

    # Auto-detect feature columns: every column whose name starts with "value".
    # The target column is skipped even when it matches the prefix; otherwise
    # the label itself would be handed to the model as an input feature.
    value_columns = [
        col for col in df.columns
        if col.startswith("value") and col != target_column
    ]

    if value_columns:
        feature_columns = value_columns  # Override feature selection
    elif feature_columns is None:
        raise ValueError("Feature columns must be specified if no 'value' columns exist.")

    # Ensure target column exists
    if target_column not in df.columns:
        raise ValueError(f"Target column '{target_column}' not found in dataset.")

    # Split into features and target
    X = df[feature_columns]
    y = df[target_column]

    # Train-test split (fixed seed so repeated runs use the same partition)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Start MLflow experiment
    mlflow.set_experiment(experiment_name)

    with mlflow.start_run():
        # Train the model
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)

        # Evaluate model (change metric depending on the model type)
        accuracy = accuracy_score(y_test, y_pred)

        # Log parameters, metrics, and model
        mlflow.log_param("dataset", dataset_name)
        mlflow.log_param("model", model.__class__.__name__)
        mlflow.log_param("features_used", str(feature_columns))
        mlflow.log_metric("accuracy", accuracy)
        mlflow.sklearn.log_model(model, "model")

        print(f"Model trained with accuracy: {accuracy:.4f}")

    return model

### Model Collection 

In [None]:
# --------------------------------------------------------------
# Tunable settings for the model cells (edit values here)
# --------------------------------------------------------------

# Data layout
INPUT_SHAPE = (500, 3)                 # (time steps, features); adjust to match the dataset

# Network architecture
RNN_UNITS = 64                         # number of RNN units
DENSE_UNITS = 32                       # number of neurons in the dense layer
DROPOUT_RATE = 0.2                     # dropout rate used for regularization

# Optimisation
OPTIMIZER = "adam"                     # optimizer name, e.g. "adam", "sgd", "rmsprop"
LEARNING_RATE = 0.001                  # learning rate
LOSS_FUNCTION = "binary_crossentropy"  # "binary_crossentropy" for classification
METRICS = ["accuracy"]                 # metrics to monitor

# Training schedule
EPOCHS = 10                            # number of training epochs
BATCH_SIZE = 16                        # batch size for training
