# Creating Training Loop

## VERSIONS
- 00_02: 
    - Cleanup Testing
- 00_01: 
    - Diagnosing Exploding Gradients
- 00_00: 
    - Initial Version

## Imports

In [98]:
# from importlib.metadata import version
import pandas as pd
# import seaborn as sn
from pathlib import Path
import os
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torch.nn import Module # For type hinting
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
import time
import argparse

## Data Preparation

### Custom Dataset

In [None]:
class WeatherDataset(Dataset):
    """Dataset class For the CA Weather Fire Dataset"""
    def __init__(self, csv_file="./Data/CA_Weather_Fire_Dataset_Cleaned.csv"):
        try:
            self.data = pd.read_csv(csv_file)   # Assign a pandas data frame
        except FileNotFoundError:
            raise FileNotFoundError(f"File not found: {csv_file}")

        # Define feature and label columns
        self.feature_columns = self.data.columns.drop("MAX_TEMP")
        self.label_column = "MAX_TEMP"

    def __getitem__(self, index):
        features = self.data.loc[index, self.feature_columns].values
        
        label = self.data.loc[index, self.label_column] # Extract the label for the given index
        return (
            torch.tensor(features, dtype=torch.float),
            torch.tensor(label, dtype=torch.float)
        )

    def __len__(self):
        return len(self.data)

### Data Pipeline

In [None]:
def data_pipeline(root_data_dir: str= "./Data", data_file_path: str="CA_Weather_Fire_Dataset_Cleaned.csv", data_splits_dir: str="DataSplits", batch_size: int=64, num_workers=0, pin_memory: bool=False, drop_last: bool=True) -> tuple[Dataset, Dataset, Dataset, DataLoader, DataLoader, DataLoader, StandardScaler]:
    """This function prepares the train, test, and validation datasets.
    Args:
        root_data_dir (str): The root of the Data Directory
        data_file_path (str): The name of the original dataset (with .csv file extension).
        data_splits_dir (str): Path to the train, test, and validation datasets.
        batch_size (int): The dataloader's batch_size.
        num_workers (int): The dataloader's number of workers.
        pin_memory (bool): The dataloader's pin memory option.
        drop_last (bool): The dataloader's drop_last option.

    Returns: 
        train_dataset (Dataset): Dataset Class for the training dataset.
        test_dataset (Dataset): Dataset Class for the test dataset.
        validation_dataset (Dataset): Dataset Class for the validation dataset.
        train_dataloader (DataLoader): The train dataloader.
        test_dataloader (DataLoader): The test dataloader.
        validation_dataloader (DataLoader): The validation dataloader.
        scaler (StandardScaler): The scaler used to scale the features of the model input.
        """
    
    if not root_data_dir or not data_file_path or not data_splits_dir:  # Check for empty strings at the beginning
        raise ValueError("File and directory paths cannot be empty strings.")
    print(f"root_data_dir: {root_data_dir}")
    WEATHER_DATA_DIR = Path(root_data_dir)                  # Set the Data Root Directory

    WEATHER_DATA_CLEAN_PATH = WEATHER_DATA_DIR / data_file_path # Set the path to the complete dataset

    if WEATHER_DATA_CLEAN_PATH.exists():
        print(f"CSV file detected, reading from {WEATHER_DATA_CLEAN_PATH}")
        df = pd.read_csv(WEATHER_DATA_CLEAN_PATH)
    else:
        print(f"Downloading csv file from HuggingFace")
        try:
            df = pd.read_csv("hf://datasets/MaxPrestige/CA_Weather_Fire_Dataset_Cleaned/Data/CA_Weather_Fire_Dataset_Cleaned.csv")  # Download and read the data into a pandas dataframe
            os.makedirs(WEATHER_DATA_DIR, exist_ok=True)        # Create the Data Root Directory
            df.to_csv(WEATHER_DATA_CLEAN_PATH, index=False)     # Save the file, omitting saving the index
        except Exception as e:
            raise RuntimeError(f"An unexpected error occurred during data download or saving: {e}")
    
    DATA_SPLITS_DIR = WEATHER_DATA_DIR / data_splits_dir
    TRAIN_DATA_PATH = DATA_SPLITS_DIR / "train.csv"
    TEST_DATA_PATH = DATA_SPLITS_DIR / "test.csv"
    VALIDATION_DATA_PATH = DATA_SPLITS_DIR / "val.csv"
    SCALER_PATH = DATA_SPLITS_DIR / "scaler.joblib"

    features = ['DAY_OF_YEAR', 'PRECIPITATION', 'LAGGED_PRECIPITATION', 'AVG_WIND_SPEED', 'MIN_TEMP']
    # features = ['PRECIPITATION','AVG_WIND_SPEED', 'MIN_TEMP']
    
    target = 'MAX_TEMP'

    if os.path.exists(TRAIN_DATA_PATH) and os.path.exists(TEST_DATA_PATH) and os.path.exists(VALIDATION_DATA_PATH) :
        print(f"Train, Test, and Validation csv datasets detected in '{DATA_SPLITS_DIR}', skipping generation")
        scaler = joblib.load(SCALER_PATH)
    else:
        print(f"Datasets not found in '{DATA_SPLITS_DIR}' or incomplete. Generating datasets...")
        os.makedirs(DATA_SPLITS_DIR, exist_ok=True)     # Create the Data Splits Parent Directory
        features = ['DAY_OF_YEAR', 'PRECIPITATION', 'LAGGED_PRECIPITATION', 'AVG_WIND_SPEED', 'MIN_TEMP']
        X = df[features]
        y = df[target]

        # split your data before scaling, shuffling the data
        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
        X_test, X_validation, y_test, y_validation = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

        # Initialize the StandardScaler
        scaler = StandardScaler()

        # Fit the scaler on the training data ONLY. Need to use the scaler on all inputs that the model receives.
        # This means the mean and standard deviation are calculated from the training set.
        scaler.fit(X_train)

        # Transform the training, validation, and test data using the fitted scaler
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        X_validation_scaled = scaler.transform(X_validation)

        # Save the fitted scaler object
        try:
            joblib.dump(scaler, SCALER_PATH)
            print(f"Input scaler stored in: ({SCALER_PATH})")
        except Exception as e:
            raise RuntimeError(f"An unexpected error occurred when saving Scaler: {e}")

        X_train_df = pd.DataFrame(X_train_scaled, columns=features)
        X_test_df = pd.DataFrame(X_test_scaled, columns=features)
        X_validation_df = pd.DataFrame(X_validation_scaled, columns=features)

        # Concatenate the features and labels back into a single DataFrame for each set
        train_data_frame = pd.concat([X_train_df, y_train.reset_index(drop=True)], axis=1)
        test_data_frame = pd.concat([X_test_df, y_test.reset_index(drop=True)], axis=1)
        validation_data_frame = pd.concat([X_validation_df, y_validation.reset_index(drop=True)], axis=1)

        # Saving the split data to csv files
        train_data_frame.to_csv(TRAIN_DATA_PATH, index=False)
        test_data_frame.to_csv(TEST_DATA_PATH, index=False)
        validation_data_frame.to_csv(VALIDATION_DATA_PATH, index=False)

    print(f"Initializing DataLoaders and Returning")
    # Initialize the Different Datasets
    train_dataset = WeatherDataset(TRAIN_DATA_PATH)
    test_dataset = WeatherDataset(TEST_DATA_PATH)
    validation_dataset = WeatherDataset(VALIDATION_DATA_PATH)
    # Initialize the Different DataLoaders using the Datasets
    print(f"Creating DataLoaders with batch_size ({batch_size}), num_workers ({num_workers}), pin_memory ({pin_memory}). Training dataset drop_last: ({drop_last})")
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=drop_last, shuffle=True)
    test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=drop_last)
    validation_dataloader = DataLoader(dataset=validation_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=drop_last)

    print(f"Training DataLoader has ({len(train_dataloader)}) batches, Test DataLoader has ({len(test_dataloader)}) batches, Validation DataLoader has ({len(validation_dataloader)}) batches")
    
    return (train_dataset, test_dataset, validation_dataset, train_dataloader, test_dataloader, validation_dataloader, scaler)
        

## Agent Architecture

### Layer Block

In [75]:
class LayerBlock(torch.nn.Module):
    """Class for the individual layer blocks."""
    def __init__(self, intermediate_dim=32, dropout_rate=0.1):
        super().__init__()
        self.Layer1 = torch.nn.Linear(in_features=intermediate_dim, out_features=intermediate_dim)
        self.Layer_Norm1 = torch.nn.LayerNorm(normalized_shape=intermediate_dim)
        self.ReLu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(p=dropout_rate)

    def forward(self, x):
        x = self.Layer1(x)
        x = self.Layer_Norm1(x)
        x = self.ReLu(x)
        x = self.dropout(x)
        return x

### Weather Agent

In [76]:
class WeatherAgent(torch.nn.Module):
    """Class for Agent Structure using multiple Layer Blocks."""
    def __init__(self, cfg):
        super().__init__()
        self.L1 = torch.nn.Linear(in_features=cfg["in_dim"], out_features=cfg["intermediate_dim"])
        
        self.Layers = torch.nn.Sequential(
            *[LayerBlock(intermediate_dim=cfg["intermediate_dim"], dropout_rate=cfg["dropout_rate"]) for _ in range(cfg["num_blocks"])]
        )
        self.out = torch.nn.Linear(in_features=cfg["intermediate_dim"], out_features=cfg["out_dim"])

    def forward(self, x):
        x = self.L1(x)
        x = self.Layers(x)
        x = self.out(x)
        return x

## Main

### Log Iteration Functions

In [6]:
def log_iteration(batch_idx: int, total_batches: int, loss_value: float):
    """Logs the loss of the current batch."""
    print(f"Epoch batch [{batch_idx}/{total_batches}] | Loss: {loss_value:.7f}")

In [7]:
def log_epoch_iteration(epoch: int, avg_epoch_loss: float):
    """Log Current Metrics accumulated in the current epoch iteration.
    Args:
        epoch (int): the current iteration
        avg_epoch_loss (float): The average loss of the current epoch
    Returns:
        N/A
        """
    if avg_epoch_loss:
        print(f"=====================  [EPOCH ({epoch}) LOGGING]  =====================")
        print("| AVERAGES of THIS EPOCH:")
        print(f"| ACCUMULATED LOSS: {avg_epoch_loss:.7f}")
        print(f"===========================================================")
    
    else:
        print("No Data collected for this epoch to log")

### Evaluate Model Function

In [64]:
def evaluate_model(model: Module, dataloader: DataLoader, current_epoch: int = None, max_epochs: int=None, device: str = 'cpu') -> float:
    """
    Evaluates the model on a given dataset and returns the average loss.
    Args:
        model (Module): The Model.
        dataloader (DataLoader): The dataloader to calculate average loss with.
        current_epoch (int): The current epoch [optional].
        max_epochs (int): The maximum number of epochs [optional].
        device (str): The device that the calculations will take place on.
    Returns:
        avg_loss (float): The calculated average loss.
    """
    model.eval()
    total_loss = 0.0
    # loss_fn = torch.nn.MELoss(reduction='sum') # Use reduction='sum' instead of 'mean' for total loss
    loss_fn = torch.nn.L1Loss(reduction='sum')
    if len(dataloader.dataset) == 0:
        print("Warning: Evaluation dataset is empty. Skipping evaluation.")
        return float('nan')
    
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.unsqueeze(dim=-1).to(device)
            outputs = model(batch_inputs)
            loss = loss_fn(outputs, batch_labels)
            total_loss += loss.item()
    
    avg_loss = total_loss / len(dataloader.dataset)     # Calculate the average loss on the dataset

    if current_epoch and max_epochs:   # If the function was called in the training loop
        print(f"===================  [Epoch ({current_epoch}/{max_epochs})]  ===================")
        print(f"Entire Validation Dataset Average Loss: {avg_loss:.4f}")
        print(f"====================================================")

    else:   # If the function was called outside of the training loop
        print(f"===============================================")
        print(f"Entire Dataset Average Loss: {avg_loss:.4f} ")
        print(f"=====================================================")
            
    return avg_loss

### Train Model Function

In [None]:
def train_model(model_config: dict, train_dataloader: DataLoader, validation_dataloader: DataLoader, model: WeatherAgent = None, epochs=32, learning_rate=0.0003, max_grad_norm=0.5, log_iterations=10, eval_iterations=10, device="cpu") -> WeatherAgent:
    """The Model Training function.

    Args:
        model_config (dict): The base configurations for building the policies.
        train_dataloader (DataLoader): The dataloader for the training loop.
        validation_dataloader (DataLoader): The dataloader for the validation loop.
        model (WeatherAgent): The model to be trained.
        epochs (int): The number of times the outer loop is performed.
        learning_rate (float): The hyperparameter that affects how much the model's parameters learn on each update iteration.
        max_grad_norm (float): Used to promote numerical stability and prevent exploding gradients.
        log_iterations (int): Used to log information about the state of the Agent.
        eval_iterations (int): Used to run an evaluation of the Agent.
        device (str): The device that the model will be trained on.

    Returns: 
        agent (Module): The Trained Model in evaluation mode.
    """
    print(f"Training Model on {device} with {epochs} main epochs, {learning_rate} learning rate, max_grad_norm={max_grad_norm}.")
    print(f"Logging every {log_iterations} epoch iterations, evaluating every {eval_iterations} epoch iterations.")

    agent = (model if model is not None else WeatherAgent(model_config)).to(device) # Create agent if nothing was passed, otherwise, create the agent. Send agent to device.

    optimizer = torch.optim.AdamW(params=agent.parameters(), lr=learning_rate, weight_decay=0.01)
    loss_fn = torch.nn.L1Loss(reduction='mean')      # Define the Loss function


    history = {'train_loss': [], 'val_loss': []}

    train_dataloader_length = len(train_dataloader)
    agent.train()   # Set agent to training mode
    for epoch in tqdm(range(epochs), desc=f">>>>>>>>>>>>>>>>>>>>>\nMain Epoch (Outer Loop)", leave=True):

        epoch_loss_total = 0.0
        for batch_idx, (inputs, labels) in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{epochs} - Training", leave=False)):           # Get a mini-batch of training examples from the dataloader
            # optimizer.zero_grad(set_to_none=True)       # Clear the gradients built up; Setting to None to improve performance
            optimizer.zero_grad()       # Clear the gradients built up; Setting to None to improve performance

            inputs, labels = inputs.to(device), labels.unsqueeze(dim=-1).to(device)   # Move the inputs and labels to the device

            agent_outputs = agent(inputs)       # Pass the inputs to the model and get the outputs.

            loss = loss_fn(agent_outputs, labels)      # Calculate the mini-batch loss
            epoch_loss_total += loss.item()
            
            loss.backward()         # Calculate the loss with respect to the model parameters
            torch.nn.utils.clip_grad_norm_(parameters=agent.parameters(), max_norm=max_grad_norm)   # Prevent the gradients from affecting the model parameters too much and reduce the risk of exploding gradients

            optimizer.step()      # Update the model's parameters using the learning rate

            # LOGGING LOSS OF CURRENT ITERATION
            if (batch_idx + 1) % log_iterations == 0:
                log_iteration(batch_idx=(batch_idx + 1), total_batches=train_dataloader_length, loss_value=loss.item())

        # CALCULATE AND STORE THE AVERAGE EPOCH LOSS
        epoch_avg_loss = epoch_loss_total / train_dataloader_length
        history["train_loss"].append(epoch_avg_loss)

        # LOG THE AVERAGE LOSS OF THE EPOCH
        log_epoch_iteration(epoch=epoch, avg_epoch_loss=epoch_avg_loss)

        # EVALUATE THE MODEL
        if (epoch + 1) % eval_iterations == 0:
            val_loss = evaluate_model(model=agent, dataloader=validation_dataloader, current_epoch=(epoch + 1), max_epochs=epochs, device=device)
            history["val_loss"].append(val_loss)
            agent.train()   # Set agent to training mode
        
    return agent.eval(), history

## Main Loop

In [159]:
def main(args) -> int:
    print("SETTING UP FOR TRAINING")
    
    if args.device:     # Check if the user specified to use a CPU or GPU for training
        device = args.device
    else:
        if args.use_cuda:   # Check if the user wanted to use CUDA if available.
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    SAVE_LOCATION = "./models/Weather-Agent.pt"   # Define the model path and name of the trained model weights

    BASE_CONFIG={
    "in_dim": 5,
    "intermediate_dim": 128,
    "out_dim": 1,
    "num_blocks": 12,
    "dropout_rate": 0.1
}

    # --- Data Preparation Pipeline --- 
    try:
        (train_dataset, test_dataset, validation_dataset, train_dataloader, test_dataloader, validation_dataloader, scaler) = data_pipeline(batch_size=args.dataloader_batch_size)
    except ValueError as e:
        print(f"Caught an error: {e}")

    print("BEGINNING TRAINING SCRIPT")
    start_time=time.time()

    trained_policy, training_history = train_model(
        model_config=BASE_CONFIG,
        train_dataloader=train_dataloader,
        validation_dataloader=validation_dataloader,
        model=None,     # Create new model
        epochs=args.epochs,
        learning_rate=args.learning_rate,
        max_grad_norm=args.max_grad_norm,
        log_iterations=args.log_iterations,
        eval_iterations=args.eval_iterations,
        device=device,
    )
    end_time=time.time()

    # --- Calculate Training Time --- 

    elapsed_time= end_time - start_time
    hrs = int(elapsed_time / 3600)
    min = int((elapsed_time % 3600) / 60)
    seconds_remaining = elapsed_time - (hrs * 3600 ) - (min * 60)

    print(f"FINISHED MODEL TRAINING. \nTRAINING TOOK: {hrs} Hours, {min} Minutes, and {seconds_remaining:.3f} Seconds")

    # --- Testing Trained Model --- 
    print("\nTESTING THE TRAINED POLICY:")
    test_loss = evaluate_model(model=trained_policy, dataloader=test_dataloader, current_epoch=None, max_epochs=None, device='cpu')

    # ---  Saving Model Section  ---   

    if args.save_model:     # Check if the user wants to save the trained model weights
        if args.model_output_path:     # Check if the user specified a target save location
            SAVE_LOCATION=args.model_output_path

        parent_dir = os.path.dirname(SAVE_LOCATION)

        # If parent_dir is empty, it means the SAVE_LOCATION is just a filename
        # in the current directory, so no new directories need to be created.
        if parent_dir and parent_dir != '.':
            try:
                os.makedirs(parent_dir, exist_ok=True)
                print(f"Parent directory '{parent_dir}' created to store the model.")
            except OSError as e:
                print(f"Error creating directory {parent_dir}: {e}")
                SAVE_LOCATION='model.pt'      # Fall back to a default save location if problem occurs.
        
        try:
            torch.save(trained_policy.state_dict(), f=SAVE_LOCATION)
            print(f"Model weights saved in: {SAVE_LOCATION}")
        except Exception as e:
            print(f"Error saving model to {SAVE_LOCATION}: {e}")

    return 0

In [None]:
# Example usage (assuming you have a way to call this function, e.g., in a main block)
if __name__ == '__main__':
    # --- Begin Timing Main Script Execution Time ---
    main_start_time=time.time()

    parser = argparse.ArgumentParser(description="Train and evaluate a Regression Agent.")

    parser.add_argument('--epochs', type=int, default=8,
        help='(int, default=8) Number of training epochs to run.')

    parser.add_argument('--learning_rate', type=float, default=0.0003,
        help='(float, default=0.0003) Learning rate used by the optimizer.')
    
    parser.add_argument('--max_grad_norm', type=float, default=3.0,
        help='(float, default=3.0) The Maximum L2 Norm of the gradients for Gradient Clipping.')

    parser.add_argument('--dataloader_batch_size', type=int, default=64,
        help='(int, default=64) Batch size used by the dataloaders for training, validation, and testing.')

    parser.add_argument('--dataloader_pin_memory', action='store_false',
        help='(bool, default=True) Disable pinned memory in dataloaders (enabled by default).')

    parser.add_argument('--dataloader_num_workers', type=int, default=0,
        help='(int, default=0) Number of subprocesses to use for data loading.')

    parser.add_argument('--log_iterations', type=int, default=32,
        help='(int, default=32) Frequency (in iterations) to log training progress.')

    parser.add_argument('--eval_iterations', type=int, default=32,
        help='(int, default=32) Frequency (in iterations) to evaluate the model.')

    parser.add_argument('--use_cuda', action='store_true',
        help='(bool, default=False) Enable CUDA for training if available.')

    parser.add_argument('--device', type=str, default='cpu',
        help='(str, default="cpu") Device to use for training (e.g., "cpu", "cuda:0"). Overrides --use_cuda.')

    parser.add_argument('--save_model', action='store_true',
        help='(bool, default=False) Save the trained model after training.')

    parser.add_argument('--model_output_path', type=str, default='models/Weather-Agent.pt',
        help='(str, default="models/Weather-Agent.pt") File path to save the trained model.')


    # # Parse the arguments (disabled for ipynb testing)
    # args = parser.parse_args()

    # For ipynb testing
    simulated_args = [
        '--epochs', '2',
        '--learning_rate', '0.003',
        '--log_iterations', '1',
        '--eval_iterations', '1',
        '--save_model',
        '--model_output_path', 'models/Weather-Agent_01.pt'
    ]
    args = parser.parse_args(args=simulated_args)
    print(args)
    ## End of ipynb testing

    ret = main(args)

    main_end_time=time.time()

    # --- Calculate Main Script Execution Time --- 

    elapsed_time= main_end_time - main_start_time
    hrs = int(elapsed_time / 3600)
    min = int((elapsed_time % 3600) / 60)
    seconds_remaining = elapsed_time - (hrs * 3600 ) - (min * 60)

    print(f"FINISHED MAIN SCRIPT\nOVERALL DURATION: {hrs} Hours, {min} Minutes, and {seconds_remaining:.3f} Seconds")
    if ret == 0:
        print("TERMINATING PROGRAM")
    else: 
        print("Main Scipt Error")

Namespace(epochs=2, learning_rate=0.003, max_grad_norm=3.0, dataloader_batch_size=64, dataloader_pin_memory=True, dataloader_num_workers=0, log_iterations=1, eval_iterations=1, use_cuda=False, device='cpu', save_model=True, model_output_path='models/Weather-Agent_01.pt')
SETTING UP FOR TRAINING
root_data_dir: ../Data
CSV file detected, reading from ..\Data\CA_Weather_Fire_Dataset_Cleaned.csv
Train, Test, and Validation csv datasets detected in '..\Data\DataSplits', skipping generation
Initializing DataLoaders and Returning
Creating DataLoaders with batch_size (64), num_workers (0), pin_memory (False). Training dataset drop_last: (True)
Training DataLoader has (187) batches, Test DataLoader has (23) batches, Validation DataLoader has (23) batches
BEGINNING TRAINING SCRIPT
Training Model on cpu with 2 main epochs, 0.003 learning rate, max_grad_norm=3.0.
Logging every 1 epoch iterations, evaluating every 1 epoch iterations.


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):   0%|          | 0/2 [00:00<?, ?it/s]


Epoch batch [1/187] | Loss: 70.1400757
Epoch batch [2/187] | Loss: 68.1602020
Epoch batch [3/187] | Loss: 67.8848953
Epoch batch [4/187] | Loss: 66.3139648
Epoch batch [5/187] | Loss: 67.0288162


Epoch 1/2 - Training:   3%|▎         | 5/187 [00:00<00:10, 16.84it/s][A

Epoch batch [6/187] | Loss: 67.9754333
Epoch batch [7/187] | Loss: 66.1805420
Epoch batch [8/187] | Loss: 66.4998779
Epoch batch [9/187] | Loss: 65.0562210





Epoch batch [10/187] | Loss: 66.3607101
Epoch batch [11/187] | Loss: 66.6029587
Epoch batch [12/187] | Loss: 66.1226349
Epoch batch [13/187] | Loss: 67.3417892
Epoch batch [14/187] | Loss: 65.1002121


Epoch 1/2 - Training:   7%|▋         | 14/187 [00:00<00:09, 18.79it/s][A


Epoch batch [15/187] | Loss: 63.7232895
Epoch batch [16/187] | Loss: 63.7492867
Epoch batch [17/187] | Loss: 64.3647919
Epoch batch [18/187] | Loss: 63.9357376


Epoch 1/2 - Training:  10%|▉         | 18/187 [00:01<00:10, 16.45it/s][A

Epoch batch [19/187] | Loss: 65.1810226
Epoch batch [20/187] | Loss: 63.4720764
Epoch batch [21/187] | Loss: 62.6652184
Epoch batch [22/187] | Loss: 62.4305687





Epoch batch [23/187] | Loss: 65.5961914
Epoch batch [24/187] | Loss: 61.6426010
Epoch batch [25/187] | Loss: 62.1015701
Epoch batch [26/187] | Loss: 62.5686073
Epoch batch [27/187] | Loss: 62.1772156


Epoch 1/2 - Training:  14%|█▍        | 27/187 [00:01<00:08, 18.55it/s][A

Epoch batch [28/187] | Loss: 61.0596275
Epoch batch [29/187] | Loss: 60.3999443
Epoch batch [30/187] | Loss: 61.7611542
Epoch batch [31/187] | Loss: 61.2464218
Epoch batch [32/187] | Loss: 59.8869781




Epoch batch [33/187] | Loss: 62.1924438
Epoch batch [34/187] | Loss: 58.6151505
Epoch batch [35/187] | Loss: 60.4112473
Epoch batch [36/187] | Loss: 57.4276352
Epoch batch [37/187] | Loss: 59.3338127




Epoch batch [38/187] | Loss: 58.7051086
Epoch batch [39/187] | Loss: 59.2376938
Epoch batch [40/187] | Loss: 57.1870003
Epoch batch [41/187] | Loss: 58.9570274




Epoch batch [42/187] | Loss: 58.9952965
Epoch batch [43/187] | Loss: 58.5663452
Epoch batch [44/187] | Loss: 57.4245377
Epoch batch [45/187] | Loss: 58.4157028




Epoch batch [46/187] | Loss: 58.0556717
Epoch batch [47/187] | Loss: 56.2730179
Epoch batch [48/187] | Loss: 55.6443558
Epoch batch [49/187] | Loss: 56.6646423




Epoch batch [50/187] | Loss: 56.1174240
Epoch batch [51/187] | Loss: 55.4402542
Epoch batch [52/187] | Loss: 54.8530579
Epoch batch [53/187] | Loss: 53.1472473




Epoch batch [54/187] | Loss: 54.0842285
Epoch batch [55/187] | Loss: 54.5163116
Epoch batch [56/187] | Loss: 51.7625809
Epoch batch [57/187] | Loss: 54.0427017




Epoch batch [58/187] | Loss: 53.0251122
Epoch batch [59/187] | Loss: 52.5202408
Epoch batch [60/187] | Loss: 51.9548378
Epoch batch [61/187] | Loss: 52.1106300
Epoch batch [62/187] | Loss: 53.2005081





Epoch batch [63/187] | Loss: 50.2963791
Epoch batch [64/187] | Loss: 51.5730820
Epoch batch [65/187] | Loss: 50.3407745


Epoch 1/2 - Training:  35%|███▍      | 65/187 [00:03<00:08, 14.20it/s][A

Epoch batch [66/187] | Loss: 52.3080597
Epoch batch [67/187] | Loss: 49.4994545
Epoch batch [68/187] | Loss: 49.1236534




Epoch batch [69/187] | Loss: 49.0743980
Epoch batch [70/187] | Loss: 49.9359703
Epoch batch [71/187] | Loss: 48.4706573




Epoch batch [72/187] | Loss: 47.6521606
Epoch batch [73/187] | Loss: 48.5719910
Epoch batch [74/187] | Loss: 48.2192345
Epoch batch [75/187] | Loss: 46.0685844





Epoch batch [76/187] | Loss: 47.8280640
Epoch batch [77/187] | Loss: 45.6128998
Epoch batch [78/187] | Loss: 45.6582794
Epoch batch [79/187] | Loss: 45.8259010


Epoch 1/2 - Training:  42%|████▏     | 79/187 [00:04<00:06, 15.56it/s][A

Epoch batch [80/187] | Loss: 45.4121780
Epoch batch [81/187] | Loss: 45.4136276
Epoch batch [82/187] | Loss: 44.1586456
Epoch batch [83/187] | Loss: 43.5690575




Epoch batch [84/187] | Loss: 44.4434624
Epoch batch [85/187] | Loss: 42.0782661
Epoch batch [86/187] | Loss: 41.8531113
Epoch batch [87/187] | Loss: 43.8563385




Epoch batch [88/187] | Loss: 40.3195152
Epoch batch [89/187] | Loss: 41.3007965
Epoch batch [90/187] | Loss: 42.6369362
Epoch batch [91/187] | Loss: 40.0566711




Epoch batch [92/187] | Loss: 41.0889893
Epoch batch [93/187] | Loss: 37.7453995
Epoch batch [94/187] | Loss: 40.2092514




Epoch batch [95/187] | Loss: 40.3295441
Epoch batch [96/187] | Loss: 37.3151474
Epoch batch [97/187] | Loss: 37.9390869
Epoch batch [98/187] | Loss: 38.3960876




Epoch batch [99/187] | Loss: 37.4543724
Epoch batch [100/187] | Loss: 35.7070236
Epoch batch [101/187] | Loss: 37.5486259
Epoch batch [102/187] | Loss: 36.2819939
Epoch batch [103/187] | Loss: 35.5512772




Epoch batch [104/187] | Loss: 35.7856598
Epoch batch [105/187] | Loss: 34.9512711
Epoch batch [106/187] | Loss: 34.6282654
Epoch batch [107/187] | Loss: 32.6976776




Epoch batch [108/187] | Loss: 32.2984924
Epoch batch [109/187] | Loss: 32.1576958
Epoch batch [110/187] | Loss: 31.8432999
Epoch batch [111/187] | Loss: 31.5194702




Epoch batch [112/187] | Loss: 31.1202278
Epoch batch [113/187] | Loss: 29.5696564
Epoch batch [114/187] | Loss: 29.1379299
Epoch batch [115/187] | Loss: 28.4384098




Epoch batch [116/187] | Loss: 28.7278404
Epoch batch [117/187] | Loss: 29.0406284
Epoch batch [118/187] | Loss: 28.6249657
Epoch batch [119/187] | Loss: 27.1966991




Epoch batch [120/187] | Loss: 28.2284336
Epoch batch [121/187] | Loss: 25.6611633
Epoch batch [122/187] | Loss: 26.5191040
Epoch batch [123/187] | Loss: 24.2050285




Epoch batch [124/187] | Loss: 26.0359802
Epoch batch [125/187] | Loss: 23.5956192
Epoch batch [126/187] | Loss: 24.6318169
Epoch batch [127/187] | Loss: 25.4187145




Epoch batch [128/187] | Loss: 25.3627300
Epoch batch [129/187] | Loss: 21.7063160
Epoch batch [130/187] | Loss: 21.8021793
Epoch batch [131/187] | Loss: 19.9983253




Epoch batch [132/187] | Loss: 22.3391304
Epoch batch [133/187] | Loss: 19.6662388




Epoch batch [134/187] | Loss: 21.0792122
Epoch batch [135/187] | Loss: 19.1410332
Epoch batch [136/187] | Loss: 19.0673485
Epoch batch [137/187] | Loss: 19.4316845




Epoch batch [138/187] | Loss: 19.1897545
Epoch batch [139/187] | Loss: 18.3253841
Epoch batch [140/187] | Loss: 18.8317795
Epoch batch [141/187] | Loss: 17.9591160




Epoch batch [142/187] | Loss: 16.5005741
Epoch batch [143/187] | Loss: 16.6606579
Epoch batch [144/187] | Loss: 14.2185946




Epoch batch [145/187] | Loss: 15.4620285
Epoch batch [146/187] | Loss: 13.8257217
Epoch batch [147/187] | Loss: 14.6363039




Epoch batch [148/187] | Loss: 12.7458858
Epoch batch [149/187] | Loss: 13.2131510
Epoch batch [150/187] | Loss: 12.4107475




Epoch batch [151/187] | Loss: 10.8883438
Epoch batch [152/187] | Loss: 12.4115906
Epoch batch [153/187] | Loss: 9.4000340




Epoch batch [154/187] | Loss: 10.0288601
Epoch batch [155/187] | Loss: 9.3276577
Epoch batch [156/187] | Loss: 10.6778154
Epoch batch [157/187] | Loss: 7.8205237





Epoch batch [158/187] | Loss: 9.9620333
Epoch batch [159/187] | Loss: 7.7630882
Epoch batch [160/187] | Loss: 8.3291416
Epoch batch [161/187] | Loss: 8.5877676
Epoch batch [162/187] | Loss: 7.0911846


Epoch 1/2 - Training:  87%|████████▋ | 162/187 [00:10<00:01, 16.08it/s][A


Epoch batch [163/187] | Loss: 7.9986954
Epoch batch [164/187] | Loss: 6.8702445
Epoch batch [165/187] | Loss: 7.2214222
Epoch batch [166/187] | Loss: 7.1084528
Epoch batch [167/187] | Loss: 6.3668976


Epoch 1/2 - Training:  89%|████████▉ | 167/187 [00:10<00:01, 17.67it/s][A

Epoch batch [168/187] | Loss: 7.4980950
Epoch batch [169/187] | Loss: 5.5494947
Epoch batch [170/187] | Loss: 5.2898464
Epoch batch [171/187] | Loss: 5.8372760




Epoch batch [172/187] | Loss: 6.2006140
Epoch batch [173/187] | Loss: 6.0691109
Epoch batch [174/187] | Loss: 5.7595100




Epoch batch [175/187] | Loss: 6.8877854
Epoch batch [176/187] | Loss: 5.8339510
Epoch batch [177/187] | Loss: 6.7584357
Epoch batch [178/187] | Loss: 6.0163589





Epoch batch [179/187] | Loss: 7.1420660
Epoch batch [180/187] | Loss: 6.6053863
Epoch batch [181/187] | Loss: 6.7224746
Epoch batch [182/187] | Loss: 7.0827303
Epoch batch [183/187] | Loss: 6.0128117


Epoch 1/2 - Training:  98%|█████████▊| 183/187 [00:11<00:00, 18.03it/s][A

Epoch batch [184/187] | Loss: 6.7132168
Epoch batch [185/187] | Loss: 6.6905122
Epoch batch [186/187] | Loss: 7.8082123
Epoch batch [187/187] | Loss: 6.5152988
| AVERAGES of THIS EPOCH:
| ACCUMULATED LOSS: 37.1272315


>>>>>>>>>>>>>>>>>>>>>
Main Epoch (Outer Loop):  50%|█████     | 1/2 [00:12<00:12, 12.23s/it]

Entire Validation Dataset Average Loss: 5.3360




Epoch batch [1/187] | Loss: 6.9470329
Epoch batch [2/187] | Loss: 6.5630612




Epoch batch [3/187] | Loss: 7.6005969
Epoch batch [4/187] | Loss: 7.0762086
Epoch batch [5/187] | Loss: 7.9681711




Epoch batch [6/187] | Loss: 7.1595964
Epoch batch [7/187] | Loss: 6.8101010
Epoch batch [8/187] | Loss: 6.3882542
Epoch batch [9/187] | Loss: 5.9841256




Epoch batch [10/187] | Loss: 7.3917055
Epoch batch [11/187] | Loss: 7.1466303
Epoch batch [12/187] | Loss: 5.8305426
Epoch batch [13/187] | Loss: 6.7870975




Epoch batch [14/187] | Loss: 6.8915749
Epoch batch [15/187] | Loss: 6.5520191
Epoch batch [16/187] | Loss: 6.3175406
Epoch batch [17/187] | Loss: 5.6562591




Epoch batch [18/187] | Loss: 6.2456045
Epoch batch [19/187] | Loss: 7.6980371
Epoch batch [20/187] | Loss: 6.4351196
Epoch batch [21/187] | Loss: 7.8966894




Epoch batch [22/187] | Loss: 6.2829542
Epoch batch [23/187] | Loss: 6.7609091
Epoch batch [24/187] | Loss: 5.8767281
Epoch batch [25/187] | Loss: 6.1891518
Epoch batch [26/187] | Loss: 5.2323112




Epoch batch [27/187] | Loss: 6.8411412
Epoch batch [28/187] | Loss: 6.1003823
Epoch batch [29/187] | Loss: 6.7653613
Epoch batch [30/187] | Loss: 6.1123643




Epoch batch [31/187] | Loss: 5.4515719
Epoch batch [32/187] | Loss: 6.0340567
Epoch batch [33/187] | Loss: 6.3670955
Epoch batch [34/187] | Loss: 6.9453778




Epoch batch [35/187] | Loss: 6.9589710
Epoch batch [36/187] | Loss: 7.0033336
Epoch batch [37/187] | Loss: 6.6139169
Epoch batch [38/187] | Loss: 5.9928942




Epoch batch [39/187] | Loss: 6.1628380
Epoch batch [40/187] | Loss: 5.5005307
Epoch batch [41/187] | Loss: 5.9963827
Epoch batch [42/187] | Loss: 6.1652722




Epoch batch [43/187] | Loss: 5.8862610
Epoch batch [44/187] | Loss: 7.3021445
Epoch batch [45/187] | Loss: 7.1701355
Epoch batch [46/187] | Loss: 5.8235140




Epoch batch [47/187] | Loss: 6.0267358
Epoch batch [48/187] | Loss: 7.3255730
Epoch batch [49/187] | Loss: 6.6863370
Epoch batch [50/187] | Loss: 6.5689049





Epoch batch [51/187] | Loss: 6.7330761
Epoch batch [52/187] | Loss: 5.9750676
Epoch batch [53/187] | Loss: 5.4739838
Epoch batch [54/187] | Loss: 7.3157539


Epoch 2/2 - Training:  29%|██▉       | 54/187 [00:03<00:07, 17.22it/s][A

Epoch batch [55/187] | Loss: 5.9239264
Epoch batch [56/187] | Loss: 5.3838186
Epoch batch [57/187] | Loss: 6.1671677
Epoch batch [58/187] | Loss: 6.2600117




Epoch batch [59/187] | Loss: 7.0201421
Epoch batch [60/187] | Loss: 6.0317602
Epoch batch [61/187] | Loss: 6.1082630




Epoch batch [62/187] | Loss: 7.1050291
Epoch batch [63/187] | Loss: 7.2152386
Epoch batch [64/187] | Loss: 5.9621582





Epoch batch [65/187] | Loss: 6.8941612
Epoch batch [66/187] | Loss: 6.7885199
Epoch batch [67/187] | Loss: 4.9352660


Epoch 2/2 - Training:  36%|███▌      | 67/187 [00:04<00:08, 13.63it/s][A

Epoch batch [68/187] | Loss: 6.1191835
Epoch batch [69/187] | Loss: 5.6706910
Epoch batch [70/187] | Loss: 6.0035696
Epoch batch [71/187] | Loss: 7.9507551
Epoch batch [72/187] | Loss: 6.6715741





Epoch batch [73/187] | Loss: 6.9274149
Epoch batch [74/187] | Loss: 5.6983728
Epoch batch [75/187] | Loss: 5.4890947
Epoch batch [76/187] | Loss: 4.9205799


Epoch 2/2 - Training:  41%|████      | 76/187 [00:04<00:07, 15.29it/s][A

Epoch batch [77/187] | Loss: 6.2467327
Epoch batch [78/187] | Loss: 6.0023975
Epoch batch [79/187] | Loss: 6.0276175




Epoch batch [80/187] | Loss: 5.6457911
Epoch batch [81/187] | Loss: 7.9319425
Epoch batch [82/187] | Loss: 6.5496092
Epoch batch [83/187] | Loss: 6.2406044




Epoch batch [84/187] | Loss: 5.7938070
Epoch batch [85/187] | Loss: 6.7754869
Epoch batch [86/187] | Loss: 5.1830254
Epoch batch [87/187] | Loss: 5.9783926




Epoch batch [88/187] | Loss: 6.5920558
Epoch batch [89/187] | Loss: 5.6028490
Epoch batch [90/187] | Loss: 5.9819894
Epoch batch [91/187] | Loss: 5.6808009




Epoch batch [92/187] | Loss: 5.0330572
Epoch batch [93/187] | Loss: 5.7933750
Epoch batch [94/187] | Loss: 6.1120548
Epoch batch [95/187] | Loss: 5.7880025




Epoch batch [96/187] | Loss: 6.4506216
Epoch batch [97/187] | Loss: 6.3593926
Epoch batch [98/187] | Loss: 5.7347903




Epoch batch [99/187] | Loss: 6.4590359
Epoch batch [100/187] | Loss: 5.7832251
Epoch batch [101/187] | Loss: 5.3350697
Epoch batch [102/187] | Loss: 5.1133680





Epoch batch [103/187] | Loss: 5.4966078
Epoch batch [104/187] | Loss: 5.8974719
Epoch batch [105/187] | Loss: 7.2380066
Epoch batch [106/187] | Loss: 6.2766418


Epoch 2/2 - Training:  57%|█████▋    | 106/187 [00:06<00:04, 16.47it/s][A

Epoch batch [107/187] | Loss: 6.4270377
Epoch batch [108/187] | Loss: 5.6208420
Epoch batch [109/187] | Loss: 6.7607594
Epoch batch [110/187] | Loss: 6.5223680




Epoch batch [111/187] | Loss: 6.0486631
Epoch batch [112/187] | Loss: 6.7772636
Epoch batch [113/187] | Loss: 5.9341550
Epoch batch [114/187] | Loss: 7.1816874




Epoch batch [115/187] | Loss: 6.4316559
Epoch batch [116/187] | Loss: 7.2120132
Epoch batch [117/187] | Loss: 6.5180635
Epoch batch [118/187] | Loss: 6.3783708




Epoch batch [119/187] | Loss: 4.7076645
Epoch batch [120/187] | Loss: 6.2921238
Epoch batch [121/187] | Loss: 5.9893823
Epoch batch [122/187] | Loss: 5.0884104




Epoch batch [123/187] | Loss: 6.9014192
Epoch batch [124/187] | Loss: 5.3937545
Epoch batch [125/187] | Loss: 6.1784463
Epoch batch [126/187] | Loss: 6.2069769




Epoch batch [127/187] | Loss: 6.4685273
Epoch batch [128/187] | Loss: 4.7249460
Epoch batch [129/187] | Loss: 6.2358098
Epoch batch [130/187] | Loss: 6.4785199




Epoch batch [131/187] | Loss: 6.3454118
Epoch batch [132/187] | Loss: 5.6010628
Epoch batch [133/187] | Loss: 6.3772297
Epoch batch [134/187] | Loss: 6.1484375




Epoch batch [135/187] | Loss: 6.3772545
Epoch batch [136/187] | Loss: 4.7496700
Epoch batch [137/187] | Loss: 5.6031408
Epoch batch [138/187] | Loss: 5.8776503
Epoch batch [139/187] | Loss: 5.4263029




Epoch batch [140/187] | Loss: 5.5468678
Epoch batch [141/187] | Loss: 6.7286777
Epoch batch [142/187] | Loss: 6.2394381
Epoch batch [143/187] | Loss: 5.4543867
Epoch batch [144/187] | Loss: 4.6275854




Epoch batch [145/187] | Loss: 6.3616514
Epoch batch [146/187] | Loss: 6.5450506
Epoch batch [147/187] | Loss: 6.8846521
Epoch batch [148/187] | Loss: 6.8152094




Epoch batch [149/187] | Loss: 6.9628263
Epoch batch [150/187] | Loss: 7.2992392




Epoch batch [151/187] | Loss: 4.8577924
Epoch batch [152/187] | Loss: 5.5219917
Epoch batch [153/187] | Loss: 6.5660105
Epoch batch [154/187] | Loss: 5.3946161




Epoch batch [155/187] | Loss: 5.2645493
Epoch batch [156/187] | Loss: 6.2263260
Epoch batch [157/187] | Loss: 7.0218711
Epoch batch [158/187] | Loss: 5.6351895




Epoch batch [159/187] | Loss: 5.6761980
Epoch batch [160/187] | Loss: 6.1377153
Epoch batch [161/187] | Loss: 5.8515215
Epoch batch [162/187] | Loss: 5.5965443




Epoch batch [163/187] | Loss: 5.9736977
Epoch batch [164/187] | Loss: 4.8118181
Epoch batch [165/187] | Loss: 5.7709904
Epoch batch [166/187] | Loss: 5.3343434




Epoch batch [167/187] | Loss: 5.3499036
Epoch batch [168/187] | Loss: 5.0573053
Epoch batch [169/187] | Loss: 6.4104075
Epoch batch [170/187] | Loss: 5.6306529




Epoch batch [171/187] | Loss: 5.5852776
Epoch batch [172/187] | Loss: 5.1808095
Epoch batch [173/187] | Loss: 5.2630191
Epoch batch [174/187] | Loss: 5.3726945



Epoch 2/2 - Training:  96%|█████████▌| 179/187 [00:10<00:00, 18.26it/s]

Epoch batch [175/187] | Loss: 6.8418489
Epoch batch [176/187] | Loss: 5.4197249
Epoch batch [177/187] | Loss: 6.1076665
Epoch batch [178/187] | Loss: 6.1374264
Epoch batch [179/187] | Loss: 5.4293580


[A

Epoch batch [180/187] | Loss: 5.5705748
Epoch batch [181/187] | Loss: 5.9578013
Epoch batch [182/187] | Loss: 6.0178537
Epoch batch [183/187] | Loss: 5.1916780
Epoch batch [184/187] | Loss: 5.3249416




Epoch batch [185/187] | Loss: 5.7195196
Epoch batch [186/187] | Loss: 6.7445779
Epoch batch [187/187] | Loss: 5.8553877
| AVERAGES of THIS EPOCH:
| ACCUMULATED LOSS: 6.1689718


>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>p): 100%|██████████| 2/2 [00:24<00:00, 12.05s/it]
Main Epoch (Outer Loop): 100%|██████████| 2/2 [00:24<00:00, 12.08s/it]

Entire Validation Dataset Average Loss: 4.6751
FINISHED MODEL TRAINING. 
TRAINING TOOK: 0 Hours, 0 Minutes, and 24.189 Seconds

TESTING THE TRAINED POLICY:





Entire Dataset Average Loss: 4.7193 
Parent directory 'models' created to store the model.
Model weights saved in: models/Weather-Agent_01.pt
FINISHED MAIN SCRIPT
OVERALL DURATION: 0 Hours, 0 Minutes, and 24.914 Seconds
TERMINATING PROGRAM
