In [1]:
import numpy as np
import matplotlib.pyplot as plt
import transformers
import os
import random
import h5py
import pandas as pd
import time
import importlib
from pprint import pprint
import torch 
from tqdm import tqdm
from src.qwen import load_qwen
import re
import yaml
import gc
from torch.utils.data import TensorDataset, DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Default to the CPU and switch to the GPU only when torch reports one.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print('Device-activated: ', device)

# HDF5 file handed to load_data further down; change this to the correct path.
file_path = "data/lotka_volterra_data.h5"

Device-activated:  cpu


In [3]:
# Shell escape: print the host CPU description (architecture, core count, NUMA layout, ...).
!lscpu

Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              76
On-line CPU(s) list: 0-75
Thread(s) per core:  1
Core(s) per socket:  38
Socket(s):           2
NUMA node(s):        2
Vendor ID:           GenuineIntel
CPU family:          6
Model:               106
Model name:          Intel(R) Xeon(R) Platinum 8368Q CPU @ 2.60GHz
Stepping:            6
CPU MHz:             800.000
CPU max MHz:         3700.0000
CPU min MHz:         800.0000
BogoMIPS:            5200.00
L1d cache:           48K
L1i cache:           32K
L2 cache:            1280K
L3 cache:            58368K
NUMA node0 CPU(s):   0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74
NUMA node1 CPU(s):   1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush d

In [None]:
# Import the project package and its submodules, then reload each of them so
# that edits made to the files on disk are picked up without restarting the
# kernel.
import src
import src.data_prepare
import src.forecast
import src.preprocess
import src.lora
import src.data_create

importlib.reload(src)
importlib.reload(src.forecast)
importlib.reload(src.preprocess)
importlib.reload(src.lora)
importlib.reload(src.data_create)
importlib.reload(src.data_prepare)

# The star imports must come after the reloads so the names bound here are the
# freshly reloaded ones.
from src.forecast import *
from src.lora import LoRALinear
from src.preprocess import *
from src.data_create import *
from src.data_prepare import *

# Seed every RNG this notebook draws from, not only NumPy: `random` picks the
# trajectory index that gets plotted, and torch drives DataLoader shuffling.
# `random_state` is not defined in this notebook; presumably one of the star
# imports above provides it as an int -- TODO confirm.
np.random.seed(random_state)
random.seed(random_state)
torch.manual_seed(random_state)

# Load the model and tokenizer, reporting how long it took in minutes.
ft = time.time()
model_lora, tokenizer = load_qwen()
lt = time.time()

print('time-taken: ', (lt - ft)/60, 'mins')

pprint(model_lora.config)

Note: the LoRA-modified model can also be loaded directly via `LoRATrainer.get_model()`.

In [None]:
def load_config(config_path):
    """Read a YAML file and return its parsed contents.

    Args:
        config_path: Path of the YAML file to open.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(config_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)

# Read the run settings from the YAML config once, up front.
manual_config = load_config("src/config.yaml")

train_split, time_step_split, batch_size, learning_rate, lora_rank = (
    manual_config[key]
    for key in ('train_split', 'time_step_split', 'batch_size', 'learning_rate', 'lora_rank')
)  # original note on learning_rate: 1e-5

# The context length and the token budget are both read from 'seq_length'.
max_ctx_length = max_tokens = manual_config['seq_length']
forecast_length = manual_config['forecast_length']

In [None]:

In [16]:  ### NOTE: no train/test split is done here, because the data is chunked later


In [None]:
data_prey, data_prey_true, data_pred, data_pred_true, time_data_past, time_data_true = load_data(file_path, time_step_split, is_plot = True)
print(data_prey.shape, data_prey_true.shape, data_pred.shape, data_pred_true.shape, time_data_past.shape, time_data_true.shape)  Out [16]:  Keys in HDF5 file: ['time', 'trajectories']

# Pick one trajectory at random to inspect. randrange excludes the upper
# bound, so the result is always a valid row index; random.randint includes
# it and could return len(data_prey), which is one past the last row.
check_rn = random.randrange(len(data_prey))
print('check_rn: ', check_rn)

In [None]:
# Overview figure for the sampled trajectory: the past-window series are drawn
# as plain lines, the `_true` series with point markers.
fig = plt.figure(figsize=(9, 4))

for _series, _label in ((data_prey, 'prey'), (data_pred, 'predator')):
    plt.plot(time_data_past, _series[check_rn], label=_label)

for _series, _label in ((data_prey_true, 'prey_truth'), (data_pred_true, 'predator_truth')):
    plt.plot(time_data_true, _series[check_rn], label=_label, marker='.')

plt.xlabel('time')
plt.ylabel('population')
plt.title(f'Prey-Predator Evolution | idx: {check_rn}')

plt.legend()
plt.show()

In [None]:
# model_lora.config.max_position_embeddings = manual_config['seq_length']
model_lora.config.num_hidden_layers = manual_config['hidden_layers']  Dataset-Creation (Can't use the Untrained Qwen Preprocessing Module --> Because this one does a decoding-only chunking)

In [None]:
# prepare_data hands back, in order: training inputs/targets, validation
# inputs/targets, the prey and predator tables (presumably the per-series
# offset/scale values used when decoding -- TODO confirm), and the encoded
# test prompt.
(
    train_input_ids,
    train_target_ids,
    val_input_ids,
    val_target_ids,
    prey_os,
    pred_os,
    test_encoded,
) = prepare_data(
    data_prey,
    data_pred,
    tokenizer,
    max_ctx_length,
    train_split,
    forecast_length=forecast_length,
    is_forecast=True,
)
print(*(_t.shape for _t in (train_input_ids, train_target_ids, val_input_ids, val_target_ids, test_encoded)))

In [None]:
train_dataset = TensorDataset(train_input_ids, train_target_ids)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = TensorDataset(val_input_ids, val_target_ids)
val_loader = DataLoader(val_dataset, batch_size=batch_size)  In [30]:  for batch_t, batch_v in zip(train_loader, val_loader):
    print(batch_t[0].shape, batch_v[0].shape)
    break

In [None]:
LoRA pre-training (we low-rank train the query and value projection matrices - retrain the attention networks)

In [None]:
# Apply LoRA to the model:
### replacing attention layers with trainable layers (r * in_dim) + (out_dim * r)
for layer in model_lora.model.layers:
    layer.self_attn.q_proj = LoRALinear(layer.self_attn.q_proj, r=manual_config['lora_rank']) 
    layer.self_attn.v_proj = LoRALinear(layer.self_attn.v_proj, r=manual_config['lora_rank'])  In [32]:  optimizer = torch.optim.Adam((p for p in model_lora.parameters() if p.requires_grad), lr=learning_rate)  In [33]:  def get_model_params(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params, trainable_params

total, trainable = get_model_params(model_lora)
print(f"Total Parameters: {total:,}")
print(f"Trainable Parameters: {trainable:,}") ## 100x lesser

In [None]:
Joint-Training
Do Model Checkpointing for Large Optimization Steps

In [None]:
# accelerator = Accelerator()
# model_lora, optimizer, train_loader, val_loader = accelerator.prepare(model_lora, optimizer, train_loader, val_loader)

# Joint fine-tuning loop: run `target_steps` optimizer steps, cycling through
# train_loader as often as needed, and evaluate on the whole validation loader
# after every single step. Per-step losses are collected in train_curve and
# val_curve.

model_lora.train()

target_steps = manual_config['training_steps']  # Optimization Steps
print('Target-Train-Steps:', target_steps)

# With an empty loader the inner `for` would never execute, train_steps would
# never advance, and the outer `while` would spin forever -- fail fast instead.
if target_steps > 0 and len(train_loader) == 0:
    raise ValueError("train_loader is empty; cannot run any optimization steps")

train_steps = 0
progress_bar = tqdm(range(target_steps), desc="Training Steps")

train_curve, val_curve = [], []

# best_val_loss = float('inf')
# checkpoint_freq = 5  # Save model every 5 steps - adjust this as needed

# # Create checkpoint directory if it doesn't exist
# checkpoint_dir = "model_checkpoints"
# os.makedirs(checkpoint_dir, exist_ok=True)

ft = time.time()

while train_steps < target_steps:
    for batch_input_ids, batch_target_ids in train_loader:
        optimizer.zero_grad()
        outputs = model_lora(batch_input_ids, labels=batch_target_ids)  # Use target_ids
        loss = outputs.loss  # Loss function is a model attribute
        loss.backward()
        optimizer.step()

        # Convert to a Python float once and reuse it for the curve and the bar.
        step_loss = loss.detach().cpu().item()
        train_curve.append(step_loss)  # Store loss for monitoring

        train_steps += 1
        progress_bar.update(1)
        progress_bar.set_postfix(loss=step_loss)

        # # Save checkpoint based on frequency
        # if train_steps % checkpoint_freq == 0:
        #     checkpoint_path = os.path.join(checkpoint_dir, f"lora_step_{train_steps}.pt")
        #     # Save LoRA adapter weights
        #     model_lora.save_pretrained(checkpoint_path)
        #     # Save optimizer state
        #     torch.save(optimizer.state_dict(), os.path.join(checkpoint_dir, f"optimizer_step_{train_steps}.pt"))
        #     print(f"Checkpoint saved at step {train_steps}")

        # Validation runs over the full val_loader after every training step,
        # so the total cost grows as (training steps) x (validation batches).
        model_lora.eval()
        val_losses = []

        with torch.no_grad():
            for val_batch_input_ids, val_batch_target_ids in val_loader:
                val_op = model_lora(val_batch_input_ids, labels=val_batch_target_ids)
                val_losses.append(val_op.loss.cpu().item())

        # Average validation loss for this step; NaN (rather than a
        # ZeroDivisionError) when the validation loader yields no batches.
        avg_val_loss = sum(val_losses) / len(val_losses) if val_losses else float('nan')
        val_curve.append(avg_val_loss)

        # # Save best model based on validation loss
        # if avg_val_loss < best_val_loss:
        #     best_val_loss = avg_val_loss
        #     best_model_path = os.path.join(checkpoint_dir, "best_model.pt")
        #     model_lora.save_pretrained(best_model_path)
        #     print(f"New best model saved with validation loss: {best_val_loss:.4f}")

        if train_steps >= target_steps:  # Stop training at the required steps
            break

        model_lora.train()  # Resume training mode

# Finalize the progress bar so it stops refreshing once training is done.
progress_bar.close()

# # Save final model
# final_model_path = os.path.join(checkpoint_dir, "final_model.pt")
# model_lora.save_pretrained(final_model_path)

lt = time.time()
print('Time taken:', (lt - ft) / 60, 'mins')

# Leave the model in evaluation mode for the forecasting cells.
model_lora.eval()

In [None]:
# Draw the recorded training and validation losses on one set of axes.
for _curve, _colour, _label in ((train_curve, 'red', 'Train'), (val_curve, 'blue', 'Validation')):
    plt.plot(range(len(_curve)), _curve, color=_colour, marker='.', label=_label)

plt.xlabel('#Optimization Steps')
plt.ylabel('Loss')
plt.title('Loss-Curve')

plt.grid()
plt.legend()
plt.show()

In [None]:

#ft = time.time()
# torch.save(model, f"saves/model_{target_steps}_{train_split}.pth")
# lt = time.time()


In [None]:

# print('time-taken: ', (lt - ft)/60, ' mins')

Loading Model
 # model_list = os.listdir('saves')
# path = os.path.join('saves', model_list[0])
# print('Load-File: ', path)

# model_lora = torch.load(path, weights_only=False)
# model_lora.eval()

NOTE: Performing Forecasting HERE


In [None]:
# The encoded test prompt returned by prepare_data is what gets passed to the forecast call.
test_prompt = test_encoded
print('Test-Prompt: ', test_prompt, sep='\n')

In [None]:
# Time the forecast generation; the elapsed time is printed in minutes.
ft = time.time()
prey_pred_response = generate_forecast_v2(
    model_lora,
    test_prompt,
    tokenizer,
    inf_max_new_tokens=manual_config['inf_max_tokens'],
)
lt = time.time()
# prey_pred_response = generate_forecast(model_lora, test_prompt, tokenizer, max_new_tokens=max_tokens)

print('time-taken: ', (lt - ft) / 60)

In [None]:
# Show the type and length of the raw model output, then the output itself.
print(f"{type(prey_pred_response)} {len(prey_pred_response)}", prey_pred_response, sep='\n')

In [None]:
# Split the generated output into the raw prey and predator series.
prey_decoded_response, pred_decoded_response = extract_forecasts(prey_pred_response)

print(len(prey_decoded_response), len(pred_decoded_response))
print(prey_decoded_response)

# Decode each series with the offset/scale stored for trajectory `check_rn`
# and keep at most `forecast_length` values.
# NOTE(review): model_type is "llama" although the model comes from
# load_qwen -- confirm this is the intended decoding mode.
prey_decoded_response = ts_decoding(
    prey_decoded_response,
    model_type="llama",
    precision=3,
    offsets=prey_os['offset'][check_rn],
    scale_factors=prey_os['scale'][check_rn],
)[:forecast_length]
pred_decoded_response = ts_decoding(
    pred_decoded_response,
    model_type="llama",
    precision=3,
    offsets=pred_os['offset'][check_rn],
    scale_factors=pred_os['scale'][check_rn],
)[:forecast_length]

In [None]:
 # Side-by-side comparison of past data, model prediction and truth: prey on the left, predator on the right.
fig, axs = plt.subplots(1, 2, figsize = (15, 5))

# Each prediction is plotted against as many time points as it has values, so
# a forecast shorter than the truth window still lines up with the time axis.
axs[0].plot(time_data_past, data_prey[check_rn].tolist(), label = 'Past Data')
axs[0].plot(time_data_true[:len(prey_decoded_response)], prey_decoded_response, label = 'Prediction', marker = '.')
axs[0].plot(time_data_true, data_prey_true[check_rn].tolist(), label = 'Truth', marker = '.')

axs[0].set_title('Prey-Population (Joint)')
axs[0].set_xlabel('time')
axs[0].legend()

axs[1].plot(time_data_past, data_pred[check_rn].tolist(), label = 'Past Data')
# Slice by the predator series' own length; using the prey length here breaks
# the plot whenever the two decoded series differ in length.
axs[1].plot(time_data_true[:len(pred_decoded_response)], pred_decoded_response, label = 'Prediction', marker = '.')
axs[1].plot(time_data_true, data_pred_true[check_rn].tolist(), label = 'Truth', marker = '.')

axs[1].set_title('Predator-Population (Joint)')
axs[1].set_xlabel('time')
axs[1].legend()

plt.show()



In [None]:

# Jointly show prey and predator in the same plot.
print()

# Plot each prediction against as many time points as it has values; passing
# the full time axis raises a shape-mismatch error whenever a decoded series
# is shorter than the truth window.
# NOTE(review): both species reuse the labels 'Past Data' / 'Prediction' /
# 'Truth', so the legend cannot tell them apart -- consider distinct labels.
plt.plot(time_data_past, data_prey[check_rn].tolist(), label = 'Past Data')
plt.plot(time_data_true[:len(prey_decoded_response)], prey_decoded_response, label = 'Prediction', marker = '.')
plt.plot(time_data_true, data_prey_true[check_rn].tolist(), label = 'Truth', marker = '.')

plt.plot(time_data_past, data_pred[check_rn].tolist(), label = 'Past Data')
plt.plot(time_data_true[:len(pred_decoded_response)], pred_decoded_response, label = 'Prediction', marker = '.')
plt.plot(time_data_true, data_pred_true[check_rn].tolist(), label = 'Truth', marker = '.')

plt.xlabel('time')
plt.title('Prey-Predator-Population (Joint)')
plt.legend()
plt.show()