# 5G Network Operations Insights with Fine-Tuning of GPT-2 (the smallest version of GPT-2, with 124M parameters)
## Project Overview
Author: Fatih E. NAR<br>
This project aims to deliver 5G network insights by fine-tuning a network-performant LLM.<br>
Model card: https://huggingface.co/openai-community/gpt2

In [None]:
# Install the dependencies listed in requirements.txt into the notebook kernel's environment.
%pip install -r requirements.txt
# Install pre-release builds of torch/torchvision/torchaudio, adding the PyTorch
# nightly CPU wheel index as an extra package source.
%pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu

In [None]:
import lzma
import shutil
import pandas as pd
import os
import torch
import psutil
import threading
import sys
import time
import gc
from datasets import Dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, get_linear_schedule_with_warmup

from peft import get_peft_model, LoraConfig, TaskType

# Directory that receives the saved tokenizer/model, and the checkpoint to fine-tune.
# Alternative checkpoint name kept for reference: "distilgpt2".
model_save_path = "models/5g_oss_model"
model_name = "gpt2"

# Turn off tokenizer-level parallelism to silence the related warnings.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

# Start from a clean slate: release cached GPU blocks, then run the garbage collector.
torch.cuda.empty_cache()
gc.collect()

# Allocator setting for CUDA, given in GB and converted to MB.
# NOTE(review): PyTorch documents `max_split_size_mb` as the largest block the caching
# allocator will split (a fragmentation control), not as a cap on total memory use —
# confirm this is the intended knob.
max_memory_gb = 8
max_memory_mb = 1024 * max_memory_gb
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:' + str(max_memory_mb)

# Select the compute devices, in priority order: CUDA, then Apple-Silicon MPS, then CPU.
# `device1` is the primary device; `device2` is a second CUDA GPU when one exists and
# otherwise the same device as `device1`.
if torch.cuda.is_available():
    print("Using CUDA (NVIDIA GPU)")
    # Synchronous kernel launches: errors surface at the failing call, at the cost of speed.
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
    device1 = torch.device("cuda:0")
    # Use the second GPU only if it is actually present.
    device2 = torch.device("cuda:1") if torch.cuda.device_count() > 1 else device1
    # Clear GPU cache before starting
    torch.cuda.empty_cache()
elif torch.backends.mps.is_available():
    # A high-watermark ratio of 0.0 removes the upper bound on MPS allocations;
    # the fallback flag lets operators without an MPS kernel run on the CPU.
    # NOTE(review): these variables may need to be set before torch is imported to
    # take effect — confirm.
    os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
    device1 = torch.device("mps:0")
    # The MPS backend exposes one GPU, so there is no second index to address.
    device2 = device1
    print("Using MPS")
else:
    device1 = torch.device("cpu")
    device2 = device1
    print("Using CPU")

# Location of the decompressed synthetic telecom dataset.
data_path = "data/5G_netops_data.csv"

# Decompress the .xz archive to that location; shutil.copyfileobj copies it chunk by chunk.
with lzma.open('data/5G_netops_data.csv.xz', 'rb') as compressed, open(data_path, 'wb') as extracted:
    shutil.copyfileobj(compressed, extracted)

# Read the extracted CSV into a DataFrame.
data = pd.read_csv(data_path)

# Summarize the columns/dtypes and preview the first rows of the full dataset.
data.info()
data.head()

In [None]:
# Build the prompt/target text columns, then split into training and evaluation sets.

# Replace every NaN with an empty string before the values are rendered as text.
data = data.fillna('')

# Ensure 'Zip' column is treated as a string
data['Zip'] = data['Zip'].astype(str)

# (label shown in the prompt, source column) pairs, listed in prompt order.
INPUT_TEXT_FIELDS = [
    ("Date", "Date"),
    ("Cell Availability", "Cell Availability (%)"),
    ("MTTR", "MTTR (hours)"),
    ("Throughput", "Throughput (Mbps)"),
    ("Latency", "Latency (ms)"),
    ("Packet Loss Rate", "Packet Loss Rate (%)"),
    ("Call Drop Rate", "Call Drop Rate (%)"),
    ("Handover Success Rate", "Handover Success Rate (%)"),
    ("Alarm Count", "Alarm Count"),
    ("Critical Alarm Count", "Critical Alarm Count"),
    ("Parameter Changes", "Parameter Changes"),
    ("Successful Configuration Changes", "Successful Configuration Changes (%)"),
    ("Data Usage", "Data Usage (GB)"),
    ("User Count", "User Count"),
    ("Signal Strength", "Signal Strength (dBm)"),
    ("Jitter", "Jitter (ms)"),
    ("Connection Setup Success Rate", "Connection Setup Success Rate (%)"),
    ("Security Incidents", "Security Incidents"),
    ("Authentication Failures", "Authentication Failures"),
    ("Temperature", "Temperature (°C)"),
    ("Humidity", "Humidity (%)"),
    ("Weather", "Weather"),
    ("Issue Reported", "Issue Reported"),
    ("City", "City"),
    ("State", "State"),
    ("Zip", "Zip"),
]


def _format_input_text(row):
    """Render one DataFrame row as space-separated "Label: value" pairs."""
    return " ".join(f"{label}: {row[column]}" for label, column in INPUT_TEXT_FIELDS)


# Create the input_text column (the model prompt) and the target_text column.
data['input_text'] = data.apply(_format_input_text, axis=1)
data['target_text'] = data['Fault Occurrence Rate'].astype(str)

# Convert to HuggingFace Dataset
dataset = Dataset.from_pandas(data)

# Hold out 10% for evaluation. The fixed seed keeps the split identical across runs,
# so evaluation numbers from different runs are comparable.
train_test_split = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# Check the loaded dataset
print(f"Training Dataset size: {len(train_dataset)}")
print(f"Evaluation Dataset size: {len(eval_dataset)}")
print(train_dataset[0])

In [None]:
# Load the tokenizer from the pretrained model
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Pad with the end-of-sequence token. The assignment is unconditional: whether or not
# a pad token was already defined, padding ends up using eos.
tokenizer.pad_token = tokenizer.eos_token

# Save the tokenizer
tokenizer.save_pretrained(model_save_path)

# Load the base causal-LM weights and keep the embedding matrix in step with the
# tokenizer's vocabulary size.
model = GPT2LMHeadModel.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))
# Record the padding id on the model config so the saved config agrees with the tokenizer.
model.config.pad_token_id = tokenizer.pad_token_id
# Save the new model
model.save_pretrained(model_save_path)

# Define preprocessing function
def preprocess_function(examples, max_length=512):
    """Tokenize a batch into fixed-length causal-LM training examples.

    Each example becomes one sequence: prompt tokens, then the target tokens, then
    the end-of-sequence token, right-padded to ``max_length``.  ``labels`` mirrors
    ``input_ids`` position by position, with -100 on prompt and padding positions so
    that only the target (and its closing eos) contributes to the loss.  A causal LM
    compares the logits at each position with the label at the next position, so
    labels must be aligned with ``input_ids`` rather than tokenized separately.

    Args:
        examples: Batch mapping with 'input_text' and 'target_text' lists of strings.
        max_length: Length of every returned sequence; must be at least 1.

    Returns:
        Dict with 'input_ids', 'attention_mask' and 'labels', each a list of
        ``max_length``-long integer lists (one per example).

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    prompts = list(examples['input_text'])
    # The leading space separates the target from the prompt's last value.
    targets = [f" {target}" for target in examples['target_text']]

    model_inputs = {'input_ids': [], 'attention_mask': [], 'labels': []}
    if not prompts:
        return model_inputs

    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else eos_id

    prompt_batch = tokenizer(prompts, add_special_tokens=False)['input_ids']
    target_batch = tokenizer(targets, add_special_tokens=False)['input_ids']

    for prompt_ids, target_ids in zip(prompt_batch, target_batch):
        # Reserve room for the target first; the prompt is what gets truncated.
        answer_ids = list(target_ids[:max_length - 1]) + [eos_id]
        prompt_ids = list(prompt_ids[:max_length - len(answer_ids)])
        used = len(prompt_ids) + len(answer_ids)
        padding = max_length - used

        model_inputs['input_ids'].append(prompt_ids + answer_ids + [pad_id] * padding)
        model_inputs['attention_mask'].append([1] * used + [0] * padding)
        # -100 is the index the cross-entropy loss ignores.
        model_inputs['labels'].append([-100] * len(prompt_ids) + answer_ids + [-100] * padding)

    return model_inputs

# Run the preprocessing function over both splits in batched mode.
train_dataset, eval_dataset = (
    _split.map(preprocess_function, batched=True)
    for _split in (train_dataset, eval_dataset)
)

# Expose only the model inputs, returned as torch tensors.
columns = ['input_ids', 'attention_mask', 'labels']
for _split in (train_dataset, eval_dataset):
    _split.set_format(type='torch', columns=columns)

# Sanity-check the tokenized splits: sizes first, then one sample.
for _label, _split in (("Training", train_dataset), ("Evaluation", eval_dataset)):
    print(f"Tokenized {_label} Dataset size: {len(_split)}")
print(train_dataset[0])

In [None]:
# PEFT part: wrap the base model with LoRA adapters for causal-LM fine-tuning.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,  # adapters are created in training mode
    r=2,                   # LoRA rank
    lora_alpha=16,         # LoRA scaling factor
    lora_dropout=0.05
)

model = get_peft_model(model, lora_config)

# Place the entire model (embeddings included) on the primary device.
# Moving only some sub-modules (blocks, final norm, LM head) leaves the embeddings
# behind, and a plain forward pass does not transfer activations between devices,
# so a partial split cannot run across two distinct devices.
# NOTE(review): transformers.Trainer is expected to place the model on its own
# training device and to handle multi-GPU itself — confirm for the installed version.
model = model.to(device1)

In [None]:
# Set training arguments
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` and the Trainer's `tokenizer` argument to `processing_class` —
# confirm against the version pinned in requirements.txt.
training_args = TrainingArguments(
    output_dir="./results",  # Output directory
    overwrite_output_dir=True,  # Overwrite the content of the output directory
    num_train_epochs=10,  # Number of training epochs
    per_device_train_batch_size=36,  # Batch size per device during training
    gradient_accumulation_steps=12,  # Accumulate gradients over multiple steps
    learning_rate=5e-5,  # Learning rate
    save_steps=2000,  # Save checkpoint every 2000 steps
    save_total_limit=2,  # Limit the total amount of checkpoints
    evaluation_strategy="steps",  # Evaluate during training every `eval_steps`
    logging_steps=500,  # Log every 500 steps
    eval_steps=2000,  # Evaluate every 2000 steps
    load_best_model_at_end=True,  # Load the best model at the end of training
    metric_for_best_model="loss",  # Use loss to evaluate the best model
    fp16=False,  # Disable mixed precision training for MPS
)

# Number of optimizer updates the scheduler must span. One update happens per
# `gradient_accumulation_steps` batches, and each batch holds `train_batch_size`
# examples (the per-device size multiplied by the device count), so both must be
# part of the calculation; otherwise warm-up and decay are stretched far past the
# end of training.
batches_per_epoch = -(-len(train_dataset) // training_args.train_batch_size)  # ceiling division
updates_per_epoch = max(batches_per_epoch // training_args.gradient_accumulation_steps, 1)
total_steps = max(int(updates_per_epoch * training_args.num_train_epochs), 1)

# Optimize only the parameters that are trainable (with adapters, most weights are frozen).
optimizer = torch.optim.AdamW(
    [param for param in model.parameters() if param.requires_grad],
    lr=training_args.learning_rate,
)

# Linear warm-up followed by linear decay.
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=total_steps // 10,  # Warm-up for 10% of the total steps
    num_training_steps=total_steps
)

# Create Trainer instance with the hand-built optimizer/scheduler pair.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    optimizers=(optimizer, lr_scheduler)
)

# Function to monitor system usage including GPU metrics
def print_system_usage(stop_event, interval=300):
    """Periodically print CPU, RAM and accelerator memory usage until stopped.

    Intended to run in a background thread.  Each report overwrites the previous
    one on the same console line (it starts with a carriage return).

    Args:
        stop_event: ``threading.Event``; the loop ends once it is set.
        interval: Seconds between two reports.

    Returns:
        None.
    """
    mib = 1024 ** 2
    while not stop_event.is_set():
        cpu_usage = psutil.cpu_percent()
        memory_usage = psutil.virtual_memory().percent
        # Host metrics are always reported, also on machines without an accelerator.
        report = f"\rCPU Usage: {cpu_usage}% | Memory Usage: {memory_usage}%"
        if torch.cuda.is_available():
            gpu_alloc_mem = torch.cuda.memory_allocated() / mib
            gpu_cached = torch.cuda.memory_reserved() / mib
            report += f" | GPU-Allocated-Memory Usage: {gpu_alloc_mem:.2f}MB | GPU-Cached-Memory Usage: {gpu_cached:.2f}MB"
        elif torch.backends.mps.is_available():
            gpu_alloc_mem = torch.mps.current_allocated_memory() / mib
            gpu_driver_mem = torch.mps.driver_allocated_memory() / mib
            report += f" | GPU-Allocated-Memory Usage: {gpu_alloc_mem:.2f}MB | GPU-Driver-Memory Usage: {gpu_driver_mem:.2f}MB"
        sys.stdout.write(report)
        sys.stdout.flush()
        # Wait on the event instead of sleeping, so that setting it ends the loop
        # immediately rather than after the remainder of the interval.
        stop_event.wait(interval)

# Create an event to stop the thread
stop_event = threading.Event()

# Start the system usage monitoring thread. It is a daemon thread, so it can never
# keep the process alive on its own.
monitoring_thread = threading.Thread(target=print_system_usage, args=(stop_event,), daemon=True)
monitoring_thread.start()

# Train the model. Out-of-memory errors are reported and swallowed (best effort);
# any other runtime error propagates. Whatever happens, the monitor is stopped and
# the current model state and tokenizer are written to `model_save_path`.
training_succeeded = False
try:
    trainer.train()
    training_succeeded = True
except RuntimeError as e:
    if 'out of memory' in str(e):
        print("CUDA OutOfMemoryError: Out of memory during training. Try reducing the batch size or model size.")
    else:
        raise
finally:
    # Stop the monitoring thread. The join is bounded because the monitor may be in
    # the middle of its polling pause when the event is set.
    stop_event.set()
    monitoring_thread.join(timeout=5)
    # Unwrap a parallel wrapper if `model` happens to be one; the plain model has no
    # `.module` attribute, so accessing it unconditionally would fail here.
    model_to_save = getattr(model, "module", model)
    model_to_save.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)

# Only claim success when training actually ran to completion.
if training_succeeded:
    print("Training complete and model saved.")
else:
    print("Training did not complete; the current model state was saved.")

In [None]:
# Evaluate the trained model on the evaluation split and display the metrics.
results = trainer.evaluate(eval_dataset=eval_dataset)
print("Evaluation Results:", results)