In [None]:
# Let's train the llama sequence classification model to determine whether we are doing the correct exercise or not.
# given the sequence he should be able to classify into two things.

In [None]:
# The aim was simple - Get the voice, convert it to text, give it to llama to make sequence classification. The classification is done nicely by the llama. We need to train them for the pain and non pain text classification. 

# If the pain is detected in the voice, then we will ask llama to suggest new exercise for the people. 

In [None]:
import sys
!{sys.executable} -m pip install transformers
!{sys.executable} -m pip install transformers accelerate trl bitsandbytes datasets evaluate huggingface-cli
!{sys.executable} -m pip install scikit-learn
!{sys.executable} -m pip install lora
# !{sys.executable} -m pip install peft
!{sys.executable} -m pip install torch
!{sys.executable} -m pip install huggingface_hub datasets
!{sys.executable} -m pip install huggingface --upgrade
!{sys.executable} -m pip install 'accelerate>=0.26.0'

In [2]:
from huggingface_hub import login as hlogin
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import Dataset
from sklearn.model_selection import train_test_split
import numpy as np

hf_token = "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"

hlogin(hf_token)

wb_token = "076d04271de0b9efeb853f25df3d1e4e1b0090a1"



In [4]:
# Load the LLaMA model and tokenizer
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Assign a padding token to the tokenizer
tokenizer.pad_token = tokenizer.eos_token

# Load the model directly onto multiple GPUs using device_map
device_map = "cpu"  # Automatically balance the model across available GPUs
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    device_map=device_map
)

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Paths for Non-Bias and Bias text files
non_pain_path = '/Users/jainilpatel/PycharmProjects/Exercise-Correction/LectureHandsOn/not_pain_text'
pain_path = '/Users/jainilpatel/PycharmProjects/Exercise-Correction/LectureHandsOn/pain_text'

# Make sure the model uses the same padding token
model.config.pad_token_id = tokenizer.pad_token_id

# Function to load text files and create a dataset
def load_texts_and_labels(non_pain_path, pain_path):
    non_bias_texts = []
    bias_texts = []

    # Load non-bias texts
    for file_name in os.listdir(non_pain_path):
        with open(os.path.join(non_pain_path, file_name), 'r', encoding='utf-8') as file:
            non_bias_texts.append(file.read())

    # Load bias texts
    # Load bias texts line by line
    for file_name in os.listdir(pain_path):
        file_path = os.path.join(pain_path, file_name)
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                bias_texts.append(line.strip())  # Appending each line to bias_texts

    # Create labels
    texts = non_bias_texts + bias_texts
    labels = [0] * len(non_bias_texts) + [1] * len(bias_texts)

    return texts, labels

# Load data
texts, labels = load_texts_and_labels(non_pain_path, pain_path)

# Split into train and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(texts, labels, test_size=0.2)

# Tokenize the datasets
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=512)

# Create a Hugging Face Dataset
train_dataset = Dataset.from_dict({
    'input_ids': train_encodings['input_ids'],
    'attention_mask': train_encodings['attention_mask'],
    'labels': train_labels
})
val_dataset = Dataset.from_dict({
    'input_ids': val_encodings['input_ids'],
    'attention_mask': val_encodings['attention_mask'],
    'labels': val_labels
})

# Define training arguments with reduced batch size, gradient accumulation, and mixed precision
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    # per_device_train_batch_size=2,  # Reduce batch size
    # per_device_eval_batch_size=2,   # Reduce batch size
    num_train_epochs=5,
    weight_decay=0.01,
    bf16=False,  # Disable bf16 precision
    fp16=False,  # Disable fp16 precision
    use_cpu=True,  
    # gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps
  
)

# Initialize the Trainer with the distributed model and datasets
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)





In [6]:
train_dataset

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 4
})

In [7]:
# Train the model
trainer.train()


Epoch,Training Loss,Validation Loss
1,No log,5.70899
2,No log,8.414573
3,No log,9.722415
4,No log,10.377924


SafetensorError: Error while serializing: IoError(Os { code: 28, kind: StorageFull, message: "No space left on device" })

In [8]:
import torch

# Function to perform inference on new text inputs
def predict(texts):
    # Tokenize the texts just like in the training/validation step
    encodings = tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
    
    # Move the encodings to the correct device (CPU in your case)
    input_ids = encodings['input_ids'].to(model.device)
    attention_mask = encodings['attention_mask'].to(model.device)
    
    # Set the model to evaluation mode
    model.eval()

    # Get model predictions (logits)
    with torch.no_grad():  # No need to calculate gradients during inference
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    # Get the predicted label (0 for non-pain, 1 for pain) from logits
    predictions = torch.argmax(outputs.logits, dim=-1)

    return predictions.cpu().numpy()  # Return predictions as a NumPy array

# Example texts for inference
example_texts = [
    "My knee hurts when I bend it.",
    "I feel perfectly fine after the workout."
]

# Get predictions for new texts
predicted_labels = predict(example_texts)

# Output the predicted labels
for text, label in zip(example_texts, predicted_labels):
    print(f"Text: {text} -> Predicted Label: {label}")


Text: My knee hurts when I bend it. -> Predicted Label: 0
Text: I feel perfectly fine after the workout. -> Predicted Label: 0


In [None]:
# Save the model. 
model.save_pretrained('./Trained_Models/pain_model')
tokenizer.save_pretrained('./Trained_Models/pain_model')

In [None]:
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from transformers import Trainer


# Evaluate function
def evaluate_model(trainer, val_dataset):
    # Get predictions and labels
    predictions, labels, _ = trainer.predict(val_dataset)

    # Apply softmax to get probabilities
    probabilities = torch.nn.functional.softmax(torch.tensor(predictions), dim=-1)

    # Get predicted classes and positive class probabilities
    preds = torch.argmax(probabilities, dim=1).numpy()
    prob_pain = probabilities[:, 1].numpy()  # Assuming class 1 is 'pain'

    # Calculate evaluation metrics
    accuracy = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    auc = roc_auc_score(labels, prob_pain)

    return accuracy, precision, recall, f1, auc


# Evaluate the model
accuracy, precision, recall, f1, auc = evaluate_model(trainer, val_dataset)

# Print the metrics
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print(f'AUC: {auc:.4f}')


In [None]:
## how about loading the bert model too and merging the predictions based on the ensemble learning to see whether the f1 score increases or not.

In [9]:
from transformers import BertTokenizer, BertForSequenceClassification

# Load the pre-trained BERT model and tokenizer
bert_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Ensure both models use the same padding token
bert_model.config.pad_token_id = tokenizer.pad_token_id


In [None]:
def get_predictions(model, tokenizer, val_dataset):
    # Use the Trainer to get predictions
    trainer = Trainer(
        model=model,
        args=training_args,
    )
    predictions, _, _ = trainer.predict(val_dataset)

    # Apply softmax to get probabilities
    probabilities = torch.nn.functional.softmax(torch.tensor(predictions), dim=-1)
    return probabilities.numpy()  # Return as numpy array


In [10]:
# Get predictions from both models
llama_probabilities = get_predictions(model, tokenizer, val_dataset)
bert_probabilities = get_predictions(bert_model, bert_tokenizer, val_dataset)

# Combine predictions (e.g., averaging probabilities)
ensemble_probabilities = (llama_probabilities + bert_probabilities) / 2  # Simple average

# Get final predictions
ensemble_preds = np.argmax(ensemble_probabilities, axis=1)


In [None]:
# Get true labels from the validation dataset
true_labels = val_dataset['labels'].numpy()  # Adjust based on how your dataset is structured

# Calculate metrics
accuracy = accuracy_score(true_labels, ensemble_preds)
precision, recall, f1, _ = precision_recall_fscore_support(true_labels, ensemble_preds, average='binary')
auc = roc_auc_score(true_labels, ensemble_probabilities[:, 1])  # Assuming class 1 is 'pain'

# Print the ensemble metrics
print(f'Ensemble Accuracy: {accuracy:.4f}')
print(f'Ensemble Precision: {precision:.4f}')
print(f'Ensemble Recall: {recall:.4f}')
print(f'Ensemble F1 Score: {f1:.4f}')
print(f'Ensemble AUC: {auc:.4f}')


