In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

## Data

In [None]:
# Load the raw dataset from 'data.csv' (path is relative to the notebook's working directory).
# Only loading happens in this cell; later cells read the 'text' and 'outage' columns.
import pandas as pd

data = pd.read_csv('data.csv')

In [None]:
# List the column names of the loaded frame.
data.columns

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Total number of rows in the raw dataset.
len(data)

In [None]:
# Class balance of the raw data: number of rows per distinct 'outage' value.
data[["outage"]].value_counts()

In [None]:
# Build class-balanced, non-overlapping training and testing sets.
N_TRAIN_PER_CLASS = 500
N_TEST_PER_CLASS = 2000
SPLIT_SEED = 42

# Partition the rows by label (1 = outage, 0 = no outage).
outage_samples = data[data['outage'] == 1]
no_outage_samples = data[data['outage'] == 0]

# Training pool: a fixed-size random draw from each class, outage rows first.
outage_training_samples = outage_samples.sample(n=N_TRAIN_PER_CLASS, random_state=SPLIT_SEED)
no_outage_training_samples = no_outage_samples.sample(n=N_TRAIN_PER_CLASS, random_state=SPLIT_SEED)
training_data = pd.concat([outage_training_samples, no_outage_training_samples])

# Rows already used for training are excluded (by index label) before the test draw,
# so the two sets cannot share a row.
outage_in_training = outage_samples.index.isin(outage_training_samples.index)
no_outage_in_training = no_outage_samples.index.isin(no_outage_training_samples.index)
outage_remaining_samples = outage_samples[~outage_in_training]
no_outage_remaining_samples = no_outage_samples[~no_outage_in_training]

# Testing pool: a fixed-size random draw per class from what is left.
outage_testing_samples = outage_remaining_samples.sample(n=N_TEST_PER_CLASS, random_state=SPLIT_SEED)
no_outage_testing_samples = no_outage_remaining_samples.sample(n=N_TEST_PER_CLASS, random_state=SPLIT_SEED)
testing_data = pd.concat([outage_testing_samples, no_outage_testing_samples])

# Sanity check: both printed distributions should show equal counts per class.
print(training_data['outage'].value_counts())
print(testing_data['outage'].value_counts())


In [None]:
# Mean text length (in characters) per 'outage' class, for each split.
# The lengths are computed once for the whole column and then averaged per class.
avg_length_train = training_data['text'].str.len().groupby(training_data['outage']).mean()
avg_length_test = testing_data['text'].str.len().groupby(testing_data['outage']).mean()


In [None]:
# Per-class mean text length of the training set, followed by the
# unweighted mean of those per-class averages.
print(avg_length_train)
print(avg_length_train.mean())

In [None]:
# Per-class mean text length of the testing set, followed by the
# unweighted mean of those per-class averages.
print(avg_length_test)
print(avg_length_test.mean())

## Baselines

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def run_nlp_model(model_name, train_data, test_data):
    """
    Train a TF-IDF baseline classifier and print its test-set metrics.

    The text is vectorised with a TfidfVectorizer fitted on the training data
    only, one of three classifiers (all with default hyperparameters) is fitted
    on those features, and accuracy, precision, recall and F1 on the testing
    data are printed.

    Args:
    - model_name (str): Name of the model ('xgboost', 'svm', or 'logistic').
    - train_data (DataFrame): Training data with 'text' and 'outage' columns.
    - test_data (DataFrame): Testing data with 'text' and 'outage' columns.

    Raises:
    - ValueError: If model_name is not one of the supported names. The check
      runs before any data is touched, so a typo fails immediately instead of
      after the vectoriser has been fitted.
    """
    # Model selection comes first so an invalid name fails fast.
    if model_name == 'xgboost':
        model = XGBClassifier()
    elif model_name == 'svm':
        model = SVC()
    elif model_name == 'logistic':
        model = LogisticRegression()
    else:
        raise ValueError("Invalid model name. Choose from 'xgboost', 'svm', or 'logistic'.")

    # Extract labels from the 'outage' column for training and testing data
    train_labels = train_data['outage']
    test_labels = test_data['outage']

    # Preprocessing: the vocabulary and IDF weights are learned from the
    # training text only; the test text is transformed with the fitted vectoriser.
    vectorizer = TfidfVectorizer()
    train_features = vectorizer.fit_transform(train_data['text'])
    test_features = vectorizer.transform(test_data['text'])

    # Training
    model.fit(train_features, train_labels)

    # Testing
    predictions = model.predict(test_features)

    # Evaluation (precision/recall/F1 use scikit-learn's default positive label, 1)
    accuracy = accuracy_score(test_labels, predictions)
    precision = precision_score(test_labels, predictions)
    recall = recall_score(test_labels, predictions)
    f1 = f1_score(test_labels, predictions)

    # Print the evaluation metrics
    print("Model:", model_name)
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-score:", f1)


### SVM

In [None]:
# SVM baseline: fit an SVC (default settings) on TF-IDF features and print its test metrics.
run_nlp_model('svm', training_data, testing_data)

### Logistic

In [None]:
# Logistic Regression baseline: fit LogisticRegression (default settings) on TF-IDF features
# and print its test metrics.
run_nlp_model('logistic', training_data, testing_data)

### XGBoost

In [None]:
# XGBoost baseline: fit an XGBClassifier (default settings) on TF-IDF features
# and print its test metrics.
run_nlp_model('xgboost', training_data, testing_data)


## Transfer Learning with LLMs

### Zero Shot Learning

In [None]:
from transformers import pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_zero_shot_classification(test_data, model_name):
    """
    Run a zero-shot classification pipeline over the test texts and print its metrics.

    Args:
    - test_data (DataFrame): Data with 'text' and 'outage' (0/1) columns.
    - model_name (str): 'bert' (loads "bert-base-uncased") or 'gpt' (loads "gpt2").

    Raises:
    - ValueError: If model_name is neither 'bert' nor 'gpt'.

    NOTE(review): both checkpoints look like plain base models; the zero-shot
    pipeline is normally used with an NLI-fine-tuned model, so confirm that the
    scores printed here are meaningful.
    """
    # Resolve the short alias to the checkpoint handed to the pipeline.
    alias_to_checkpoint = (("bert", "bert-base-uncased"), ("gpt", "gpt2"))
    checkpoint = next(
        (name for alias, name in alias_to_checkpoint if model_name == alias),
        None,
    )
    if checkpoint is None:
        raise ValueError("Invalid model name. Choose from 'bert' or 'gpt'.")
    classifier = pipeline("zero-shot-classification", model=checkpoint)

    # The two classes are offered to the pipeline as label strings.
    candidate_labels = ["no_outage", "outage"]
    outputs = classifier(
        test_data['text'].tolist(),
        candidate_labels,
        multi_label=False
    )

    # The first entry of each result's 'labels' list is taken as the prediction.
    predicted = [output['labels'][0] for output in outputs]

    # Express the ground truth with the same label strings as the predictions.
    label_names = {0: 'no_outage', 1: 'outage'}
    expected = test_data['outage'].map(label_names).tolist()

    # 'outage' is the positive class for precision, recall and F1.
    scores = [
        ("Accuracy:", accuracy_score(expected, predicted)),
        ("Precision:", precision_score(expected, predicted, pos_label="outage")),
        ("Recall:", recall_score(expected, predicted, pos_label="outage")),
        ("F1-score:", f1_score(expected, predicted, pos_label="outage")),
    ]

    print(f"{model_name.upper()} Zero-Shot Classification Metrics:")
    for caption, value in scores:
        print(caption, value)
    print()


In [None]:
# Zero-shot evaluation on the test set with the "bert-base-uncased" checkpoint.
evaluate_zero_shot_classification(testing_data, "bert")


In [None]:
# Zero-shot evaluation on the test set with the "gpt2" checkpoint.
evaluate_zero_shot_classification(testing_data, "gpt")

### Few Shot Learning

In [None]:


def create_balanced_data(data, sample_size):
    """
    Draw the same number of random rows from each 'outage' class.

    Args:
    - data (DataFrame): Frame with an 'outage' column holding 0/1 labels.
    - sample_size (int): Number of rows to draw from EACH class.

    Returns:
    - DataFrame with 2 * sample_size rows: the outage (1) rows first, then the
      no-outage (0) rows. Sampling is seeded, so repeated calls on the same
      frame return the same rows.
    """
    per_class_draws = [
        data[data['outage'] == label].sample(n=sample_size, random_state=42)
        for label in (1, 0)  # positives first, matching the output order
    ]
    return pd.concat(per_class_draws)

def run_llm_finetuning(training_data, testing_data, finetune_percentage, model_type):
    """
    Fine-tune a pretrained sequence classifier on a class-balanced fraction of
    the training pool and print its metrics on the testing data.

    Args:
    - training_data (DataFrame): Training pool with 'text' and 'outage' (0/1) columns.
    - testing_data (DataFrame): Testing data with 'text' and 'outage' columns.
    - finetune_percentage (float): Share of the training pool to use, in (0, 1].
      The selected rows are split evenly between the two classes.
    - model_type (str): Checkpoint name passed to AutoTokenizer and
      AutoModelForSequenceClassification (e.g. 'bert-base-uncased', 'gpt2').

    Raises:
    - ValueError: If finetune_percentage is outside (0, 1], or if it selects
      no rows for at least one class.

    Notes:
    - 10% of the selected rows are held out as the per-epoch validation split
      and are NOT trained on, so the model trains on the remaining 90%.
    """
    if not 0 < finetune_percentage <= 1:
        raise ValueError(
            f"finetune_percentage must be in (0, 1], got {finetune_percentage!r}."
        )

    # create_balanced_data draws `sample_size` rows from EACH class, so the
    # per-class size is half of the requested share of the pool. Passing the
    # full share would double the amount of data actually used and request
    # more rows than a class holds for large fractions. The size is also capped
    # at the smaller class so an imbalanced pool cannot over-request.
    smallest_class = min(
        int((training_data['outage'] == label).sum()) for label in (0, 1)
    )
    samples_per_class = min(
        round(len(training_data) * finetune_percentage / 2),
        smallest_class,
    )
    if samples_per_class < 1:
        raise ValueError(
            "finetune_percentage selects no rows for at least one class; "
            "use a larger fraction or a larger training pool."
        )
    balanced_training_data = create_balanced_data(training_data, samples_per_class)

    # Shuffle once and carve off 10% (at least one row) as the validation
    # split. Splitting by position keeps the validation rows disjoint from the
    # rows the model is trained on, so best-checkpoint selection is not made
    # on data the model has already seen.
    shuffled = balanced_training_data.sample(frac=1, random_state=42)
    eval_dataset_size = max(1, int(len(shuffled) * 0.1))
    eval_samples = shuffled.iloc[:eval_dataset_size]
    train_samples = shuffled.iloc[eval_dataset_size:]

    # Extract text and labels for each split
    train_texts = train_samples['text'].tolist()
    train_labels = train_samples['outage'].tolist()
    eval_texts = eval_samples['text'].tolist()
    eval_labels = eval_samples['outage'].tolist()
    test_texts = testing_data['text'].tolist()
    test_labels = testing_data['outage'].tolist()

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_type)
    model = AutoModelForSequenceClassification.from_pretrained(model_type, num_labels=2)

    # Some checkpoints (e.g. gpt2) ship without a padding token. Padded
    # tokenisation and batched sequence classification both need one, so reuse
    # the end-of-sequence token and tell the model which id it is.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    if model.config.pad_token_id is None:
        model.config.pad_token_id = tokenizer.pad_token_id

    # Tokenize the three splits
    train_encodings = tokenizer(train_texts, truncation=True, padding=True)
    eval_encodings = tokenizer(eval_texts, truncation=True, padding=True)
    test_encodings = tokenizer(test_texts, truncation=True, padding=True)

    # Define the dataset
    class CustomDataset(torch.utils.data.Dataset):
        """Serves tokenizer encodings plus labels as per-item tensors."""

        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __getitem__(self, idx):
            item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
            item['labels'] = torch.tensor(self.labels[idx])
            return item

        def __len__(self):
            return len(self.labels)

    # Create the datasets
    train_dataset = CustomDataset(train_encodings, train_labels)
    eval_dataset = CustomDataset(eval_encodings, eval_labels)
    test_dataset = CustomDataset(test_encodings, test_labels)

    # Set up the training arguments
    # NOTE(review): with the smaller fractions the whole run can be shorter
    # than warmup_steps=500 (e.g. 90 training rows at batch size 16 for 10
    # epochs is about 60 steps on one device), so the learning rate never
    # leaves warm-up. Consider a ratio-based warm-up if that is not intended.
    training_args = TrainingArguments(
        num_train_epochs=10,
        output_dir='./results',
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        save_strategy='epoch',
        evaluation_strategy='epoch',
        load_best_model_at_end=True,
    )

    # Create the trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=lambda pred: {"accuracy": (pred.predictions.argmax(-1) == pred.label_ids).mean()},
    )

    # Train the model
    trainer.train()

    # Make predictions on the testing data
    predictions = trainer.predict(test_dataset)
    predicted_labels = predictions.predictions.argmax(-1)

    # Calculate evaluation metrics
    accuracy = accuracy_score(test_labels, predicted_labels)
    precision = precision_score(test_labels, predicted_labels)
    recall = recall_score(test_labels, predicted_labels)
    f1 = f1_score(test_labels, predicted_labels)

    # Print the evaluation metrics
    print(f"Model Type: {model_type}")
    print(f"Finetuning Percentage: {finetune_percentage * 100}%")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-score:", f1)

# Define the model types to evaluate
# (checkpoint names handed to AutoTokenizer / AutoModelForSequenceClassification
# inside run_llm_finetuning)
model_types = ['bert-base-uncased', 'gpt2']

# Run LLM finetuning and evaluation for different training percentages and model types.
# Percentages are the outer loop, so every checkpoint is trained for one percentage
# before moving on to the next. Each call loads a fresh pretrained model and prints
# its own metrics, so this cell launches len(train_percentages) * len(model_types)
# separate fine-tuning runs.
train_percentages = [0.1, 0.2, 0.5, 0.75, 1]
for percentage in train_percentages:
    for model_type in model_types:
        run_llm_finetuning(training_data, testing_data, percentage, model_type)