In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from peft import LoraConfig, get_peft_model, get_peft_model_state_dict
from safetensors.torch import save_file, load_file



# Load and prepare the dataset
def load_and_prepare_data(file_path):
    df = pd.read_csv(file_path)
    # Map custom sentiment schema (-1, 0, 1) to model schema (0, 1, 2)
    df2=df[df["labels"]!=-9999]
    df2["labels"] = df2["labels"].map({-1: int(0), 0: int(1), 1: int(2)})
    return df2


fileloc="TPT_data.csv"
data = load_and_prepare_data(fileloc)

#split data up into training and testing
train_df, test_df=train_test_split(data, test_size=.2, train_size=.8)

print(train_df)

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest", num_labels=3)

# Tokenize the data
def tokenize_function(examples):
    return tokenizer(examples["text"], padding=True, truncation=True, max_length=128)

# Convert to Hugging Face Dataset
datasets = {
    "train": Dataset.from_pandas(train_df),
    "test": Dataset.from_pandas(test_df)
}

encoded_datasets = {key: ds.map(tokenize_function, batched=True) for key, ds in datasets.items()}

print(encoded_datasets['test'])
# Define metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    acc = accuracy_score(labels, predictions)
    return {"eval_accuracy": acc}  # Ensure the key matches "metric_for_best_model"

# LoRA configuration

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=100,  # <--- This controls the number of epochs
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    label_names=["labels"]
)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["classifier.dense", "classifier.out_proj"],
    lora_dropout=0.1,
    bias="none"
)

lora_model = get_peft_model(model, lora_config)

trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=encoded_datasets["train"],
    eval_dataset=encoded_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

# Evaluate the model
results = trainer.evaluate()

# Report accuracy
accuracy = results["eval_accuracy"] * 100
print(f"Test Accuracy: {accuracy:.2f}%")


  from .autonotebook import tqdm as notebook_tqdm
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["labels"] = df2["labels"].map({-1: int(0), 0: int(1), 1: int(2)})


                                                  text  labels
146  We also compared post-instruction results for ...       1
235  It appears that in order to achieve significan...       1
79   ISBN-10: 032194920X \r\nP\r\nhysics education ...       2
174  His current teaching interests focus on implem...       1
286  Since 2004, active learning pedagogy has also ...       1
..                                                 ...     ...
181  One very effective way to engage students duri...       2
85   In interactive engagement, the emotional compo...       1
271  The class was a hybrid Peer Instruction and tr...       1
33   Beyond the teaching lab, her interests include...       2
169  The success of the AMP journal noted to date c...       2

[223 rows x 2 columns]


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 223/223 [00:00<00:00, 1624.28 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7480.29 examples/s]
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to

Dataset({
    features: ['text', 'labels', '__index_level_0__', 'input_ids', 'attention_mask'],
    num_rows: 56
})


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7161,0.474754,0.767857
2,0.7373,0.474025,0.767857
3,0.6641,0.472995,0.767857
4,0.6469,0.471561,0.767857
5,0.7969,0.470647,0.785714
6,0.5907,0.469353,0.785714
7,0.6435,0.468138,0.767857
8,0.7585,0.466611,0.767857
9,0.6974,0.46559,0.767857
10,0.6772,0.464078,0.767857


Test Accuracy: 82.14%


In [1]:
#####This block defines the model to read it back in. Don't know if it's necessary to go through all of this, but I'm trying it

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from peft import LoraConfig, get_peft_model, get_peft_model_state_dict
from safetensors.torch import save_file, load_file


# Load and prepare the dataset
def load_and_prepare_data(file_path):
    df = pd.read_csv(file_path)
    # Map custom sentiment schema (-1, 0, 1) to model schema (0, 1, 2)
    df2=df[df["labels"]!=-9999]
    df2["labels"] = df2["labels"].map({-1: int(0), 0: int(1), 1: int(2)})
    return df2


fileloc="TPT_data.csv"
data = load_and_prepare_data(fileloc)

#split data up into training and testing
train_df, test_df=train_test_split(data, test_size=.2, train_size=.8)

print(train_df)

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest", num_labels=3)

# Tokenize the data
def tokenize_function(examples):
    return tokenizer(examples["text"], padding=True, truncation=True, max_length=128)


datasets = {
    "train": Dataset.from_pandas(train_df),
    "test": Dataset.from_pandas(test_df)
}

encoded_datasets = {key: ds.map(tokenize_function, batched=True) for key, ds in datasets.items()}

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    acc = accuracy_score(labels, predictions)
    return {"eval_accuracy": acc}  # Ensure the key matches "metric_for_best_model"


training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=100,  
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    label_names=["labels"]
)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["classifier.dense", "classifier.out_proj"],
    lora_dropout=0.1,
    bias="none"
)

lora_model = get_peft_model(model, lora_config)

trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=encoded_datasets["train"],
    eval_dataset=encoded_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


  from .autonotebook import tqdm as notebook_tqdm
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["labels"] = df2["labels"].map({-1: int(0), 0: int(1), 1: int(2)})


                                                  text  labels
146  We also compared post-instruction results for ...       1
289  These sims couple naturally with the use of ma...       1
62   Writing assignments in science courses help to...       2
36   Each of these activities tends to create a mor...       2
20   His current research lines are active learning...       1
..                                                 ...     ...
164  One form of active learning, cooperative learn...       1
198  I found that allowing students to make and cor...       2
202  His research interest is development of active...       1
285  This paper has presented some innovative uses ...       2
1    To tackle the misconceptions a student brings ...       2

[223 rows x 2 columns]


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 223/223 [00:00<00:00, 4581.92 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7477.43 examples/s]
  trainer = Trainer(
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to 

In [2]:


# Save model tensors as safetensors
#save_file(get_peft_model_state_dict(lora_model), "trained_lora_model.safetensors")
#print("LoRA model saved successfully.")
#print("Model saved successfully as safetensors.")

# === LOADING AND USING THE SAVED MODEL ===

# Load tensors and apply to model
# Load the base model
#base_model = AutoModelForSequenceClassification.from_pretrained(
#    "cardiffnlp/twitter-roberta-base-sentiment-latest", num_labels=3
#)

# Apply LoRA modifications
#lora_model = get_peft_model(base_model, lora_config)

# Load the LoRA adapter weights
state_dict = load_file("trained_model.safetensors")

trainer.train()
lora_model.load_state_dict(state_dict, strict=False)

print("LoRA model loaded successfully from safetensors.")

# Evaluate the model after loading
results = trainer.evaluate()
accuracy = results["eval_accuracy"] * 100
print(f"Test Accuracy after loading: {accuracy:.2f}%")


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5644,0.708162,0.660714
2,0.5878,0.70763,0.660714
3,0.6503,0.707378,0.660714
4,0.6815,0.707127,0.625
5,0.6266,0.70716,0.625
6,0.6078,0.707778,0.625
7,0.728,0.708296,0.625
8,0.567,0.709198,0.625
9,0.7129,0.710895,0.642857
10,0.6219,0.71273,0.642857


LoRA model loaded successfully from safetensors.


Test Accuracy after loading: 69.64%


In [4]:
import torch

def analyse_sentence(model, tokenizer, sentence, device="cuda"):
    model.to(device)  # Ensure model is on the GPU
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {key: value.to(device) for key, value in inputs.items()}  # Move input tensors to GPU

    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    probabilities = torch.nn.functional.softmax(logits, dim=-1).squeeze().tolist()
    predicted_label = torch.argmax(logits, dim=-1).item()

    return predicted_label, probabilities
# Example usage:
sentence = "I love this product!"
label, probs = analyse_sentence(lora_model, tokenizer, sentence)
print(f"Predicted Sentiment: {label} (0 = negative, 1 = neutral, 2 = positive)")
print(f"Probabilities: {probs}")

Predicted Sentiment: 2 (0 = negative, 1 = neutral, 2 = positive)
Probabilities: [0.004979368299245834, 0.02550559677183628, 0.9695150852203369]


In [5]:
df_ajp=pd.read_pickle('df_ajp_merged.pkl')

In [12]:
def remove_invalid_sentences(df, sent_col="ex_sent", sentiment_col="sentiment_human", invalid_value=-9999):
    """
    Removes occurrences of a specified invalid sentiment value and the corresponding sentence.
    
    Parameters:
        df (pd.DataFrame): The dataframe containing sentence and sentiment values.
        sent_col (str): The column name for sentences (can be strings or lists).
        sentiment_col (str): The column name for sentiment values (can be integers or lists).
        invalid_value (int): The value indicating invalid sentiment.

    Returns:
        pd.DataFrame: The cleaned dataframe with invalid sentiment values and their corresponding sentences removed.
    """
    df=df.copy()
    for n, (sentence, sentiment) in enumerate(zip(df[sent_col], df[sentiment_col])):
        if sentiment==invalid_value:
            df=df.drop([n])

    return df
df_new=remove_invalid_sentences(df_ajp)


bert_lbls=[]
for sentence in df_new['ex_sent']:
    label, probs = analyse_sentence(lora_model, tokenizer, sentence)
    bert_lbls.append(label)

from sklearn.metrics import cohen_kappa_score
#print(train['sentiment'].map({-1: 0, 0: 1, 1: 2}))
print(cohen_kappa_score(bert_lbls, df_new['sentiment_human']+1, labels=[0,1,2] ))

0.48658802970729575
