## Importing libraries and tokenizer

In [13]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [46]:
import torch
from torch.utils.data import DataLoader, TensorDataset

In [91]:
# Load model directly
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")

## Cleaning Dataset and EDA

In [166]:
df = pd.read_csv('Q2_20230202_majority 1.csv')

In [167]:
df['label_majority'].value_counts()

label_majority
in-favor              2907
against               1804
neutral-or-unclear    1040
Name: count, dtype: int64

In [168]:
df_infavor = df[df['label_majority'] == 'in-favor']
df_against = df[df['label_majority'] == 'against']
df_neutral = df[df['label_majority'] == 'neutral-or-unclear']

In [169]:
df_infavor = df_infavor.sample(n=len(df_neutral))
df_against = df_against.sample(n=len(df_neutral))

In [170]:
df_finetuning = pd.concat([df_infavor,df_against,df_neutral])[['tweet','label_majority']]

In [171]:
list_tweets = list(df_finetuning['tweet'])
list_labels = list(df_finetuning['label_majority'])

In [172]:
le = LabelEncoder()
y = le.fit_transform(list_labels)

In [173]:
list_tweets_tokenized = [tokenizer(tweet).input_ids[:tokenizer.model_max_length] for tweet in list_tweets]

In [174]:
max_token_length = max(len(tweet) for tweet in list_tweets_tokenized)
pad_token_id = tokenizer.eos_token_id
X = [ tweet_tokenized + [pad_token_id]*(max_token_length-len(tweet_tokenized)) for tweet_tokenized in list_tweets_tokenized ]

## splitting training and testing set

In [175]:
def split_data(feature_arr, label_arr, train_size):
  X_train, X_test, y_train, y_test = train_test_split(feature_arr,label_arr,train_size=train_size,random_state=42,shuffle=True)
  return X_train,X_test, y_train, y_test

In [176]:
train_size = 0.8
X_train, X_test, y_train, y_test = split_data(X,y,train_size)

## Loading the final tensor dataset 

In [None]:
X_train = torch.tensor(X_train)
y_train = torch.tensor(y_train)
# X_test = torch.tensor(X_test)
# y_test = torch.tensor(y_test)

In [None]:
train_dataset = TensorDataset(X_train,y_train)
train_loader = DataLoader(train_dataset, batch_size=8,shuffle=True)
# test_dataset = TensorDataset(X_test,y_test)
# test_loader = DataLoader(test_dataset, batch_size=8,shuffle=True)

## Loading the model, attaching classification head and replacing linear with LORA and linear

In [112]:
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large", device_map="auto")
num_classes = 3

Some parameters are on the meta device because they were offloaded to the disk.


In [113]:
import math

class LoRALayer(torch.nn.Module):
    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        self.A = torch.nn.Parameter(torch.empty(in_dim, rank))
        torch.nn.init.kaiming_uniform_(self.A, a=math.sqrt(5))  # similar to standard weight initialization
        self.B = torch.nn.Parameter(torch.zeros(rank, out_dim))
        self.alpha = alpha

    def forward(self, x):
        x = self.alpha * (x @ self.A @ self.B)
        return x

In [114]:
class LinearWithLoRA(torch.nn.Module):
    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            linear.in_features, linear.out_features, rank, alpha
        )

    def forward(self, x):
        return self.linear(x) + self.lora(x)

In [115]:
def replace_linear_with_lora(model, rank, alpha):
    for name, module in model.named_children():
        if isinstance(module, torch.nn.Linear):
            # Replace the Linear layer with LinearWithLoRA
            setattr(model, name, LinearWithLoRA(module, rank, alpha))
        else:
            # Recursively apply the same function to child modules
            replace_linear_with_lora(module, rank, alpha)

In [116]:

total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters before: {total_params:,}")

for param in model.parameters():
    param.requires_grad = False

total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters after: {total_params:,}")

Total trainable parameters before: 783,150,080
Total trainable parameters after: 0


In [117]:
model.out_head = torch.nn.Linear(in_features=1024, out_features=num_classes)
# trainable by default

In [118]:
replace_linear_with_lora(model, rank=16, alpha=16)
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable LoRA parameters: {total_params:,}")

Total trainable LoRA parameters: 18,834,483


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Finetuning loop

In [None]:
optimizer= torch.optim.Adam(model.parameters(), lr=0.0001)
loss_fn = torch.nn.CrossEntropyLoss
no_of_epochs = 2

for epoch in range(no_of_epochs):
    model.train() 
    for features,labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        logits = outputs[:, -1, :]
        loss = loss_fn(logits,labels)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():


## Inference

In [189]:
def generate_category(tweet,max_tokens,mapper):
  input_ids = tokenizer(tweet, return_tensors="pt").input_ids[:max_tokens].to("cuda")
  label_output = model.generate(input_ids)
  label = mapper[label_output]
  return label 

## Saving the model files

In [None]:
torch.save(model.state_dict(), 'model_weights.pth')
torch.save(model, 'model.pth')

## Inference to save the predicted labels in the csv

In [None]:
# freeze the weights
# replace the final layer with a classification layer
# intialize lora layers and dont freeze their weights (which is by default)
# model.train()
# do the typical training loop with optimizer.step, optimizer.zero_grad, loss.backward()

In [None]:
# training loop

# set to training
# initialise optimzier 
# no of epochs 
# define loss function 

# in each iteration
# optimizer.zero_grad()
# forward pass
# loss.backward 
# optimizer.step 

# set to evaluation
# evaluate on test data

In [4]:
# Learnings: 

# model.eval()
# 	• Disables dropout and sets batch norm to inference mode.
# 	• Does NOT disable gradient tracking.

# torch.no_grad()
# 	Disables gradient tracking (no .grad, saves memory, speeds up inference).

# model.train()
# • enables dropout and sets batch norm to inference mode.

# optimizer.zero_grad()
# sets all gradients set now to 0 


In [None]:
# make it into OOPS strcuture

# do we need a validation set and why? What are the different hyperparameters...epochs, batch size  actually its rank and alpha for lora
# the validation set in the code is actually a test set 

In [190]:
# hyperpapramters - learning rate, weight decay
# model structure - rank, alpha 