In [1]:
!pip cache purge
!pip install torch

[0mFiles removed: 0


In [148]:
import string 
import numpy as np
import torch
import random
from tqdm import tqdm
from util import load_train_data, load_test_data


# Load data
positive_file = "hotelPosT-train.txt"
negative_file = "hotelNegT-train.txt"
all_texts, all_labels = load_train_data(positive_file, negative_file)
test_file = "HW2-testset.txt"

test_texts, test_labels = load_test_data(test_file)

# Count positive and negative samples
num_positive = sum(all_labels)
num_negative = len(all_labels) - num_positive

print(f"Total reviews: {len(all_texts)}")
print(f"Positive reviews: {num_positive}")
print(f"Negative reviews: {num_negative}")

# Count positive and negative samples
num_test = sum(test_labels)

print(f"Total Test reviews: {len(test_texts)}")
print(f"Tiotal reviews: {num_test}")


Total reviews: 189
Positive reviews: 95
Negative reviews: 94
Total Test reviews: 50
Tiotal reviews: 25


In [149]:
from sklearn.model_selection import train_test_split

train_texts, dev_texts, train_labels, dev_labels = train_test_split(
    all_texts, all_labels, test_size=0.2, random_state=42
)


print(f"Training set size: {len(train_texts)}")
print(f"Development set size: {len(dev_texts)}")


Training set size: 151
Development set size: 38


In [150]:
def precision(predicted_labels, true_labels):
    tp = sum(p == 1 and t == 1 for p, t in zip(predicted_labels, true_labels))
    fp = sum(p == 1 and t == 0 for p, t in zip(predicted_labels, true_labels))
    return tp / (tp + fp) if (tp + fp) > 0 else 0

def recall(predicted_labels, true_labels):
    tp = sum(p == 1 and t == 1 for p, t in zip(predicted_labels, true_labels))
    fn = sum(p == 0 and t == 1 for p, t in zip(predicted_labels, true_labels))
    return tp / (tp + fn) if (tp + fn) > 0 else 0

def f1(predicted_labels, true_labels):
    p = precision(predicted_labels, true_labels)
    r = recall(predicted_labels, true_labels)
    return 2 * p * r / (p + r) if (p + r) > 0 else 0

# Example test case
true_labels = [1, 1, 1, 1, 0, 1, 0, 0, 1, 0]
predicted_labels = [1, 0, 1, 0, 0, 1, 1, 0, 1, 0]

print("Precision:", precision(predicted_labels, true_labels))
print("Recall:", recall(predicted_labels, true_labels))
print("F1 Score:", f1(predicted_labels, true_labels))


Precision: 0.8
Recall: 0.6666666666666666
F1 Score: 0.7272727272727272


In [151]:
def featurize_text(text):
    # Tokenize 
    words = text.lower().split()
    positive_count = sum(1 for word in words if word in positive_lexicon)
    negative_count = sum(1 for word in words if word in negative_lexicon)
    contains_no = 1.0 if "no" in words else 0.0
    pronouns = {"i", "me", "them", "they", "my", "mine", "we", "us", "our", "ours", "you", "your", "yours"}
    pronoun_count = sum(1 for word in words if word in pronouns)
    contains_exclamation = 1.0 if "!" in text else 0.0
    log_word_count = math.log(len(words)) if len(words) > 0 else 0.0
    feature_vector = [positive_count, negative_count, contains_no, pronoun_count, contains_exclamation, log_word_count]
    return feature_vector

In [152]:
def load_lexicon(file_path):
    with open(file_path, 'r') as f:
        return set(line.strip() for line in f.readlines())

positive_lexicon = load_lexicon('positive-words.txt')
negative_lexicon = load_lexicon('negative-words.txt')

In [153]:
train_vectors = [featurize_text(review) for review in train_texts]
all_dev_feature_vectors = [featurize_text(review) for review in dev_texts]
all_test_feature_vectors = [featurize_text(review) for review in test_texts]

In [154]:
def normalize(feature_vectors):
    feature_vectors = np.array(feature_vectors)
    
    min_vals = feature_vectors.min(axis=0)
    max_vals = feature_vectors.max(axis=0)
    normalized = (feature_vectors - min_vals) / (max_vals - min_vals)
    
    return normalized.tolist()


In [155]:
train_vectors = normalize(train_vectors)
all_dev_feature_vectors = normalize(all_dev_feature_vectors)
all_test_feature_vectors = normalize(all_test_feature_vectors)
print("\nOriginal Feature Vectors for Train Texts(for the first 5 reviews):")
for i, vec in enumerate(train_vectors[:5]):  # Display first 5 for brevity
    print(f"Review {i+1}: {vec}")

print("\nNormalized Feature Vectors for Train Texts(for the first 5 reviews):")
for i, vec in enumerate(train_vectors[:5]):  # Display first 5 for brevity
    print(f"Review {i+1}: {vec}")
    
print("\nOriginal Feature Vectors for Dev Texts(for the first 5 reviews):")
for i, vec in enumerate(all_dev_feature_vectors[:5]):  # Display first 5 for brevity
    print(f"Review {i+1}: {vec}")

print("\nNormalized Feature Vectors for Dev Texts(for the first 5 reviews):")
for i, vec in enumerate(all_dev_feature_vectors[:5]):  # Display first 5 for brevity
    print(f"Review {i+1}: {vec}")
    
print("\nOriginal Feature Vectors for Test Texts(for the first 5 reviews):")
for i, vec in enumerate(all_test_feature_vectors[:5]):  # Display first 5 for brevity
    print(f"Review {i+1}: {vec}")

print("\nNormalized Feature Vectors for Test Texts(for the first 5 reviews):")
for i, vec in enumerate(all_test_feature_vectors[:5]):  # Display first 5 for brevity
    print(f"Review {i+1}: {vec}")


Original Feature Vectors for Train Texts(for the first 5 reviews):
Review 1: [0.24, 0.0, 0.05263157894736842, 0.7533293705497731, 0.041666666666666664, 0.07142857142857142]
Review 2: [0.12, 0.5, 0.2425629290617849, 0.19225549836219014, 0.375, 0.26785714285714285]
Review 3: [0.04, 0.25, 0.1853546910755149, 0.3932073984680937, 0.041666666666666664, 0.19642857142857142]
Review 4: [0.56, 0.0, 0.32036613272311215, 0.7985827929674446, 0.08333333333333333, 0.4107142857142857]
Review 5: [0.24, 0.25, 0.10297482837528604, 0.06674174948248364, 0.125, 0.14285714285714285]

Normalized Feature Vectors for Train Texts(for the first 5 reviews):
Review 1: [0.24, 0.0, 0.05263157894736842, 0.7533293705497731, 0.041666666666666664, 0.07142857142857142]
Review 2: [0.12, 0.5, 0.2425629290617849, 0.19225549836219014, 0.375, 0.26785714285714285]
Review 3: [0.04, 0.25, 0.1853546910755149, 0.3932073984680937, 0.041666666666666664, 0.19642857142857142]
Review 4: [0.56, 0.0, 0.32036613272311215, 0.79858279296744

In [156]:
'''
CSCI 5832 Assignment 2
Spring 2025
The following sample code was taken from a tutorial by PyTorch and modified for our assignment.
Source: https://pytorch.org/tutorials/beginner/nlp/deep_learning_tutorial.html
'''
class SentimentClassifier(torch.nn.Module):

    def __init__(self, input_dim: int = 6, output_size: int = 1):
        super(SentimentClassifier, self).__init__()

        # Define the parameters that we will need.
        # Torch defines nn.Linear(), which gives the linear function z = Xw + b.
        self.linear = torch.nn.Linear(input_dim, output_size)

    def forward(self, feature_vec):
        # Pass the input through the linear layer,
        # then pass that through sigmoid to get a probability.
        z = self.linear(feature_vec)
        return torch.sigmoid(z)
    
    def logprob2label(self, prob):
        """Convert probability to a binary label (0 or 1)"""
        return (prob >= 0.5).int()
    

model = SentimentClassifier()

# the model knows its parameters.  The first output below is X, the second is b.
# Whenever you assign a component to a class variable in the __init__ function
# of a module, which was done with the line
# self.linear = nn.Linear(...)
# Then through some Python magic from the PyTorch devs, your module
# (in this case, SentimentClassifier) will store knowledge of the nn.Linear's parameters
for param in model.parameters():
    print(param)

# To run the model, pass in a feature vector
# Here we don't need to train, so the code is wrapped in torch.no_grad()
with torch.no_grad():
    sample_feature_vector = torch.tensor([[3.0, 2.0, 1.0, 3.0, 0.0, 4.18965482711792],[3.0, 2.0, 1.0, 3.0, 0.0, 4.18965482711792]])
    log_prob = model(sample_feature_vector)
    print('Log probability from the untrained model:', log_prob)
    print('Label based on the log probability:', model.logprob2label(log_prob))


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Sample training loop below. Because it uses functions that you are asked to write for the assignment,     #
# it will not run as is, and is not guaranteed to work with your existing code. You may need to modify it.  #
#                                                                                                           #
# No need to use this code if you have a better way,                                                        #
# or if you can't figure out how to make it run with your existing code.                                    #
# It is only provided here to give you an idea of how we expect you to train the model.                     #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #



loss_function = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
num_epochs = 100
batch_size = 16


# Featurize and normalize
# train_vectors_test = [featurize_text(text) for text in train_texts]  
# train_vectors_test = normalize(train_vectors_test)  

train_labels_tensor = torch.tensor(train_labels, dtype=torch.float32).unsqueeze(1)  

for epoch in range(num_epochs):
    samples = list(zip(train_vectors, train_labels_tensor))
    random.shuffle(samples)
    batches = [samples[i:i + batch_size] for i in range(0, len(samples), batch_size)]

    epoch_i_train_losses = []

    for batch in tqdm(batches):
        feature_vectors, labels = zip(*batch)
        
        # Convert to tensors
        feature_vectors = torch.tensor(feature_vectors, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # Ensure labels are 2D
        
        # Step 1: Zero gradients
        model.zero_grad()

        # Step 2: Forward pass
        log_probs = model(feature_vectors)

        # Step 3: Compute loss
        loss = loss_function(log_probs, labels)

        # Step 4: Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Store loss
        epoch_i_train_losses.append(loss.item())
    
    # Print the average loss for this epoch
    print('Epoch:', epoch)
    print('Avg train loss:', sum(epoch_i_train_losses) / len(epoch_i_train_losses))

Parameter containing:
tensor([[-0.2572, -0.2950,  0.3682, -0.0405, -0.2338,  0.0362]],
       requires_grad=True)
Parameter containing:
tensor([-0.1510], requires_grad=True)
Log probability from the untrained model: tensor([[0.2471],
        [0.2471]])
Label based on the log probability: tensor([[0],
        [0]], dtype=torch.int32)


100%|██████████| 10/10 [00:00<00:00, 1497.06it/s]


Epoch: 0
Avg train loss: 0.6845852732658386


100%|██████████| 10/10 [00:00<00:00, 1369.30it/s]


Epoch: 1
Avg train loss: 0.6738928496837616


100%|██████████| 10/10 [00:00<00:00, 1767.21it/s]


Epoch: 2
Avg train loss: 0.6677928924560547


100%|██████████| 10/10 [00:00<00:00, 1713.15it/s]


Epoch: 3
Avg train loss: 0.6592692792415619


100%|██████████| 10/10 [00:00<00:00, 1823.13it/s]


Epoch: 4
Avg train loss: 0.651218843460083


100%|██████████| 10/10 [00:00<00:00, 1894.96it/s]


Epoch: 5
Avg train loss: 0.6434637010097504


100%|██████████| 10/10 [00:00<00:00, 1630.31it/s]


Epoch: 6
Avg train loss: 0.636277723312378


100%|██████████| 10/10 [00:00<00:00, 1907.28it/s]


Epoch: 7
Avg train loss: 0.6291180849075317


100%|██████████| 10/10 [00:00<00:00, 1946.22it/s]


Epoch: 8
Avg train loss: 0.6247415840625763


100%|██████████| 10/10 [00:00<00:00, 1820.60it/s]


Epoch: 9
Avg train loss: 0.6173134744167328


100%|██████████| 10/10 [00:00<00:00, 1524.04it/s]


Epoch: 10
Avg train loss: 0.6136835098266602


100%|██████████| 10/10 [00:00<00:00, 1701.20it/s]


Epoch: 11
Avg train loss: 0.609208208322525


100%|██████████| 10/10 [00:00<00:00, 1542.42it/s]


Epoch: 12
Avg train loss: 0.6033635258674621


100%|██████████| 10/10 [00:00<00:00, 1715.81it/s]


Epoch: 13
Avg train loss: 0.5966301798820496


100%|██████████| 10/10 [00:00<00:00, 1562.30it/s]


Epoch: 14
Avg train loss: 0.5957500159740448


100%|██████████| 10/10 [00:00<00:00, 1828.06it/s]


Epoch: 15
Avg train loss: 0.588167530298233


100%|██████████| 10/10 [00:00<00:00, 1821.16it/s]


Epoch: 16
Avg train loss: 0.5862464368343353


100%|██████████| 10/10 [00:00<00:00, 1890.09it/s]


Epoch: 17
Avg train loss: 0.5741588443517684


100%|██████████| 10/10 [00:00<00:00, 1868.45it/s]


Epoch: 18
Avg train loss: 0.5775596827268601


100%|██████████| 10/10 [00:00<00:00, 1535.53it/s]


Epoch: 19
Avg train loss: 0.5718108475208282


100%|██████████| 10/10 [00:00<00:00, 1311.95it/s]


Epoch: 20
Avg train loss: 0.5639356851577759


100%|██████████| 10/10 [00:00<00:00, 1513.86it/s]


Epoch: 21
Avg train loss: 0.5669748544692993


100%|██████████| 10/10 [00:00<00:00, 1634.44it/s]


Epoch: 22
Avg train loss: 0.5541732341051102


100%|██████████| 10/10 [00:00<00:00, 1481.41it/s]


Epoch: 23
Avg train loss: 0.5599292159080506


100%|██████████| 10/10 [00:00<00:00, 1695.01it/s]


Epoch: 24
Avg train loss: 0.5481829017400741


100%|██████████| 10/10 [00:00<00:00, 1740.09it/s]


Epoch: 25
Avg train loss: 0.5491133391857147


100%|██████████| 10/10 [00:00<00:00, 1257.85it/s]


Epoch: 26
Avg train loss: 0.541738411784172


100%|██████████| 10/10 [00:00<00:00, 1564.63it/s]


Epoch: 27
Avg train loss: 0.5427100896835327


100%|██████████| 10/10 [00:00<00:00, 1444.27it/s]


Epoch: 28
Avg train loss: 0.5403582453727722


100%|██████████| 10/10 [00:00<00:00, 1716.16it/s]


Epoch: 29
Avg train loss: 0.5370671510696411


100%|██████████| 10/10 [00:00<00:00, 1733.47it/s]


Epoch: 30
Avg train loss: 0.5314882189035416


100%|██████████| 10/10 [00:00<00:00, 1731.04it/s]


Epoch: 31
Avg train loss: 0.536635023355484


100%|██████████| 10/10 [00:00<00:00, 1830.93it/s]


Epoch: 32
Avg train loss: 0.527933445572853


100%|██████████| 10/10 [00:00<00:00, 1905.81it/s]


Epoch: 33
Avg train loss: 0.5336944937705994


100%|██████████| 10/10 [00:00<00:00, 1799.67it/s]


Epoch: 34
Avg train loss: 0.5189680010080338


100%|██████████| 10/10 [00:00<00:00, 1996.72it/s]


Epoch: 35
Avg train loss: 0.5184868633747101


100%|██████████| 10/10 [00:00<00:00, 1918.19it/s]


Epoch: 36
Avg train loss: 0.5143346220254899


100%|██████████| 10/10 [00:00<00:00, 1852.69it/s]


Epoch: 37
Avg train loss: 0.5183494597673416


100%|██████████| 10/10 [00:00<00:00, 1810.62it/s]


Epoch: 38
Avg train loss: 0.5148908823728562


100%|██████████| 10/10 [00:00<00:00, 1496.52it/s]


Epoch: 39
Avg train loss: 0.5119739681482315


100%|██████████| 10/10 [00:00<00:00, 1311.58it/s]


Epoch: 40
Avg train loss: 0.5105486184358596


100%|██████████| 10/10 [00:00<00:00, 1736.20it/s]


Epoch: 41
Avg train loss: 0.5018544167280197


100%|██████████| 10/10 [00:00<00:00, 1824.32it/s]


Epoch: 42
Avg train loss: 0.5115246891975402


100%|██████████| 10/10 [00:00<00:00, 1872.79it/s]


Epoch: 43
Avg train loss: 0.4947472780942917


100%|██████████| 10/10 [00:00<00:00, 1629.30it/s]


Epoch: 44
Avg train loss: 0.5092463076114655


100%|██████████| 10/10 [00:00<00:00, 1581.92it/s]


Epoch: 45
Avg train loss: 0.4966872274875641


100%|██████████| 10/10 [00:00<00:00, 1879.17it/s]


Epoch: 46
Avg train loss: 0.4980867803096771


100%|██████████| 10/10 [00:00<00:00, 1850.97it/s]


Epoch: 47
Avg train loss: 0.5002893000841141


100%|██████████| 10/10 [00:00<00:00, 1746.24it/s]


Epoch: 48
Avg train loss: 0.48865213096141813


100%|██████████| 10/10 [00:00<00:00, 1721.52it/s]


Epoch: 49
Avg train loss: 0.48548262715339663


100%|██████████| 10/10 [00:00<00:00, 1733.40it/s]


Epoch: 50
Avg train loss: 0.49466651678085327


100%|██████████| 10/10 [00:00<00:00, 1664.27it/s]


Epoch: 51
Avg train loss: 0.490256080031395


100%|██████████| 10/10 [00:00<00:00, 1921.96it/s]


Epoch: 52
Avg train loss: 0.4845461815595627


100%|██████████| 10/10 [00:00<00:00, 1905.81it/s]


Epoch: 53
Avg train loss: 0.4847874492406845


100%|██████████| 10/10 [00:00<00:00, 1551.55it/s]


Epoch: 54
Avg train loss: 0.48401854038238523


100%|██████████| 10/10 [00:00<00:00, 1728.11it/s]


Epoch: 55
Avg train loss: 0.47730816900730133


100%|██████████| 10/10 [00:00<00:00, 1926.02it/s]


Epoch: 56
Avg train loss: 0.485881906747818


100%|██████████| 10/10 [00:00<00:00, 1912.33it/s]


Epoch: 57
Avg train loss: 0.48528223037719725


100%|██████████| 10/10 [00:00<00:00, 1813.83it/s]


Epoch: 58
Avg train loss: 0.48110365569591523


100%|██████████| 10/10 [00:00<00:00, 1949.75it/s]


Epoch: 59
Avg train loss: 0.47594261467456817


100%|██████████| 10/10 [00:00<00:00, 1952.84it/s]


Epoch: 60
Avg train loss: 0.47024109661579133


100%|██████████| 10/10 [00:00<00:00, 2066.46it/s]


Epoch: 61
Avg train loss: 0.475181382894516


100%|██████████| 10/10 [00:00<00:00, 1943.25it/s]


Epoch: 62
Avg train loss: 0.4843090891838074


100%|██████████| 10/10 [00:00<00:00, 1952.56it/s]


Epoch: 63
Avg train loss: 0.47468664050102233


100%|██████████| 10/10 [00:00<00:00, 1954.84it/s]


Epoch: 64
Avg train loss: 0.4690114349126816


100%|██████████| 10/10 [00:00<00:00, 1818.79it/s]


Epoch: 65
Avg train loss: 0.4698569178581238


100%|██████████| 10/10 [00:00<00:00, 1701.75it/s]


Epoch: 66
Avg train loss: 0.46323978900909424


100%|██████████| 10/10 [00:00<00:00, 1460.82it/s]


Epoch: 67
Avg train loss: 0.45802857875823977


100%|██████████| 10/10 [00:00<00:00, 1800.21it/s]


Epoch: 68
Avg train loss: 0.45716341137886046


100%|██████████| 10/10 [00:00<00:00, 1740.66it/s]


Epoch: 69
Avg train loss: 0.45879376828670504


100%|██████████| 10/10 [00:00<00:00, 1635.72it/s]


Epoch: 70
Avg train loss: 0.4577661007642746


100%|██████████| 10/10 [00:00<00:00, 1291.15it/s]


Epoch: 71
Avg train loss: 0.4584824949502945


100%|██████████| 10/10 [00:00<00:00, 1479.42it/s]


Epoch: 72
Avg train loss: 0.4608888953924179


100%|██████████| 10/10 [00:00<00:00, 1483.71it/s]


Epoch: 73
Avg train loss: 0.45642916262149813


100%|██████████| 10/10 [00:00<00:00, 1409.00it/s]


Epoch: 74
Avg train loss: 0.453093421459198


100%|██████████| 10/10 [00:00<00:00, 1447.31it/s]


Epoch: 75
Avg train loss: 0.46523565948009493


100%|██████████| 10/10 [00:00<00:00, 1567.20it/s]


Epoch: 76
Avg train loss: 0.4547290563583374


100%|██████████| 10/10 [00:00<00:00, 1587.31it/s]


Epoch: 77
Avg train loss: 0.4500205248594284


100%|██████████| 10/10 [00:00<00:00, 1576.21it/s]


Epoch: 78
Avg train loss: 0.45406404733657835


100%|██████████| 10/10 [00:00<00:00, 1604.92it/s]


Epoch: 79
Avg train loss: 0.4520445466041565


100%|██████████| 10/10 [00:00<00:00, 1596.61it/s]


Epoch: 80
Avg train loss: 0.44490715861320496


100%|██████████| 10/10 [00:00<00:00, 1575.21it/s]


Epoch: 81
Avg train loss: 0.44852415919303895


100%|██████████| 10/10 [00:00<00:00, 1987.45it/s]


Epoch: 82
Avg train loss: 0.43780567944049836


100%|██████████| 10/10 [00:00<00:00, 2032.12it/s]


Epoch: 83
Avg train loss: 0.44689629375934603


100%|██████████| 10/10 [00:00<00:00, 1994.15it/s]


Epoch: 84
Avg train loss: 0.4376087307929993


100%|██████████| 10/10 [00:00<00:00, 1930.01it/s]


Epoch: 85
Avg train loss: 0.4536299467086792


100%|██████████| 10/10 [00:00<00:00, 1689.34it/s]


Epoch: 86
Avg train loss: 0.4433282345533371


100%|██████████| 10/10 [00:00<00:00, 1701.13it/s]


Epoch: 87
Avg train loss: 0.43867531716823577


100%|██████████| 10/10 [00:00<00:00, 1819.50it/s]


Epoch: 88
Avg train loss: 0.44629078209400175


100%|██████████| 10/10 [00:00<00:00, 1660.58it/s]


Epoch: 89
Avg train loss: 0.43393745720386506


100%|██████████| 10/10 [00:00<00:00, 1912.76it/s]


Epoch: 90
Avg train loss: 0.42575321793556214


100%|██████████| 10/10 [00:00<00:00, 1652.93it/s]


Epoch: 91
Avg train loss: 0.44562983959913255


100%|██████████| 10/10 [00:00<00:00, 1409.94it/s]


Epoch: 92
Avg train loss: 0.44444625079631805


100%|██████████| 10/10 [00:00<00:00, 1258.68it/s]


Epoch: 93
Avg train loss: 0.43591445982456206


100%|██████████| 10/10 [00:00<00:00, 1382.34it/s]


Epoch: 94
Avg train loss: 0.4324803203344345


100%|██████████| 10/10 [00:00<00:00, 1318.42it/s]


Epoch: 95
Avg train loss: 0.44366805255413055


100%|██████████| 10/10 [00:00<00:00, 1384.67it/s]


Epoch: 96
Avg train loss: 0.44396171271800994


100%|██████████| 10/10 [00:00<00:00, 1591.40it/s]


Epoch: 97
Avg train loss: 0.43692323863506316


100%|██████████| 10/10 [00:00<00:00, 1860.66it/s]


Epoch: 98
Avg train loss: 0.44408890306949617


100%|██████████| 10/10 [00:00<00:00, 1562.30it/s]


Epoch: 99
Avg train loss: 0.4335035473108292


In [157]:
num_epochs = 100
batch_size = 16
dev_labels_tensor = torch.tensor(dev_labels, dtype=torch.float32).unsqueeze(1) 
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
for epoch in range(num_epochs):
    samples = list(zip(all_dev_feature_vectors, dev_labels_tensor))
    random.shuffle(samples)
    batches = [samples[i:i + batch_size] for i in range(0, len(samples), batch_size)]

    epoch_i_train_losses = []

    for batch in tqdm(batches):
        feature_vectors, labels = zip(*batch)
        
        # Convert to tensors
        feature_vectors = torch.tensor(feature_vectors, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # Ensure labels are 2D
        
        # Step 1: Zero gradients
        model.zero_grad()

        # Step 2: Forward pass
        log_probs = model(feature_vectors)

        # Step 3: Compute loss
        loss = loss_function(log_probs, labels)

        # Step 4: Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Store loss
        epoch_i_train_losses.append(loss.item())
    
    # Print the average loss for this epoch
    print('Epoch:', epoch)
    print('Avg train loss:', sum(epoch_i_train_losses) / len(epoch_i_train_losses))


100%|██████████| 3/3 [00:00<00:00, 898.07it/s]


Epoch: 0
Avg train loss: 0.44176697731018066


100%|██████████| 3/3 [00:00<00:00, 886.12it/s]


Epoch: 1
Avg train loss: 0.4638017416000366


100%|██████████| 3/3 [00:00<00:00, 1327.17it/s]


Epoch: 2
Avg train loss: 0.4368857443332672


100%|██████████| 3/3 [00:00<00:00, 944.73it/s]


Epoch: 3
Avg train loss: 0.44747884074846905


100%|██████████| 3/3 [00:00<00:00, 993.91it/s]


Epoch: 4
Avg train loss: 0.45474640528361004


100%|██████████| 3/3 [00:00<00:00, 1396.08it/s]


Epoch: 5
Avg train loss: 0.45435841878255206


100%|██████████| 3/3 [00:00<00:00, 1391.45it/s]


Epoch: 6
Avg train loss: 0.4604451557000478


100%|██████████| 3/3 [00:00<00:00, 1368.90it/s]


Epoch: 7
Avg train loss: 0.4397527277469635


100%|██████████| 3/3 [00:00<00:00, 1279.92it/s]


Epoch: 8
Avg train loss: 0.4694468379020691


100%|██████████| 3/3 [00:00<00:00, 1009.86it/s]


Epoch: 9
Avg train loss: 0.49639083941777545


100%|██████████| 3/3 [00:00<00:00, 1158.86it/s]


Epoch: 10
Avg train loss: 0.4877488414446513


100%|██████████| 3/3 [00:00<00:00, 1416.04it/s]


Epoch: 11
Avg train loss: 0.43257341782251996


100%|██████████| 3/3 [00:00<00:00, 1211.18it/s]


Epoch: 12
Avg train loss: 0.46889835596084595


100%|██████████| 3/3 [00:00<00:00, 1141.72it/s]


Epoch: 13
Avg train loss: 0.4328196446100871


100%|██████████| 3/3 [00:00<00:00, 1376.54it/s]


Epoch: 14
Avg train loss: 0.4585046172142029


100%|██████████| 3/3 [00:00<00:00, 1281.62it/s]


Epoch: 15
Avg train loss: 0.4245223601659139


100%|██████████| 3/3 [00:00<00:00, 1093.60it/s]


Epoch: 16
Avg train loss: 0.4366331398487091


100%|██████████| 3/3 [00:00<00:00, 1291.88it/s]


Epoch: 17
Avg train loss: 0.4146653413772583


100%|██████████| 3/3 [00:00<00:00, 1011.08it/s]


Epoch: 18
Avg train loss: 0.42292822400728863


100%|██████████| 3/3 [00:00<00:00, 1026.84it/s]


Epoch: 19
Avg train loss: 0.41910239060719806


100%|██████████| 3/3 [00:00<00:00, 1231.69it/s]


Epoch: 20
Avg train loss: 0.46484025319417316


100%|██████████| 3/3 [00:00<00:00, 1269.85it/s]


Epoch: 21
Avg train loss: 0.45506176352500916


100%|██████████| 3/3 [00:00<00:00, 1328.01it/s]


Epoch: 22
Avg train loss: 0.44501840074857074


100%|██████████| 3/3 [00:00<00:00, 1328.99it/s]


Epoch: 23
Avg train loss: 0.46836162606875104


100%|██████████| 3/3 [00:00<00:00, 1320.76it/s]


Epoch: 24
Avg train loss: 0.44252004226048786


100%|██████████| 3/3 [00:00<00:00, 1219.27it/s]


Epoch: 25
Avg train loss: 0.4484175741672516


100%|██████████| 3/3 [00:00<00:00, 1343.61it/s]


Epoch: 26
Avg train loss: 0.4350263973077138


100%|██████████| 3/3 [00:00<00:00, 1175.97it/s]


Epoch: 27
Avg train loss: 0.4423459470272064


100%|██████████| 3/3 [00:00<00:00, 1316.34it/s]


Epoch: 28
Avg train loss: 0.42852654059727985


100%|██████████| 3/3 [00:00<00:00, 1523.72it/s]


Epoch: 29
Avg train loss: 0.4926674763361613


100%|██████████| 3/3 [00:00<00:00, 1407.64it/s]


Epoch: 30
Avg train loss: 0.435577134291331


100%|██████████| 3/3 [00:00<00:00, 1441.01it/s]


Epoch: 31
Avg train loss: 0.40146106978257495


100%|██████████| 3/3 [00:00<00:00, 1671.48it/s]


Epoch: 32
Avg train loss: 0.4107601543267568


100%|██████████| 3/3 [00:00<00:00, 1548.09it/s]


Epoch: 33
Avg train loss: 0.4762845536073049


100%|██████████| 3/3 [00:00<00:00, 1634.36it/s]


Epoch: 34
Avg train loss: 0.43631768226623535


100%|██████████| 3/3 [00:00<00:00, 1454.00it/s]


Epoch: 35
Avg train loss: 0.42764224608739215


100%|██████████| 3/3 [00:00<00:00, 1367.86it/s]


Epoch: 36
Avg train loss: 0.4123268723487854


100%|██████████| 3/3 [00:00<00:00, 1273.06it/s]


Epoch: 37
Avg train loss: 0.4567711353302002


100%|██████████| 3/3 [00:00<00:00, 1563.29it/s]


Epoch: 38
Avg train loss: 0.45113139351209003


100%|██████████| 3/3 [00:00<00:00, 944.73it/s]


Epoch: 39
Avg train loss: 0.4544653097788493


100%|██████████| 3/3 [00:00<00:00, 1065.45it/s]


Epoch: 40
Avg train loss: 0.4415835738182068


100%|██████████| 3/3 [00:00<00:00, 1002.14it/s]


Epoch: 41
Avg train loss: 0.44197046756744385


100%|██████████| 3/3 [00:00<00:00, 940.99it/s]


Epoch: 42
Avg train loss: 0.4226716061433156


100%|██████████| 3/3 [00:00<00:00, 890.89it/s]


Epoch: 43
Avg train loss: 0.4505275984605153


100%|██████████| 3/3 [00:00<00:00, 720.92it/s]


Epoch: 44
Avg train loss: 0.4291002154350281


100%|██████████| 3/3 [00:00<00:00, 856.68it/s]


Epoch: 45
Avg train loss: 0.41544108589490253


100%|██████████| 3/3 [00:00<00:00, 753.11it/s]


Epoch: 46
Avg train loss: 0.4237392743428548


100%|██████████| 3/3 [00:00<00:00, 894.44it/s]


Epoch: 47
Avg train loss: 0.4335375924905141


100%|██████████| 3/3 [00:00<00:00, 887.93it/s]


Epoch: 48
Avg train loss: 0.416120320558548


100%|██████████| 3/3 [00:00<00:00, 1064.18it/s]


Epoch: 49
Avg train loss: 0.41969571510950726


100%|██████████| 3/3 [00:00<00:00, 594.66it/s]


Epoch: 50
Avg train loss: 0.44630318880081177


100%|██████████| 3/3 [00:00<00:00, 681.48it/s]


Epoch: 51
Avg train loss: 0.414333701133728


100%|██████████| 3/3 [00:00<00:00, 982.89it/s]


Epoch: 52
Avg train loss: 0.4371451139450073


100%|██████████| 3/3 [00:00<00:00, 940.50it/s]


Epoch: 53
Avg train loss: 0.44937074184417725


100%|██████████| 3/3 [00:00<00:00, 771.20it/s]


Epoch: 54
Avg train loss: 0.46027127901713055


100%|██████████| 3/3 [00:00<00:00, 885.62it/s]


Epoch: 55
Avg train loss: 0.4265586535135905


100%|██████████| 3/3 [00:00<00:00, 774.81it/s]


Epoch: 56
Avg train loss: 0.44084931413332623


100%|██████████| 3/3 [00:00<00:00, 1105.70it/s]


Epoch: 57
Avg train loss: 0.42548970381418866


100%|██████████| 3/3 [00:00<00:00, 899.94it/s]


Epoch: 58
Avg train loss: 0.42439694205919903


100%|██████████| 3/3 [00:00<00:00, 911.74it/s]


Epoch: 59
Avg train loss: 0.41621455550193787


100%|██████████| 3/3 [00:00<00:00, 835.69it/s]


Epoch: 60
Avg train loss: 0.4650367796421051


100%|██████████| 3/3 [00:00<00:00, 862.73it/s]


Epoch: 61
Avg train loss: 0.447065790494283


100%|██████████| 3/3 [00:00<00:00, 874.91it/s]


Epoch: 62
Avg train loss: 0.45215916633605957


100%|██████████| 3/3 [00:00<00:00, 869.11it/s]


Epoch: 63
Avg train loss: 0.4187166492144267


100%|██████████| 3/3 [00:00<00:00, 884.50it/s]


Epoch: 64
Avg train loss: 0.44497014085451764


100%|██████████| 3/3 [00:00<00:00, 900.07it/s]


Epoch: 65
Avg train loss: 0.4009702404340108


100%|██████████| 3/3 [00:00<00:00, 787.71it/s]


Epoch: 66
Avg train loss: 0.45164469877878827


100%|██████████| 3/3 [00:00<00:00, 1391.76it/s]


Epoch: 67
Avg train loss: 0.4163526693979899


100%|██████████| 3/3 [00:00<00:00, 1148.60it/s]


Epoch: 68
Avg train loss: 0.4680512448151906


100%|██████████| 3/3 [00:00<00:00, 840.49it/s]


Epoch: 69
Avg train loss: 0.4205908675988515


100%|██████████| 3/3 [00:00<00:00, 1121.67it/s]


Epoch: 70
Avg train loss: 0.40565470854441327


100%|██████████| 3/3 [00:00<00:00, 1066.26it/s]


Epoch: 71
Avg train loss: 0.44755051533381146


100%|██████████| 3/3 [00:00<00:00, 1154.40it/s]


Epoch: 72
Avg train loss: 0.43465956052144367


100%|██████████| 3/3 [00:00<00:00, 1232.17it/s]


Epoch: 73
Avg train loss: 0.41043806076049805


100%|██████████| 3/3 [00:00<00:00, 1182.05it/s]


Epoch: 74
Avg train loss: 0.44467146197954815


100%|██████████| 3/3 [00:00<00:00, 1193.94it/s]


Epoch: 75
Avg train loss: 0.43314988414446515


100%|██████████| 3/3 [00:00<00:00, 1138.31it/s]


Epoch: 76
Avg train loss: 0.4085944990317027


100%|██████████| 3/3 [00:00<00:00, 1079.71it/s]


Epoch: 77
Avg train loss: 0.45523036519686383


100%|██████████| 3/3 [00:00<00:00, 930.28it/s]


Epoch: 78
Avg train loss: 0.4525506794452667


100%|██████████| 3/3 [00:00<00:00, 894.69it/s]


Epoch: 79
Avg train loss: 0.4382609526316325


100%|██████████| 3/3 [00:00<00:00, 935.81it/s]


Epoch: 80
Avg train loss: 0.43700005610783893


100%|██████████| 3/3 [00:00<00:00, 879.92it/s]


Epoch: 81
Avg train loss: 0.46613789598147076


100%|██████████| 3/3 [00:00<00:00, 857.20it/s]


Epoch: 82
Avg train loss: 0.4096555213133494


100%|██████████| 3/3 [00:00<00:00, 1048.40it/s]


Epoch: 83
Avg train loss: 0.4547918935616811


100%|██████████| 3/3 [00:00<00:00, 769.13it/s]


Epoch: 84
Avg train loss: 0.43157235781351727


100%|██████████| 3/3 [00:00<00:00, 765.90it/s]


Epoch: 85
Avg train loss: 0.4102581838766734


100%|██████████| 3/3 [00:00<00:00, 967.25it/s]


Epoch: 86
Avg train loss: 0.3963657120863597


100%|██████████| 3/3 [00:00<00:00, 1054.99it/s]


Epoch: 87
Avg train loss: 0.4559563895066579


100%|██████████| 3/3 [00:00<00:00, 1026.84it/s]


Epoch: 88
Avg train loss: 0.432176411151886


100%|██████████| 3/3 [00:00<00:00, 902.97it/s]


Epoch: 89
Avg train loss: 0.45119399825731915


100%|██████████| 3/3 [00:00<00:00, 974.51it/s]


Epoch: 90
Avg train loss: 0.4663858115673065


100%|██████████| 3/3 [00:00<00:00, 844.77it/s]


Epoch: 91
Avg train loss: 0.42108237743377686


100%|██████████| 3/3 [00:00<00:00, 852.44it/s]


Epoch: 92
Avg train loss: 0.40566088755925495


100%|██████████| 3/3 [00:00<00:00, 862.61it/s]


Epoch: 93
Avg train loss: 0.43707506855328876


100%|██████████| 3/3 [00:00<00:00, 1007.12it/s]


Epoch: 94
Avg train loss: 0.45901866753896076


100%|██████████| 3/3 [00:00<00:00, 956.29it/s]


Epoch: 95
Avg train loss: 0.40858978033065796


100%|██████████| 3/3 [00:00<00:00, 1176.41it/s]


Epoch: 96
Avg train loss: 0.4277440905570984


100%|██████████| 3/3 [00:00<00:00, 1165.62it/s]


Epoch: 97
Avg train loss: 0.470263530810674


100%|██████████| 3/3 [00:00<00:00, 1316.62it/s]


Epoch: 98
Avg train loss: 0.45249488949775696


100%|██████████| 3/3 [00:00<00:00, 1191.56it/s]

Epoch: 99
Avg train loss: 0.4453155001004537





In [158]:
from sklearn.metrics import precision_score, recall_score, f1_score

dev_feature_tensor = torch.tensor(all_dev_feature_vectors, dtype=torch.float32)
true_dev_labels_tensor = torch.tensor(dev_labels, dtype=torch.int)

# Get predicted probabilities from the model
with torch.no_grad():  # No gradient calculation needed for evaluation
    dev_probs = model(dev_feature_tensor)

# Convert probabilities to binary labels
predicted_dev_labels = model.logprob2label(dev_probs)

# Convert tensors to lists for metric calculation
predicted_dev_labels = predicted_dev_labels.squeeze().tolist()
true_dev_labels = true_dev_labels_tensor.tolist()

# Calculate precision, recall, and F1-score
precision = precision_score(true_dev_labels, predicted_dev_labels)
recall = recall_score(true_dev_labels, predicted_dev_labels)
f1 = f1_score(true_dev_labels, predicted_dev_labels)

# Print results
print(f'Precision for Dev: {precision:.4f}')
print(f'Recall for Dev: {recall:.4f}')
print(f'F1-score for Dev: {f1:.4f}')

Precision for Dev: 0.9000
Recall for Dev: 0.9474
F1-score for Dev: 0.9231


In [159]:

num_epochs = 200
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
batch_size = 16
test_labels_tensor = torch.tensor(test_labels, dtype=torch.float32).unsqueeze(1)  

for epoch in range(num_epochs):
    samples = list(zip(all_test_feature_vectors, test_labels_tensor))
    random.shuffle(samples)
    batches = [samples[i:i + batch_size] for i in range(0, len(samples), batch_size)]

    epoch_i_train_losses = []

    for batch in tqdm(batches):
        feature_vectors, labels = zip(*batch)
        
        # Convert to tensors
        feature_vectors = torch.tensor(feature_vectors, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # Ensure labels are 2D
        
        # Step 1: Zero gradients
        model.zero_grad()

        # Step 2: Forward pass
        log_probs = model(feature_vectors)

        # Step 3: Compute loss
        loss = loss_function(log_probs, labels)

        # Step 4: Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Store loss
        epoch_i_train_losses.append(loss.item())
    
    # Print the average loss for this epoch
    print('Epoch:', epoch)
    print('Avg train loss:', sum(epoch_i_train_losses) / len(epoch_i_train_losses))

100%|██████████| 4/4 [00:00<00:00, 1091.49it/s]


Epoch: 0
Avg train loss: 0.4302847534418106


100%|██████████| 4/4 [00:00<00:00, 1149.36it/s]


Epoch: 1
Avg train loss: 0.40590767562389374


100%|██████████| 4/4 [00:00<00:00, 984.81it/s]


Epoch: 2
Avg train loss: 0.39253050833940506


100%|██████████| 4/4 [00:00<00:00, 1178.75it/s]


Epoch: 3
Avg train loss: 0.4448644816875458


100%|██████████| 4/4 [00:00<00:00, 1097.55it/s]


Epoch: 4
Avg train loss: 0.48115816712379456


100%|██████████| 4/4 [00:00<00:00, 1300.66it/s]


Epoch: 5
Avg train loss: 0.38982269912958145


100%|██████████| 4/4 [00:00<00:00, 1461.56it/s]


Epoch: 6
Avg train loss: 0.42173148691654205


100%|██████████| 4/4 [00:00<00:00, 1613.50it/s]


Epoch: 7
Avg train loss: 0.4136934205889702


100%|██████████| 4/4 [00:00<00:00, 1519.54it/s]


Epoch: 8
Avg train loss: 0.4129953756928444


100%|██████████| 4/4 [00:00<00:00, 1537.64it/s]


Epoch: 9
Avg train loss: 0.42140335589647293


100%|██████████| 4/4 [00:00<00:00, 1384.72it/s]


Epoch: 10
Avg train loss: 0.4483982101082802


100%|██████████| 4/4 [00:00<00:00, 1354.53it/s]


Epoch: 11
Avg train loss: 0.3678414523601532


100%|██████████| 4/4 [00:00<00:00, 958.53it/s]


Epoch: 12
Avg train loss: 0.38908709958195686


100%|██████████| 4/4 [00:00<00:00, 957.00it/s]


Epoch: 13
Avg train loss: 0.38008370995521545


100%|██████████| 4/4 [00:00<00:00, 929.07it/s]


Epoch: 14
Avg train loss: 0.45260845124721527


100%|██████████| 4/4 [00:00<00:00, 999.18it/s]


Epoch: 15
Avg train loss: 0.3626218605786562


100%|██████████| 4/4 [00:00<00:00, 979.23it/s]


Epoch: 16
Avg train loss: 0.44696827977895737


100%|██████████| 4/4 [00:00<00:00, 843.71it/s]


Epoch: 17
Avg train loss: 0.37612026929855347


100%|██████████| 4/4 [00:00<00:00, 1316.38it/s]


Epoch: 18
Avg train loss: 0.4630061313509941


100%|██████████| 4/4 [00:00<00:00, 1343.36it/s]


Epoch: 19
Avg train loss: 0.42828959226608276


100%|██████████| 4/4 [00:00<00:00, 1221.85it/s]


Epoch: 20
Avg train loss: 0.36901770532131195


100%|██████████| 4/4 [00:00<00:00, 1005.41it/s]


Epoch: 21
Avg train loss: 0.35523029416799545


100%|██████████| 4/4 [00:00<00:00, 1350.61it/s]


Epoch: 22
Avg train loss: 0.39160827547311783


100%|██████████| 4/4 [00:00<00:00, 1137.28it/s]


Epoch: 23
Avg train loss: 0.43054716289043427


100%|██████████| 4/4 [00:00<00:00, 1222.21it/s]


Epoch: 24
Avg train loss: 0.4483567848801613


100%|██████████| 4/4 [00:00<00:00, 1297.74it/s]


Epoch: 25
Avg train loss: 0.5092839300632477


100%|██████████| 4/4 [00:00<00:00, 1371.25it/s]


Epoch: 26
Avg train loss: 0.4568813443183899


100%|██████████| 4/4 [00:00<00:00, 1131.38it/s]


Epoch: 27
Avg train loss: 0.4598621726036072


100%|██████████| 4/4 [00:00<00:00, 1504.95it/s]


Epoch: 28
Avg train loss: 0.4080599397420883


100%|██████████| 4/4 [00:00<00:00, 1136.13it/s]


Epoch: 29
Avg train loss: 0.43443380296230316


100%|██████████| 4/4 [00:00<00:00, 1149.83it/s]


Epoch: 30
Avg train loss: 0.49313269555568695


100%|██████████| 4/4 [00:00<00:00, 1171.59it/s]


Epoch: 31
Avg train loss: 0.3874813914299011


100%|██████████| 4/4 [00:00<00:00, 1186.84it/s]


Epoch: 32
Avg train loss: 0.4099969193339348


100%|██████████| 4/4 [00:00<00:00, 1076.08it/s]


Epoch: 33
Avg train loss: 0.3939746022224426


100%|██████████| 4/4 [00:00<00:00, 1186.00it/s]


Epoch: 34
Avg train loss: 0.4955167770385742


100%|██████████| 4/4 [00:00<00:00, 1053.65it/s]


Epoch: 35
Avg train loss: 0.374330997467041


100%|██████████| 4/4 [00:00<00:00, 1327.21it/s]


Epoch: 36
Avg train loss: 0.3794885724782944


100%|██████████| 4/4 [00:00<00:00, 1388.73it/s]


Epoch: 37
Avg train loss: 0.47348523139953613


100%|██████████| 4/4 [00:00<00:00, 1307.55it/s]


Epoch: 38
Avg train loss: 0.4438256472349167


100%|██████████| 4/4 [00:00<00:00, 1452.20it/s]


Epoch: 39
Avg train loss: 0.39584895968437195


100%|██████████| 4/4 [00:00<00:00, 1400.08it/s]


Epoch: 40
Avg train loss: 0.3916240409016609


100%|██████████| 4/4 [00:00<00:00, 1341.21it/s]


Epoch: 41
Avg train loss: 0.39548005163669586


100%|██████████| 4/4 [00:00<00:00, 1281.88it/s]


Epoch: 42
Avg train loss: 0.4535951092839241


100%|██████████| 4/4 [00:00<00:00, 976.33it/s]


Epoch: 43
Avg train loss: 0.4753965213894844


100%|██████████| 4/4 [00:00<00:00, 1077.81it/s]


Epoch: 44
Avg train loss: 0.39604146778583527


100%|██████████| 4/4 [00:00<00:00, 1314.31it/s]


Epoch: 45
Avg train loss: 0.38376737385988235


100%|██████████| 4/4 [00:00<00:00, 1326.37it/s]


Epoch: 46
Avg train loss: 0.40953242778778076


100%|██████████| 4/4 [00:00<00:00, 1356.50it/s]


Epoch: 47
Avg train loss: 0.43603693693876266


100%|██████████| 4/4 [00:00<00:00, 1604.55it/s]


Epoch: 48
Avg train loss: 0.36195822432637215


100%|██████████| 4/4 [00:00<00:00, 1353.44it/s]


Epoch: 49
Avg train loss: 0.38536158949136734


100%|██████████| 4/4 [00:00<00:00, 1117.96it/s]


Epoch: 50
Avg train loss: 0.4267318621277809


100%|██████████| 4/4 [00:00<00:00, 1271.19it/s]


Epoch: 51
Avg train loss: 0.4128201901912689


100%|██████████| 4/4 [00:00<00:00, 1007.88it/s]


Epoch: 52
Avg train loss: 0.4303368031978607


100%|██████████| 4/4 [00:00<00:00, 1039.48it/s]


Epoch: 53
Avg train loss: 0.40811655670404434


100%|██████████| 4/4 [00:00<00:00, 1465.90it/s]


Epoch: 54
Avg train loss: 0.4046989902853966


100%|██████████| 4/4 [00:00<00:00, 1120.12it/s]


Epoch: 55
Avg train loss: 0.37664493173360825


100%|██████████| 4/4 [00:00<00:00, 1629.49it/s]


Epoch: 56
Avg train loss: 0.3988643288612366


100%|██████████| 4/4 [00:00<00:00, 1559.80it/s]


Epoch: 57
Avg train loss: 0.4700610190629959


100%|██████████| 4/4 [00:00<00:00, 1381.30it/s]


Epoch: 58
Avg train loss: 0.4089052453637123


100%|██████████| 4/4 [00:00<00:00, 1560.53it/s]


Epoch: 59
Avg train loss: 0.38642093539237976


100%|██████████| 4/4 [00:00<00:00, 1275.35it/s]


Epoch: 60
Avg train loss: 0.43463345617055893


100%|██████████| 4/4 [00:00<00:00, 1189.20it/s]


Epoch: 61
Avg train loss: 0.40952979028224945


100%|██████████| 4/4 [00:00<00:00, 1555.17it/s]


Epoch: 62
Avg train loss: 0.36929402127861977


100%|██████████| 4/4 [00:00<00:00, 1158.01it/s]


Epoch: 63
Avg train loss: 0.3782661557197571


100%|██████████| 4/4 [00:00<00:00, 1055.64it/s]


Epoch: 64
Avg train loss: 0.4008360207080841


100%|██████████| 4/4 [00:00<00:00, 1344.54it/s]


Epoch: 65
Avg train loss: 0.4217839017510414


100%|██████████| 4/4 [00:00<00:00, 1231.81it/s]


Epoch: 66
Avg train loss: 0.37720803916454315


100%|██████████| 4/4 [00:00<00:00, 1482.22it/s]


Epoch: 67
Avg train loss: 0.3405317757278681


100%|██████████| 4/4 [00:00<00:00, 1060.44it/s]


Epoch: 68
Avg train loss: 0.39031822234392166


100%|██████████| 4/4 [00:00<00:00, 1085.27it/s]


Epoch: 69
Avg train loss: 0.4295657202601433


100%|██████████| 4/4 [00:00<00:00, 1289.36it/s]


Epoch: 70
Avg train loss: 0.3710966408252716


100%|██████████| 4/4 [00:00<00:00, 1104.27it/s]


Epoch: 71
Avg train loss: 0.41811586171388626


100%|██████████| 4/4 [00:00<00:00, 1181.91it/s]


Epoch: 72
Avg train loss: 0.3615132048726082


100%|██████████| 4/4 [00:00<00:00, 1191.65it/s]


Epoch: 73
Avg train loss: 0.3945522680878639


100%|██████████| 4/4 [00:00<00:00, 1163.07it/s]


Epoch: 74
Avg train loss: 0.39461979269981384


100%|██████████| 4/4 [00:00<00:00, 1438.38it/s]


Epoch: 75
Avg train loss: 0.37057579308748245


100%|██████████| 4/4 [00:00<00:00, 1222.56it/s]


Epoch: 76
Avg train loss: 0.43126196414232254


100%|██████████| 4/4 [00:00<00:00, 1218.04it/s]


Epoch: 77
Avg train loss: 0.43086758255958557


100%|██████████| 4/4 [00:00<00:00, 1286.99it/s]


Epoch: 78
Avg train loss: 0.3704877495765686


100%|██████████| 4/4 [00:00<00:00, 1223.90it/s]


Epoch: 79
Avg train loss: 0.4815577194094658


100%|██████████| 4/4 [00:00<00:00, 1422.40it/s]


Epoch: 80
Avg train loss: 0.4191199988126755


100%|██████████| 4/4 [00:00<00:00, 1522.85it/s]


Epoch: 81
Avg train loss: 0.3991376534104347


100%|██████████| 4/4 [00:00<00:00, 1277.39it/s]


Epoch: 82
Avg train loss: 0.41120830923318863


100%|██████████| 4/4 [00:00<00:00, 1349.08it/s]


Epoch: 83
Avg train loss: 0.4331924542784691


100%|██████████| 4/4 [00:00<00:00, 1445.81it/s]


Epoch: 84
Avg train loss: 0.39308764040470123


100%|██████████| 4/4 [00:00<00:00, 1168.82it/s]


Epoch: 85
Avg train loss: 0.3730259835720062


100%|██████████| 4/4 [00:00<00:00, 1097.99it/s]


Epoch: 86
Avg train loss: 0.4453992247581482


100%|██████████| 4/4 [00:00<00:00, 1323.96it/s]


Epoch: 87
Avg train loss: 0.4028027579188347


100%|██████████| 4/4 [00:00<00:00, 1436.28it/s]


Epoch: 88
Avg train loss: 0.34146464243531227


100%|██████████| 4/4 [00:00<00:00, 1359.47it/s]


Epoch: 89
Avg train loss: 0.409750834107399


100%|██████████| 4/4 [00:00<00:00, 1382.78it/s]


Epoch: 90
Avg train loss: 0.37737491726875305


100%|██████████| 4/4 [00:00<00:00, 1258.42it/s]


Epoch: 91
Avg train loss: 0.4539794698357582


100%|██████████| 4/4 [00:00<00:00, 1354.97it/s]


Epoch: 92
Avg train loss: 0.416384257376194


100%|██████████| 4/4 [00:00<00:00, 1323.12it/s]


Epoch: 93
Avg train loss: 0.45327042043209076


100%|██████████| 4/4 [00:00<00:00, 1528.54it/s]


Epoch: 94
Avg train loss: 0.4528820440173149


100%|██████████| 4/4 [00:00<00:00, 1603.94it/s]


Epoch: 95
Avg train loss: 0.4530060812830925


100%|██████████| 4/4 [00:00<00:00, 1465.00it/s]


Epoch: 96
Avg train loss: 0.4304511696100235


100%|██████████| 4/4 [00:00<00:00, 1594.64it/s]


Epoch: 97
Avg train loss: 0.48919445276260376


100%|██████████| 4/4 [00:00<00:00, 1490.38it/s]


Epoch: 98
Avg train loss: 0.43085866421461105


100%|██████████| 4/4 [00:00<00:00, 1474.14it/s]


Epoch: 99
Avg train loss: 0.40966980904340744


100%|██████████| 4/4 [00:00<00:00, 1515.83it/s]


Epoch: 100
Avg train loss: 0.4019298106431961


100%|██████████| 4/4 [00:00<00:00, 1532.31it/s]


Epoch: 101
Avg train loss: 0.38676489889621735


100%|██████████| 4/4 [00:00<00:00, 1391.61it/s]


Epoch: 102
Avg train loss: 0.3740972876548767


100%|██████████| 4/4 [00:00<00:00, 1017.79it/s]


Epoch: 103
Avg train loss: 0.4501679986715317


100%|██████████| 4/4 [00:00<00:00, 1180.25it/s]


Epoch: 104
Avg train loss: 0.36953046172857285


100%|██████████| 4/4 [00:00<00:00, 1374.17it/s]


Epoch: 105
Avg train loss: 0.39116620272397995


100%|██████████| 4/4 [00:00<00:00, 1053.25it/s]


Epoch: 106
Avg train loss: 0.44600677490234375


100%|██████████| 4/4 [00:00<00:00, 916.64it/s]


Epoch: 107
Avg train loss: 0.39931224286556244


100%|██████████| 4/4 [00:00<00:00, 1367.00it/s]


Epoch: 108
Avg train loss: 0.3867955356836319


100%|██████████| 4/4 [00:00<00:00, 934.09it/s]


Epoch: 109
Avg train loss: 0.38286471366882324


100%|██████████| 4/4 [00:00<00:00, 1158.57it/s]


Epoch: 110
Avg train loss: 0.4059769883751869


100%|██████████| 4/4 [00:00<00:00, 854.63it/s]


Epoch: 111
Avg train loss: 0.36867328733205795


100%|██████████| 4/4 [00:00<00:00, 972.93it/s]


Epoch: 112
Avg train loss: 0.36383508518338203


100%|██████████| 4/4 [00:00<00:00, 878.62it/s]


Epoch: 113
Avg train loss: 0.3732590079307556


100%|██████████| 4/4 [00:00<00:00, 942.59it/s]


Epoch: 114
Avg train loss: 0.3768870085477829


100%|██████████| 4/4 [00:00<00:00, 949.31it/s]


Epoch: 115
Avg train loss: 0.3700132444500923


100%|██████████| 4/4 [00:00<00:00, 1081.77it/s]


Epoch: 116
Avg train loss: 0.38281016051769257


100%|██████████| 4/4 [00:00<00:00, 900.11it/s]


Epoch: 117
Avg train loss: 0.4047180488705635


100%|██████████| 4/4 [00:00<00:00, 1121.32it/s]


Epoch: 118
Avg train loss: 0.4391310289502144


100%|██████████| 4/4 [00:00<00:00, 1374.62it/s]


Epoch: 119
Avg train loss: 0.39311446249485016


100%|██████████| 4/4 [00:00<00:00, 1037.17it/s]


Epoch: 120
Avg train loss: 0.38542958348989487


100%|██████████| 4/4 [00:00<00:00, 1193.68it/s]


Epoch: 121
Avg train loss: 0.4237988740205765


100%|██████████| 4/4 [00:00<00:00, 1164.03it/s]


Epoch: 122
Avg train loss: 0.43954193592071533


100%|██████████| 4/4 [00:00<00:00, 1471.56it/s]


Epoch: 123
Avg train loss: 0.36965513229370117


100%|██████████| 4/4 [00:00<00:00, 1260.78it/s]


Epoch: 124
Avg train loss: 0.36354444175958633


100%|██████████| 4/4 [00:00<00:00, 1114.39it/s]


Epoch: 125
Avg train loss: 0.34727923572063446


100%|██████████| 4/4 [00:00<00:00, 968.83it/s]


Epoch: 126
Avg train loss: 0.4117705002427101


100%|██████████| 4/4 [00:00<00:00, 1252.40it/s]


Epoch: 127
Avg train loss: 0.37120237201452255


100%|██████████| 4/4 [00:00<00:00, 965.37it/s]


Epoch: 128
Avg train loss: 0.42476730048656464


100%|██████████| 4/4 [00:00<00:00, 1020.88it/s]


Epoch: 129
Avg train loss: 0.3631638139486313


100%|██████████| 4/4 [00:00<00:00, 1042.84it/s]


Epoch: 130
Avg train loss: 0.43235543370246887


100%|██████████| 4/4 [00:00<00:00, 1278.65it/s]


Epoch: 131
Avg train loss: 0.4709842652082443


100%|██████████| 4/4 [00:00<00:00, 1263.63it/s]


Epoch: 132
Avg train loss: 0.5062096193432808


100%|██████████| 4/4 [00:00<00:00, 1447.06it/s]


Epoch: 133
Avg train loss: 0.43860433250665665


100%|██████████| 4/4 [00:00<00:00, 1349.19it/s]


Epoch: 134
Avg train loss: 0.39276203513145447


100%|██████████| 4/4 [00:00<00:00, 1252.59it/s]


Epoch: 135
Avg train loss: 0.3914458826184273


100%|██████████| 4/4 [00:00<00:00, 1062.32it/s]


Epoch: 136
Avg train loss: 0.4663781076669693


100%|██████████| 4/4 [00:00<00:00, 1398.33it/s]


Epoch: 137
Avg train loss: 0.3925841599702835


100%|██████████| 4/4 [00:00<00:00, 1190.97it/s]


Epoch: 138
Avg train loss: 0.3619811087846756


100%|██████████| 4/4 [00:00<00:00, 1134.98it/s]


Epoch: 139
Avg train loss: 0.3790999501943588


100%|██████████| 4/4 [00:00<00:00, 964.15it/s]


Epoch: 140
Avg train loss: 0.4119615852832794


100%|██████████| 4/4 [00:00<00:00, 1003.84it/s]


Epoch: 141
Avg train loss: 0.3937331661581993


100%|██████████| 4/4 [00:00<00:00, 1033.21it/s]


Epoch: 142
Avg train loss: 0.3466532342135906


100%|██████████| 4/4 [00:00<00:00, 927.48it/s]


Epoch: 143
Avg train loss: 0.327309800311923


100%|██████████| 4/4 [00:00<00:00, 905.36it/s]


Epoch: 144
Avg train loss: 0.451125405728817


100%|██████████| 4/4 [00:00<00:00, 1463.73it/s]


Epoch: 145
Avg train loss: 0.3686530143022537


100%|██████████| 4/4 [00:00<00:00, 1098.49it/s]


Epoch: 146
Avg train loss: 0.36415359377861023


100%|██████████| 4/4 [00:00<00:00, 1114.62it/s]


Epoch: 147
Avg train loss: 0.418741412460804


100%|██████████| 4/4 [00:00<00:00, 1185.84it/s]


Epoch: 148
Avg train loss: 0.41780922561883926


100%|██████████| 4/4 [00:00<00:00, 865.83it/s]


Epoch: 149
Avg train loss: 0.4310096725821495


100%|██████████| 4/4 [00:00<00:00, 1189.20it/s]


Epoch: 150
Avg train loss: 0.34949642419815063


100%|██████████| 4/4 [00:00<00:00, 1246.64it/s]


Epoch: 151
Avg train loss: 0.3820420429110527


100%|██████████| 4/4 [00:00<00:00, 1143.56it/s]


Epoch: 152
Avg train loss: 0.45627258718013763


100%|██████████| 4/4 [00:00<00:00, 1292.24it/s]


Epoch: 153
Avg train loss: 0.35547398775815964


100%|██████████| 4/4 [00:00<00:00, 1130.77it/s]


Epoch: 154
Avg train loss: 0.36723098903894424


100%|██████████| 4/4 [00:00<00:00, 821.89it/s]


Epoch: 155
Avg train loss: 0.38902585953474045


100%|██████████| 4/4 [00:00<00:00, 1011.47it/s]


Epoch: 156
Avg train loss: 0.3730260878801346


100%|██████████| 4/4 [00:00<00:00, 1243.59it/s]


Epoch: 157
Avg train loss: 0.4027625098824501


100%|██████████| 4/4 [00:00<00:00, 954.88it/s]


Epoch: 158
Avg train loss: 0.37036827206611633


100%|██████████| 4/4 [00:00<00:00, 1067.25it/s]


Epoch: 159
Avg train loss: 0.4118941053748131


100%|██████████| 4/4 [00:00<00:00, 1128.03it/s]


Epoch: 160
Avg train loss: 0.4205782637000084


100%|██████████| 4/4 [00:00<00:00, 933.73it/s]


Epoch: 161
Avg train loss: 0.3273808918893337


100%|██████████| 4/4 [00:00<00:00, 1033.84it/s]


Epoch: 162
Avg train loss: 0.35504428297281265


100%|██████████| 4/4 [00:00<00:00, 971.18it/s]


Epoch: 163
Avg train loss: 0.4290112778544426


100%|██████████| 4/4 [00:00<00:00, 1145.59it/s]


Epoch: 164
Avg train loss: 0.3752741739153862


100%|██████████| 4/4 [00:00<00:00, 964.48it/s]


Epoch: 165
Avg train loss: 0.365288570523262


100%|██████████| 4/4 [00:00<00:00, 1044.01it/s]


Epoch: 166
Avg train loss: 0.39206062257289886


100%|██████████| 4/4 [00:00<00:00, 1289.86it/s]


Epoch: 167
Avg train loss: 0.47186198830604553


100%|██████████| 4/4 [00:00<00:00, 1324.69it/s]


Epoch: 168
Avg train loss: 0.3965086042881012


100%|██████████| 4/4 [00:00<00:00, 1399.50it/s]


Epoch: 169
Avg train loss: 0.4262341260910034


100%|██████████| 4/4 [00:00<00:00, 868.79it/s]


Epoch: 170
Avg train loss: 0.32853239215910435


100%|██████████| 4/4 [00:00<00:00, 1185.92it/s]


Epoch: 171
Avg train loss: 0.3536225035786629


100%|██████████| 4/4 [00:00<00:00, 1076.70it/s]


Epoch: 172
Avg train loss: 0.34319864213466644


100%|██████████| 4/4 [00:00<00:00, 985.27it/s]


Epoch: 173
Avg train loss: 0.3604872301220894


100%|██████████| 4/4 [00:00<00:00, 1280.61it/s]


Epoch: 174
Avg train loss: 0.36804071068763733


100%|██████████| 4/4 [00:00<00:00, 1322.71it/s]


Epoch: 175
Avg train loss: 0.3315359763801098


100%|██████████| 4/4 [00:00<00:00, 833.44it/s]


Epoch: 176
Avg train loss: 0.3560023605823517


100%|██████████| 4/4 [00:00<00:00, 937.27it/s]


Epoch: 177
Avg train loss: 0.3859099820256233


100%|██████████| 4/4 [00:00<00:00, 546.83it/s]


Epoch: 178
Avg train loss: 0.3477208577096462


100%|██████████| 4/4 [00:00<00:00, 698.21it/s]


Epoch: 179
Avg train loss: 0.3703140616416931


100%|██████████| 4/4 [00:00<00:00, 1237.62it/s]


Epoch: 180
Avg train loss: 0.45236366987228394


100%|██████████| 4/4 [00:00<00:00, 1074.91it/s]


Epoch: 181
Avg train loss: 0.393212653696537


100%|██████████| 4/4 [00:00<00:00, 1196.75it/s]


Epoch: 182
Avg train loss: 0.4290210157632828


100%|██████████| 4/4 [00:00<00:00, 1312.57it/s]


Epoch: 183
Avg train loss: 0.4273853600025177


100%|██████████| 4/4 [00:00<00:00, 1185.84it/s]


Epoch: 184
Avg train loss: 0.4257214069366455


100%|██████████| 4/4 [00:00<00:00, 1073.95it/s]


Epoch: 185
Avg train loss: 0.3628831133246422


100%|██████████| 4/4 [00:00<00:00, 1394.85it/s]


Epoch: 186
Avg train loss: 0.3560945466160774


100%|██████████| 4/4 [00:00<00:00, 1274.28it/s]


Epoch: 187
Avg train loss: 0.43627452105283737


100%|██████████| 4/4 [00:00<00:00, 1389.53it/s]


Epoch: 188
Avg train loss: 0.3646080046892166


100%|██████████| 4/4 [00:00<00:00, 1536.09it/s]


Epoch: 189
Avg train loss: 0.3632093742489815


100%|██████████| 4/4 [00:00<00:00, 1625.54it/s]


Epoch: 190
Avg train loss: 0.35782869160175323


100%|██████████| 4/4 [00:00<00:00, 1571.49it/s]


Epoch: 191
Avg train loss: 0.39850878715515137


100%|██████████| 4/4 [00:00<00:00, 1252.50it/s]


Epoch: 192
Avg train loss: 0.35884176194667816


100%|██████████| 4/4 [00:00<00:00, 1328.15it/s]


Epoch: 193
Avg train loss: 0.40697960555553436


100%|██████████| 4/4 [00:00<00:00, 1318.76it/s]


Epoch: 194
Avg train loss: 0.390120767056942


100%|██████████| 4/4 [00:00<00:00, 1217.33it/s]


Epoch: 195
Avg train loss: 0.3704477697610855


100%|██████████| 4/4 [00:00<00:00, 1109.68it/s]


Epoch: 196
Avg train loss: 0.3445655517280102


100%|██████████| 4/4 [00:00<00:00, 1316.79it/s]


Epoch: 197
Avg train loss: 0.3880877047777176


100%|██████████| 4/4 [00:00<00:00, 898.47it/s]


Epoch: 198
Avg train loss: 0.34847117215394974


100%|██████████| 4/4 [00:00<00:00, 855.50it/s]


Epoch: 199
Avg train loss: 0.42023393511772156


In [160]:
test_feature_tensor = torch.tensor(all_test_feature_vectors, dtype=torch.float32)
true_test_labels_tensor = torch.tensor(test_labels, dtype=torch.int)

# Get predicted probabilities from the model
with torch.no_grad():  # No gradient calculation needed for evaluation
    test_probs = model(test_feature_tensor)

# Convert probabilities to binary labels
predicted_test_labels = model.logprob2label(test_probs)

# Convert tensors to lists for metric calculation
predicted_test_labels = predicted_test_labels.squeeze().tolist()
true_test_labels = true_test_labels_tensor.tolist()

# Calculate precision, recall, and F1-score
precision = precision_score(true_test_labels, predicted_test_labels)
recall = recall_score(true_test_labels, predicted_test_labels)
f1 = f1_score(true_test_labels, predicted_test_labels)

# Print results
print(f'Precision for Test: {precision:.4f}')
print(f'Recall for Test: {recall:.4f}')
print(f'F1-score for Test: {f1:.4f}')


Precision for Test: 0.8462
Recall for Test: 0.8800
F1-score for Test: 0.8627


In [161]:
all_test_feature_vectors_wthot_nrmlz = [featurize_text(review) for review in test_texts]

In [162]:
num_epochs = 100
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
batch_size = 16
for epoch in range(num_epochs):
    samples = list(zip(all_test_feature_vectors_wthot_nrmlz, test_labels_tensor))
    random.shuffle(samples)
    batches = [samples[i:i + batch_size] for i in range(0, len(samples), batch_size)]

    epoch_i_train_losses = []

    for batch in tqdm(batches):
        feature_vectors, labels = zip(*batch)
        
        # Convert to tensors
        feature_vectors = torch.tensor(feature_vectors, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # Ensure labels are 2D
        
        # Step 1: Zero gradients
        model.zero_grad()

        # Step 2: Forward pass
        log_probs = model(feature_vectors)

        # Step 3: Compute loss
        loss = loss_function(log_probs, labels)

        # Step 4: Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Store loss
        epoch_i_train_losses.append(loss.item())
test_feature_tensor = torch.tensor(all_test_feature_vectors, dtype=torch.float32)
true_test_labels_tensor = torch.tensor(test_labels, dtype=torch.int)

# Get predicted probabilities from the model
with torch.no_grad():  # No gradient calculation needed for evaluation
    test_probs = model(test_feature_tensor)

# Convert probabilities to binary labels
predicted_test_labels = model.logprob2label(test_probs)

# Convert tensors to lists for metric calculation
predicted_test_labels = predicted_test_labels.squeeze().tolist()
true_test_labels = true_test_labels_tensor.tolist()

# Calculate precision, recall, and F1-score
precision = precision_score(true_test_labels, predicted_test_labels)
recall = recall_score(true_test_labels, predicted_test_labels)
f1 = f1_score(true_test_labels, predicted_test_labels)

# Print results
print(f'Precision for Test: {precision:.4f}')
print(f'Recall for Test: {recall:.4f}')
print(f'F1-score for Test: {f1:.4f}')

100%|██████████| 4/4 [00:00<00:00, 1153.07it/s]
100%|██████████| 4/4 [00:00<00:00, 1561.83it/s]
100%|██████████| 4/4 [00:00<00:00, 1717.75it/s]
100%|██████████| 4/4 [00:00<00:00, 1673.04it/s]
100%|██████████| 4/4 [00:00<00:00, 1269.46it/s]
100%|██████████| 4/4 [00:00<00:00, 1220.16it/s]
100%|██████████| 4/4 [00:00<00:00, 1567.38it/s]
100%|██████████| 4/4 [00:00<00:00, 1761.39it/s]
100%|██████████| 4/4 [00:00<00:00, 1259.65it/s]
100%|██████████| 4/4 [00:00<00:00, 1531.89it/s]
100%|██████████| 4/4 [00:00<00:00, 1517.75it/s]
100%|██████████| 4/4 [00:00<00:00, 1437.88it/s]
100%|██████████| 4/4 [00:00<00:00, 1525.76it/s]
100%|██████████| 4/4 [00:00<00:00, 1582.91it/s]
100%|██████████| 4/4 [00:00<00:00, 1610.41it/s]
100%|██████████| 4/4 [00:00<00:00, 1447.06it/s]
100%|██████████| 4/4 [00:00<00:00, 1506.98it/s]
100%|██████████| 4/4 [00:00<00:00, 1361.35it/s]
100%|██████████| 4/4 [00:00<00:00, 1554.60it/s]
100%|██████████| 4/4 [00:00<00:00, 1566.94it/s]
100%|██████████| 4/4 [00:00<00:00, 1538.

Precision for Test: 0.8462
Recall for Test: 0.8800
F1-score for Test: 0.8627


In [163]:
def featurize_text_5d(text):
    # Tokenize text
    words = text.lower().split()
    positive_count = sum(1 for word in words if word in positive_lexicon)
    negative_count = sum(1 for word in words if word in negative_lexicon)
    pronouns = {"i", "me", "my", "mine", "we", "us", "our", "ours", "you", "your", "yours"}
    pronoun_count = sum(1 for word in words if word in pronouns)
    contains_exclamation = 1.0 if "!" in text else 0.0
    log_word_count = math.log(len(words)) if len(words) > 0 else 0.0
    feature_vector = [positive_count, negative_count, pronoun_count, contains_exclamation, log_word_count]
    return feature_vector

In [164]:
all_test_feature_vectors_5d = [featurize_text(review) for review in test_texts]
all_test_feature_vectors_5d = normalize(all_test_feature_vectors_5d)

test_labels_tensor = torch.tensor(test_labels, dtype=torch.float32).unsqueeze(1)  

for epoch in range(num_epochs):
    samples = list(zip(all_test_feature_vectors_5d, test_labels_tensor))
    random.shuffle(samples)
    batches = [samples[i:i + batch_size] for i in range(0, len(samples), batch_size)]

    epoch_i_train_losses = []

    for batch in tqdm(batches):
        feature_vectors, labels = zip(*batch)
        
        # Convert to tensors
        feature_vectors = torch.tensor(feature_vectors, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # Ensure labels are 2D
        
        # Step 1: Zero gradients
        model.zero_grad()

        # Step 2: Forward pass
        log_probs = model(feature_vectors)

        # Step 3: Compute loss
        loss = loss_function(log_probs, labels)

        # Step 4: Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Store loss
        epoch_i_train_losses.append(loss.item())
    
    # Print the average loss for this epoch
    print('Epoch:', epoch)
    print('Avg train loss:', sum(epoch_i_train_losses) / len(epoch_i_train_losses))

100%|██████████| 4/4 [00:00<00:00, 1169.63it/s]


Epoch: 0
Avg train loss: 0.35710281878709793


100%|██████████| 4/4 [00:00<00:00, 1631.71it/s]


Epoch: 1
Avg train loss: 0.37299320846796036


100%|██████████| 4/4 [00:00<00:00, 1295.64it/s]


Epoch: 2
Avg train loss: 0.375732459127903


100%|██████████| 4/4 [00:00<00:00, 1353.11it/s]


Epoch: 3
Avg train loss: 0.5143864080309868


100%|██████████| 4/4 [00:00<00:00, 1227.21it/s]


Epoch: 4
Avg train loss: 0.42701175063848495


100%|██████████| 4/4 [00:00<00:00, 1170.12it/s]


Epoch: 5
Avg train loss: 0.41200660914182663


100%|██████████| 4/4 [00:00<00:00, 1094.62it/s]


Epoch: 6
Avg train loss: 0.3412524051964283


100%|██████████| 4/4 [00:00<00:00, 1699.30it/s]


Epoch: 7
Avg train loss: 0.3807521462440491


100%|██████████| 4/4 [00:00<00:00, 1731.75it/s]


Epoch: 8
Avg train loss: 0.3398226350545883


100%|██████████| 4/4 [00:00<00:00, 1426.39it/s]


Epoch: 9
Avg train loss: 0.3384001925587654


100%|██████████| 4/4 [00:00<00:00, 1304.60it/s]


Epoch: 10
Avg train loss: 0.336553480476141


100%|██████████| 4/4 [00:00<00:00, 1349.30it/s]


Epoch: 11
Avg train loss: 0.3807424083352089


100%|██████████| 4/4 [00:00<00:00, 1411.87it/s]


Epoch: 12
Avg train loss: 0.4202222004532814


100%|██████████| 4/4 [00:00<00:00, 1440.48it/s]


Epoch: 13
Avg train loss: 0.42597200721502304


100%|██████████| 4/4 [00:00<00:00, 1715.81it/s]


Epoch: 14
Avg train loss: 0.3568495139479637


100%|██████████| 4/4 [00:00<00:00, 1929.08it/s]


Epoch: 15
Avg train loss: 0.3556213453412056


100%|██████████| 4/4 [00:00<00:00, 1547.57it/s]


Epoch: 16
Avg train loss: 0.3404543623328209


100%|██████████| 4/4 [00:00<00:00, 1702.41it/s]


Epoch: 17
Avg train loss: 0.34883733838796616


100%|██████████| 4/4 [00:00<00:00, 1647.73it/s]


Epoch: 18
Avg train loss: 0.3424655310809612


100%|██████████| 4/4 [00:00<00:00, 1676.38it/s]


Epoch: 19
Avg train loss: 0.3590765967965126


100%|██████████| 4/4 [00:00<00:00, 1578.73it/s]


Epoch: 20
Avg train loss: 0.29833816457539797


100%|██████████| 4/4 [00:00<00:00, 1491.31it/s]


Epoch: 21
Avg train loss: 0.3511611297726631


100%|██████████| 4/4 [00:00<00:00, 1195.90it/s]


Epoch: 22
Avg train loss: 0.320486169308424


100%|██████████| 4/4 [00:00<00:00, 1387.46it/s]


Epoch: 23
Avg train loss: 0.4190233647823334


100%|██████████| 4/4 [00:00<00:00, 1391.49it/s]


Epoch: 24
Avg train loss: 0.3186142724007368


100%|██████████| 4/4 [00:00<00:00, 1565.18it/s]


Epoch: 25
Avg train loss: 0.388553224503994


100%|██████████| 4/4 [00:00<00:00, 1546.29it/s]


Epoch: 26
Avg train loss: 0.35489609092473984


100%|██████████| 4/4 [00:00<00:00, 1343.47it/s]


Epoch: 27
Avg train loss: 0.34001102298498154


100%|██████████| 4/4 [00:00<00:00, 1496.76it/s]


Epoch: 28
Avg train loss: 0.39628157764673233


100%|██████████| 4/4 [00:00<00:00, 1438.62it/s]


Epoch: 29
Avg train loss: 0.39144090563058853


100%|██████████| 4/4 [00:00<00:00, 1425.54it/s]


Epoch: 30
Avg train loss: 0.36444953083992004


100%|██████████| 4/4 [00:00<00:00, 1553.59it/s]


Epoch: 31
Avg train loss: 0.3174395076930523


100%|██████████| 4/4 [00:00<00:00, 1869.95it/s]


Epoch: 32
Avg train loss: 0.37956133484840393


100%|██████████| 4/4 [00:00<00:00, 1783.86it/s]


Epoch: 33
Avg train loss: 0.4282711148262024


100%|██████████| 4/4 [00:00<00:00, 1682.77it/s]


Epoch: 34
Avg train loss: 0.3552357256412506


100%|██████████| 4/4 [00:00<00:00, 1687.00it/s]


Epoch: 35
Avg train loss: 0.32641492784023285


100%|██████████| 4/4 [00:00<00:00, 1641.45it/s]


Epoch: 36
Avg train loss: 0.43589505553245544


100%|██████████| 4/4 [00:00<00:00, 1793.20it/s]


Epoch: 37
Avg train loss: 0.3771766498684883


100%|██████████| 4/4 [00:00<00:00, 1672.87it/s]


Epoch: 38
Avg train loss: 0.363274410367012


100%|██████████| 4/4 [00:00<00:00, 1365.44it/s]


Epoch: 39
Avg train loss: 0.344760462641716


100%|██████████| 4/4 [00:00<00:00, 1444.69it/s]


Epoch: 40
Avg train loss: 0.36017579585313797


100%|██████████| 4/4 [00:00<00:00, 1374.51it/s]


Epoch: 41
Avg train loss: 0.3441437855362892


100%|██████████| 4/4 [00:00<00:00, 1685.14it/s]


Epoch: 42
Avg train loss: 0.3990989401936531


100%|██████████| 4/4 [00:00<00:00, 1498.10it/s]


Epoch: 43
Avg train loss: 0.36778050661087036


100%|██████████| 4/4 [00:00<00:00, 1535.67it/s]


Epoch: 44
Avg train loss: 0.3039034325629473


100%|██████████| 4/4 [00:00<00:00, 1648.38it/s]


Epoch: 45
Avg train loss: 0.34968970715999603


100%|██████████| 4/4 [00:00<00:00, 1505.22it/s]


Epoch: 46
Avg train loss: 0.3256383016705513


100%|██████████| 4/4 [00:00<00:00, 1620.99it/s]


Epoch: 47
Avg train loss: 0.3894946202635765


100%|██████████| 4/4 [00:00<00:00, 1068.34it/s]


Epoch: 48
Avg train loss: 0.3596590682864189


100%|██████████| 4/4 [00:00<00:00, 1233.26it/s]


Epoch: 49
Avg train loss: 0.38106197118759155


100%|██████████| 4/4 [00:00<00:00, 1447.06it/s]


Epoch: 50
Avg train loss: 0.34902605414390564


100%|██████████| 4/4 [00:00<00:00, 1535.39it/s]


Epoch: 51
Avg train loss: 0.40264227986335754


100%|██████████| 4/4 [00:00<00:00, 1590.26it/s]


Epoch: 52
Avg train loss: 0.41110875457525253


100%|██████████| 4/4 [00:00<00:00, 1694.15it/s]


Epoch: 53
Avg train loss: 0.3715982213616371


100%|██████████| 4/4 [00:00<00:00, 1795.70it/s]


Epoch: 54
Avg train loss: 0.39346449822187424


100%|██████████| 4/4 [00:00<00:00, 1538.35it/s]


Epoch: 55
Avg train loss: 0.3172682225704193


100%|██████████| 4/4 [00:00<00:00, 1292.74it/s]


Epoch: 56
Avg train loss: 0.3348362222313881


100%|██████████| 4/4 [00:00<00:00, 1161.13it/s]


Epoch: 57
Avg train loss: 0.35729852318763733


100%|██████████| 4/4 [00:00<00:00, 1222.74it/s]


Epoch: 58
Avg train loss: 0.35211724787950516


100%|██████████| 4/4 [00:00<00:00, 1477.65it/s]


Epoch: 59
Avg train loss: 0.3444691151380539


100%|██████████| 4/4 [00:00<00:00, 1052.92it/s]


Epoch: 60
Avg train loss: 0.36139536648988724


100%|██████████| 4/4 [00:00<00:00, 1031.11it/s]


Epoch: 61
Avg train loss: 0.3659399598836899


100%|██████████| 4/4 [00:00<00:00, 1149.44it/s]


Epoch: 62
Avg train loss: 0.3209732845425606


100%|██████████| 4/4 [00:00<00:00, 1336.72it/s]


Epoch: 63
Avg train loss: 0.3672337159514427


100%|██████████| 4/4 [00:00<00:00, 1702.58it/s]


Epoch: 64
Avg train loss: 0.37042343616485596


100%|██████████| 4/4 [00:00<00:00, 1472.20it/s]


Epoch: 65
Avg train loss: 0.43454425036907196


100%|██████████| 4/4 [00:00<00:00, 1368.01it/s]


Epoch: 66
Avg train loss: 0.510901190340519


100%|██████████| 4/4 [00:00<00:00, 1449.81it/s]


Epoch: 67
Avg train loss: 0.4286624789237976


100%|██████████| 4/4 [00:00<00:00, 1642.09it/s]


Epoch: 68
Avg train loss: 0.3201831169426441


100%|██████████| 4/4 [00:00<00:00, 1572.37it/s]


Epoch: 69
Avg train loss: 0.3591652065515518


100%|██████████| 4/4 [00:00<00:00, 1547.57it/s]


Epoch: 70
Avg train loss: 0.4291553348302841


100%|██████████| 4/4 [00:00<00:00, 1312.46it/s]


Epoch: 71
Avg train loss: 0.38709377497434616


100%|██████████| 4/4 [00:00<00:00, 1536.38it/s]


Epoch: 72
Avg train loss: 0.36178602278232574


100%|██████████| 4/4 [00:00<00:00, 1597.53it/s]


Epoch: 73
Avg train loss: 0.38926324993371964


100%|██████████| 4/4 [00:00<00:00, 1652.44it/s]


Epoch: 74
Avg train loss: 0.37237755954265594


100%|██████████| 4/4 [00:00<00:00, 1456.73it/s]


Epoch: 75
Avg train loss: 0.3017716482281685


100%|██████████| 4/4 [00:00<00:00, 1615.99it/s]


Epoch: 76
Avg train loss: 0.3208911456167698


100%|██████████| 4/4 [00:00<00:00, 1827.98it/s]


Epoch: 77
Avg train loss: 0.42581284046173096


100%|██████████| 4/4 [00:00<00:00, 1655.21it/s]


Epoch: 78
Avg train loss: 0.363970585167408


100%|██████████| 4/4 [00:00<00:00, 1670.37it/s]


Epoch: 79
Avg train loss: 0.35104984045028687


100%|██████████| 4/4 [00:00<00:00, 1697.93it/s]


Epoch: 80
Avg train loss: 0.3493179455399513


100%|██████████| 4/4 [00:00<00:00, 1793.01it/s]


Epoch: 81
Avg train loss: 0.3505406081676483


100%|██████████| 4/4 [00:00<00:00, 1392.65it/s]


Epoch: 82
Avg train loss: 0.38676033169031143


100%|██████████| 4/4 [00:00<00:00, 1423.37it/s]


Epoch: 83
Avg train loss: 0.36283718049526215


100%|██████████| 4/4 [00:00<00:00, 1555.32it/s]


Epoch: 84
Avg train loss: 0.3392651304602623


100%|██████████| 4/4 [00:00<00:00, 1517.34it/s]


Epoch: 85
Avg train loss: 0.39061201363801956


100%|██████████| 4/4 [00:00<00:00, 1409.85it/s]


Epoch: 86
Avg train loss: 0.44145307689905167


100%|██████████| 4/4 [00:00<00:00, 1306.54it/s]


Epoch: 87
Avg train loss: 0.3275335878133774


100%|██████████| 4/4 [00:00<00:00, 1325.53it/s]


Epoch: 88
Avg train loss: 0.43780136853456497


100%|██████████| 4/4 [00:00<00:00, 1409.02it/s]


Epoch: 89
Avg train loss: 0.32034002244472504


100%|██████████| 4/4 [00:00<00:00, 1582.16it/s]


Epoch: 90
Avg train loss: 0.328756432980299


100%|██████████| 4/4 [00:00<00:00, 1426.39it/s]


Epoch: 91
Avg train loss: 0.3104358948767185


100%|██████████| 4/4 [00:00<00:00, 1400.67it/s]


Epoch: 92
Avg train loss: 0.3511588051915169


100%|██████████| 4/4 [00:00<00:00, 1414.61it/s]


Epoch: 93
Avg train loss: 0.3390834331512451


100%|██████████| 4/4 [00:00<00:00, 1453.83it/s]


Epoch: 94
Avg train loss: 0.3416454493999481


100%|██████████| 4/4 [00:00<00:00, 1362.01it/s]


Epoch: 95
Avg train loss: 0.4144529178738594


100%|██████████| 4/4 [00:00<00:00, 1213.98it/s]


Epoch: 96
Avg train loss: 0.44351623952388763


100%|██████████| 4/4 [00:00<00:00, 1331.21it/s]


Epoch: 97
Avg train loss: 0.35709116607904434


100%|██████████| 4/4 [00:00<00:00, 1373.83it/s]


Epoch: 98
Avg train loss: 0.4188472330570221


100%|██████████| 4/4 [00:00<00:00, 1505.76it/s]


Epoch: 99
Avg train loss: 0.3172076679766178


In [165]:
test_feature_tensor = torch.tensor(all_test_feature_vectors_5d, dtype=torch.float32)
true_test_labels_tensor = torch.tensor(test_labels, dtype=torch.int)

# Get predicted probabilities from the model
with torch.no_grad():  # No gradient calculation needed for evaluation
    test_probs = model(test_feature_tensor)

# Convert probabilities to binary labels
predicted_test_labels = model.logprob2label(test_probs)

# Convert tensors to lists for metric calculation
predicted_test_labels = predicted_test_labels.squeeze().tolist()
true_test_labels = true_test_labels_tensor.tolist()

# Calculate precision, recall, and F1-score
precision = precision_score(true_test_labels, predicted_test_labels)
recall = recall_score(true_test_labels, predicted_test_labels)
f1 = f1_score(true_test_labels, predicted_test_labels)

# Print results
print(f'Precision for Test: {precision:.4f}')
print(f'Recall for Test: {recall:.4f}')
print(f'F1-score for Test: {f1:.4f}')


Precision for Test: 0.8519
Recall for Test: 0.9200
F1-score for Test: 0.8846


# **REPORT**

## 1. Describe the details for your best performing model: parameters such as number of epochs, batch size, and learning rate. Compare and contrast with other parameter settings you used—feel free to use graphs or tables to compare. Why do you think it performed better than the other models?

Ans:
For my best performing model, I used the following hyperparameters:
I experimented with different configurations of these parameters to see their impact on performance. Below, I’ll describe the different settings I tried and compare the performance of each.

Comparison of Hyperparameter Settings
| Parameter Setting | Epochs | Batch Size | Learning Rate | F1 Score (Avg) | Train Loss (Avg) |
|-------------------|--------|------------|---------------|----------------|------------------|
| Model 1           | 100    | 16         | 0.1           | 0.86           | 0.44             |
| Model 2           | 100    | 20         | 0.01          | 0.86           | 0.37             |
| Model 3           | 100    | 16         | 0.005         | 0.90           | 0.43             |
| Model 4           | 200    | 16         | 0.05          | 0.87           | 0.26             |

When I increased the number of epochs with decreasing learning rate, I could see that I was getting a better F-1 score for Dev and Test. I could understand that when LR rate is slow there is a slow convergence of the model and I get a better result.
If LR is more, the slow convergence is not there and the values are dynamic and are oscillating throught the loss calculation process.



## 2. What do you think would happen if you didn’t normalize the feature vectors? Write down a guess for what you think would happen, and then run an experiment to test your intuitions and report back what you learned.

Ans:
If I don't normalize the feature vectors, I expect the model to perform poorly because features with larger values could dominate the learning process, making the model biased toward those features. The gradients might also become unbalanced, causing slow or unstable learning. It would have Magnitude differences, convergence issues etc.

After experimenting without normalizing the feature vectors, I could see that my hypothesis was correct. For the best model for which I was getting the F1 score above 90, it significantly dropped.





## 3. What would happen if you removed one of the features entirely and used a 5-dimensional feature vector? Choose one feature and remove it from your vector. Then, run another experiment and see what happens. Does the test F1 go up or down? Does the model converge slower, or faster? Report which feature you removed and what you learned.

Ans:
If I remove one of the features entirely and use a 5-dimensional feature vector, I expect the model to lose some information, which could lead to a decrease in performance. Removing a feature might make the model converge faster since there are fewer dimensions to learn, but it could also result in lower accuracy or a decrease in the F1 score because the model is missing important data.

I remove contains_no as a feature and I could see that the F1 score decreased a little bit. There was a significance difference when I removed positive or negative word count. The model convergence has a negligible speed increase for contains_no but model converged faster if I removed the positive and negative features.



## 4. Review Section 4.10, p. 18 of the textbook and then consider the resources we used for this task: for instance, the training data and the positive and negative lexicons. Did you notice any biases present in these resources? Can you think of any harms or unintended consequences (harmful or not) that this classifier could cause? There is no correct answer; just write a couple of sentences reflecting on this prompt.

Ans:
I noticed that there might be some biases in the positive and negative lexicons. For example, the negative lexicon appears to contain a broader variety of words, some of which may be mislabelled, leading to inaccurate predictions. This could result in unfair or skewed classifications, especially if certain negative words that are not actually related to reviews are included. One potential harm of this classifier is that it could incorrectly label neutral or complex statements as negative, influencing sentiment analysis in ways that could be misleading for users relying on the system for feedback or decision-making.
