**VALENCE SCORE PREDICTOR**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import string

# Location of the tab-separated "word<TAB>score" text file used to build the dataset.
file_path = "/Data.txt"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
### 1. Load Data from File ###
def load_dataset(filename):
    """Read (word, score) pairs from a tab-separated text file.

    Each line is stripped and split on its last tab character: the left part
    is the word (lower-cased before it is stored) and the right part is parsed
    as a float score. Lines that do not split into exactly two parts are
    skipped without any message.

    Args:
        filename: Path of the UTF-8 text file to read.

    Returns:
        A list of (word, score) tuples in file order.
    """
    dataset = []
    # Read the file the caller asked for; the argument used to be ignored in
    # favour of the module-level `file_path`.
    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            parts = line.strip().rsplit("\t", 1)  # Split on the last tab
            if len(parts) == 2:
                word, score = parts
                try:
                    score = float(score)
                except ValueError:
                    # NOTE(review): loading stops at the first score that is not
                    # a number, so every later line is dropped. Confirm this is
                    # intended rather than skipping just the bad line.
                    print("Error")
                    print(line)
                    break
                dataset.append((word.lower(), score))
    return dataset


# Load from the configured path so the file that is opened is the one named here.
dataset = load_dataset(file_path)

In [None]:
### 2. Character Encoding ###
chars = string.ascii_lowercase  # Alphabet: the 26 lowercase ASCII letters
char_to_idx = {char: i for i, char in enumerate(chars)}  # letter -> one-hot column
idx_to_char = {i: char for char, i in char_to_idx.items()}  # one-hot column -> letter


def encode_word(word):
    """One-hot encode a word, one row per character.

    Characters that are not in `chars` (digits, punctuation, uppercase
    letters, ...) are left as all-zero rows.

    Args:
        word: The string to encode.

    Returns:
        A float tensor of shape (1, len(word), len(chars)); the leading axis
        is a batch dimension of size 1.
    """
    # Size the one-hot axis from the alphabet itself, so this function does not
    # depend on a variable that is only defined in a later cell.
    encoded = torch.zeros(len(word), len(chars), dtype=torch.float)
    for i, char in enumerate(word):
        if char in char_to_idx:
            encoded[i, char_to_idx[char]] = 1
    return encoded.unsqueeze(0)

# No decoding step is needed: the model's output is a numeric score, not a character sequence.

In [None]:
### 3. Define Character-Level Sentiment Model ###
class CharRNN(nn.Module):
    """LSTM over a character sequence followed by a linear read-out.

    The LSTM is built with ``batch_first=True``, so inputs are laid out as
    (batch, seq_len, input_size). Only the LSTM output at the last time step
    is passed to the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        """Build the layers.

        Args:
            input_size: Number of features per time step.
            hidden_size: Number of hidden units in each LSTM layer.
            output_size: Number of values produced by the linear layer.
            n_layers: Number of stacked LSTM layers (default 1).
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.rnn = nn.LSTM(input_size, hidden_size, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Run the LSTM over `x` and project the last time step.

        Args:
            x: Input tensor of shape (batch, seq_len, input_size).
            hidden: Optional (h_0, c_0) tuple. When omitted, nn.LSTM starts
                from zero states.

        Returns:
            A tuple (out, hidden): `out` has shape (batch, output_size) and
            `hidden` is the (h_n, c_n) tuple returned by the LSTM.
        """
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out[:, -1, :])  # Keep only the last time step
        return out, hidden

    def init_hidden(self, batch_size):
        """Return zero (h_0, c_0) states for a batch of `batch_size` sequences.

        The tensors take the dtype and device of the model's parameters, so
        they stay usable after the model is moved or cast.
        """
        ref = self.fc.weight
        shape = (self.n_layers, batch_size, self.hidden_size)
        return (torch.zeros(shape, dtype=ref.dtype, device=ref.device),
                torch.zeros(shape, dtype=ref.dtype, device=ref.device))

In [None]:
### 4. Training setup ###
input_size = len(chars)  # One one-hot column per character in the alphabet
hidden_size = 128  # Number of hidden units in the LSTM layer
output_size = 1  # A single predicted score per word

# Seed PyTorch before the model is created so the randomly initialised weights
# are the same on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

model = CharRNN(input_size, hidden_size, output_size)
criterion = nn.MSELoss()  # Squared error between predicted and target score
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
## Training loop
NUM_EPOCHS = 100  # Full passes over the dataset
LOG_EVERY = 10  # Print the summed loss every this many epochs

# Make sure the model is in training mode even if predict() (which switches it
# to eval mode) was run before this cell.
model.train()

# The encoded inputs and target tensors do not change between epochs, so build
# them once instead of re-encoding every word on every epoch.
training_pairs = [
    (encode_word(word), torch.tensor([[score]])) for word, score in dataset
]

for epoch in range(NUM_EPOCHS):
    total_loss = 0  # Sum of the per-word losses for this epoch
    for encoded, target in training_pairs:
        hidden = model.init_hidden(1)  # Fresh zero state for each word (batch of 1)

        output, _ = model(encoded, hidden)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch}, Loss: {total_loss:.4f}")

Epoch 0, Loss: 19769.1251
Epoch 10, Loss: 16342.0889
Epoch 20, Loss: 4963.1839
Epoch 30, Loss: 1645.8936
Epoch 40, Loss: 844.8128
Epoch 50, Loss: 528.3331
Epoch 60, Loss: 412.0173
Epoch 70, Loss: 371.6645
Epoch 80, Loss: 285.2679
Epoch 90, Loss: 259.2511


In [None]:
### 5. Predict the valence score ###
def predict(word):
    """Return the model's predicted score for a single word.

    The word is lower-cased first, matching how load_dataset stores training
    words; without this, uppercase letters would be encoded as all-zero rows.

    Args:
        word: The string to score.

    Returns:
        The model output as a Python float.
    """
    model.eval()  # Switch to evaluation mode
    encoded = encode_word(word.lower())
    hidden = model.init_hidden(1)  # Zero state for a batch of one word
    # No gradients are needed for inference, so skip building the autograd graph.
    with torch.no_grad():
        output, _ = model(encoded, hidden)
    return output.item()

prediction_word = "test"  # Word to predict the valence score for
print(f"Score for ({prediction_word}):", predict(prediction_word))