## Using a Neural Network

In [1]:
import pandas as pd

In [2]:
loaded_data = pd.read_pickle("data_with_embeddings.pkl")
print(loaded_data.head())

                                                text  target  \
0  @switchfoot http://twitpic.com/2y1zl - Awww, t...       0   
1  is upset that he can't update his Facebook by ...       0   
2  @Kenichan I dived many times for the ball. Man...       0   
3    my whole body feels itchy and like its on fire        0   
4  @nationwideclass no, it's not behaving at all....       0   

                                          clean_text  \
0     awww thats a bummer  you shoulda got david ...   
1  is upset that he cant update his facebook by t...   
2   i dived many times for the ball managed to sa...   
3    my whole body feels itchy and like its on fire    
4   no its not behaving at all im mad why am i he...   

                                           embedding  
0  [0.101073705, 0.14796087, 0.12037146, 0.226526...  
1  [0.13524286, 0.036169093, 0.3622666, 0.0267785...  
2  [0.034617905, -0.24725968, 0.18772389, 0.13896...  
3  [0.078177616, 0.3227022, 0.59566224, -0.115340...  
4  

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
X = list(loaded_data['embedding'])
y = loaded_data['target']

y = y.replace(4, 1) # Binary labels 0 or 1

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
import numpy as np
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_train)
y_test = np.array(y_train)


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [7]:
device = "cuda"

In [8]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)

In [9]:
train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)

In [10]:
class NN(nn.Module):
    def __init__(self, input_dim):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 512)  
        self.fc2 = nn.Linear(512, 256)         
        self.fc3 = nn.Linear(256, 128)         
        self.fc4 = nn.Linear(128, 64)          
        self.fc5 = nn.Linear(64, 1)   
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.fc5(x)
        return self.sigmoid(x)

In [11]:
model = NN(input_dim=len(X_train[0])).to(device)
loss_function = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [12]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    for batch in train_loader:
        X_batch, y_batch = batch
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = loss_function(outputs.squeeze(), y_batch)
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

Epoch 1, Loss: 0.360352098941803
Epoch 2, Loss: 0.42877113819122314
Epoch 3, Loss: 0.46368199586868286
Epoch 4, Loss: 0.38883379101753235
Epoch 5, Loss: 0.39311784505844116
Epoch 6, Loss: 0.35143229365348816
Epoch 7, Loss: 0.3930535316467285
Epoch 8, Loss: 0.45335331559181213
Epoch 9, Loss: 0.3166407346725464
Epoch 10, Loss: 0.3235229551792145
Epoch 11, Loss: 0.39220380783081055
Epoch 12, Loss: 0.3487279415130615
Epoch 13, Loss: 0.32577475905418396
Epoch 14, Loss: 0.353398859500885
Epoch 15, Loss: 0.39106348156929016
Epoch 16, Loss: 0.38858458399772644
Epoch 17, Loss: 0.3127692937850952
Epoch 18, Loss: 0.34882497787475586
Epoch 19, Loss: 0.3282110095024109
Epoch 20, Loss: 0.36366939544677734
Epoch 21, Loss: 0.35229742527008057
Epoch 22, Loss: 0.36361876130104065
Epoch 23, Loss: 0.3153032660484314
Epoch 24, Loss: 0.422526478767395
Epoch 25, Loss: 0.26071855425834656
Epoch 26, Loss: 0.32859504222869873
Epoch 27, Loss: 0.3802975118160248
Epoch 28, Loss: 0.3436061143875122
Epoch 29, Loss: 

In [13]:
from sklearn.metrics import accuracy_score

In [14]:
model.eval()

NN(
  (fc1): Linear(in_features=768, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)

In [1]:
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).to(device)

batch_size = 64

y_true = []
y_pred = []

num_samples = len(X_test_tensor[0])
for i in range(0, num_samples, batch_size):
    # Get the batch
    X_batch = X_test_tensor[i:i+batch_size].to(device)
    y_batch = y_test_tensor[i:i+batch_size].to(device)
    
    # Forward pass to get predictions
    with torch.no_grad():
        outputs = model(X_batch)
        
    # Convert outputs to binary predictions (0 or 1)
    predicted = (outputs.squeeze() > 0.5).float()  # 0.5 threshold for binary classification
    
    # Append true labels and predicted labels
    y_true.extend(y_batch.cpu().numpy())  # Move to CPU if necessary
    y_pred.extend(predicted.cpu().numpy())  # Move to CPU if necessary

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)

# Print accuracy
print(f'Accuracy: {accuracy * 100:.2f}%')


NameError: name 'torch' is not defined