In [1]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


2024-06-09 19:03:15.616905: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available:  0


2024-06-09 19:03:17.092201: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-09 19:03:17.092654: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [4]:
import sys
sys.path.append('../')
import LoadIntoDf

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report
from scipy.sparse import csr_matrix

# Load the data
df = LoadIntoDf.load_sqlite_data()
print('data loaded')

data loaded


In [10]:
torch.cuda.is_available()

True

In [5]:

# Split data into features and labels
X = df.drop('difficulty', axis=1).values
y = df.difficulty.str.split('/').str[0]

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# Create PyTorch datasets and dataloaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


Using device: cuda


In [16]:

# Define the neural network model
class NeuralNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNet, self).__init__()
        self.layer1 = nn.Linear(input_size, 512)
        self.layer2 = nn.Linear(512, 256)
        self.layer3 = nn.Linear(256, 128)
        self.layer4 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()
        self.l2_reg = 0.001  # L2 regularization factor

    def forward(self, x):
        out = self.relu(self.layer1(x))
        out = self.dropout(out)
        out = self.relu(self.layer2(out))
        out = self.dropout(out)
        out = self.relu(self.layer3(out))
        out = self.dropout(out)
        out = self.layer4(out)
        return out

In [17]:

input_size = X_train.shape[1]
num_classes = len(label_encoder.classes_)

model = NeuralNet(input_size, num_classes).to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), weight_decay=model.l2_reg)


In [20]:

# Training the model
num_epochs = 2

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for i, (inputs, labels) in enumerate(train_loader):
         # Move inputs and labels to GPU
        inputs, labels = inputs.to(device), labels.to(device)
      
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        if i % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
    
    epoch_loss = total_loss / len(train_loader)
    epoch_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    all_labels = []
    all_preds = []
    
    for inputs, labels in test_loader:

        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())
        
    test_accuracy = correct / total
    print(f'Test Accuracy: {test_accuracy:.4f}')

    # Generate classification report


Epoch [1/2], Step [1/4345], Loss: 2.1687
Epoch [1/2], Step [101/4345], Loss: 2.4317
Epoch [1/2], Step [201/4345], Loss: 2.0893
Epoch [1/2], Step [301/4345], Loss: 2.1963
Epoch [1/2], Step [401/4345], Loss: 2.1510
Epoch [1/2], Step [501/4345], Loss: 2.2197
Epoch [1/2], Step [601/4345], Loss: 2.1119
Epoch [1/2], Step [701/4345], Loss: 2.1166
Epoch [1/2], Step [801/4345], Loss: 2.0399
Epoch [1/2], Step [901/4345], Loss: 1.9515
Epoch [1/2], Step [1001/4345], Loss: 2.0275
Epoch [1/2], Step [1101/4345], Loss: 2.1541
Epoch [1/2], Step [1201/4345], Loss: 2.5040
Epoch [1/2], Step [1301/4345], Loss: 2.4448
Epoch [1/2], Step [1401/4345], Loss: 2.1129
Epoch [1/2], Step [1501/4345], Loss: 2.1834
Epoch [1/2], Step [1601/4345], Loss: 2.0541
Epoch [1/2], Step [1701/4345], Loss: 2.3450
Epoch [1/2], Step [1801/4345], Loss: 2.1588
Epoch [1/2], Step [1901/4345], Loss: 2.3018
Epoch [1/2], Step [2001/4345], Loss: 2.1495
Epoch [1/2], Step [2101/4345], Loss: 1.8371
Epoch [1/2], Step [2201/4345], Loss: 2.0112


In [30]:
print(classification_report(all_labels, all_preds, target_names=label_encoder.classes_[:-1]))


              precision    recall  f1-score   support

          4a       0.42      0.49      0.45       818
          4b       0.18      0.07      0.10       651
          4c       0.00      0.00      0.00       620
          5a       0.19      0.03      0.06      1113
          5b       0.00      0.00      0.00      1117
          5c       0.20      0.40      0.27      2084
          6a       0.25      0.28      0.26      2834
         6a+       0.00      0.00      0.00      2284
          6b       0.20      0.42      0.27      3125
         6b+       0.00      0.00      0.00      2587
          6c       0.21      0.18      0.20      3428
         6c+       0.00      0.00      0.00      2697
          7a       0.22      0.59      0.32      3816
         7a+       0.21      0.39      0.27      2874
          7b       0.19      0.00      0.01      1791
         7b+       0.00      0.00      0.00      1157
          7c       0.26      0.15      0.19       984
         7c+       0.29    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [29]:

num_classes = len(label_encoder.classes_)
num_classes

23