In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import numpy as np
import pickle
import pandas as pd
import math
import json
import copy

In [2]:
with open('tot_dict.pkl', 'rb') as f:
    loaded_dict = pickle.load(f)

In [3]:
def parser(path):
    with open(path) as file:
        data = json.load(file)

    df = pd.json_normalize(data, record_path=['data'])
    return data, df

path = 'problems_2023_01_30/problems MoonBoard 2016 .json'

data, df = parser(path)
grade_df = df['grade']

In [4]:
grade_mapping = {
    '6B': 0, '6B+': 0, '6C': 1, '6C+': 1, '7A': 2, '7A+': 3,
    '7B': 4, '7B+': 4, '7C': 5, '7C+': 6, '8A': 7,
    '8A+': 8, '8B': 9, '8B+': 10
}

In [5]:
data = []
for i, row in df.iterrows():
    move_list = []
    for j in row['moves']:
        move_list.append(j['description'])
    data.append((move_list, grade_mapping[row['grade']]))
data

[(['E6', 'C5', 'E8', 'F11', 'C13', 'D15', 'D18'], 0),
 (['F5', 'J8', 'I11', 'K11', 'G13', 'E15', 'E18'], 0),
 (['F5', 'J5', 'F9', 'C10', 'G13', 'E15', 'I18'], 0),
 (['B10', 'E6', 'E8', 'E12', 'E16', 'G4', 'I18', 'J5'], 2),
 (['C5', 'D7', 'A9', 'F11', 'F13', 'I14', 'E16', 'D18'], 2),
 (['D3', 'D6', 'F9', 'I12', 'D13', 'D16', 'C18'], 4),
 (['D3', 'B4', 'C7', 'F9', 'C12', 'D13', 'A16', 'C18'], 1),
 (['B3', 'E7', 'C10', 'F12', 'G14', 'I18'], 4),
 (['J5', 'H8', 'C10', 'E13', 'B16', 'G18'], 3),
 (['F5', 'I5', 'F9', 'C10', 'F11', 'C14', 'C15', 'E18', 'E18'], 1),
 (['C5', 'B6', 'E8', 'B10', 'E14', 'E16', 'I18'], 2),
 (['G6', 'K9', 'H12', 'G14', 'B15', 'A18'], 4),
 (['I4', 'E7', 'H11', 'D12', 'B16', 'G18'], 2),
 (['G4', 'B7', 'G8', 'D11', 'C13', 'G17', 'I18'], 4),
 (['C7', 'F15', 'G11', 'H18', 'I5'], 4),
 (['B4', 'C7', 'C18', 'D16', 'D10', 'D3', 'H14'], 3),
 (['H5', 'B6', 'E9', 'E12', 'G15', 'C16', 'A18'], 2),
 (['I5', 'G7', 'D10', 'C12', 'C15', 'C18'], 4),
 (['J5', 'D15', 'E12', 'F11', 'G9', '

In [6]:
embeddings_data = []
max_seq_length = 0

for holds, grade in data:
    embeddings = []
    for hold in holds:
        if hold in loaded_dict:
            embeddings.append(loaded_dict[hold])
        else:
            pass
    
    max_seq_length = max(max_seq_length, len(embeddings))
    
    embeddings_data.append((embeddings, grade))

In [7]:
for i, (embeddings, grade) in enumerate(embeddings_data):
    if len(embeddings) < max_seq_length:
        padding = [np.zeros_like(embeddings[0])] * (max_seq_length - len(embeddings))
        embeddings_data[i] = (embeddings + padding, grade)

In [8]:
inputs = np.array([np.array(embeddings) for embeddings, _ in embeddings_data])
labels = np.array([grade for _, grade in embeddings_data])

In [9]:
print("Inputs shape:", inputs.shape)
print("Labels length:", len(labels))

Inputs shape: (59506, 28, 100)
Labels length: 59506


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm


class EmbeddedClimbingDataset(Dataset):
    def __init__(self, embedded_inputs, labels):
        self.embedded_inputs = embedded_inputs
        self.labels = labels

    def __len__(self):
        return len(self.embedded_inputs)

    def __getitem__(self, idx):
        embedded_problem = torch.tensor(self.embedded_inputs[idx]).float()
        label = torch.tensor(self.labels[idx]).long()
        return embedded_problem, label

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_seq_length):
        super(PositionalEncoding, self).__init__()

        encoding = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))

        encoding[:, 0::2] = torch.sin(position * div_term)
        encoding[:, 1::2] = torch.cos(position * div_term)

        self.register_buffer('encoding', encoding.unsqueeze(0))

    def forward(self, x):
        return x + self.encoding[:, :x.size(1)]

class TransformerClassifier(nn.Module):
    def __init__(self, input_dim, output_dim, num_layers=3, num_heads=8, hidden_dim=128, dropout=0.1):
        super(TransformerClassifier, self).__init__()
        self.num_layers = num_layers
        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.positional_encoding = PositionalEncoding(hidden_dim, max_seq_length=input_dim)
        
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim, 
            nhead=num_heads, 
            dim_feedforward=hidden_dim*4, 
            dropout=dropout
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        x = self.embedding(x)
        x = self.positional_encoding(x)
        x = x.permute(1, 0, 2)
        encoder_output = self.encoder(x)
        encoder_output = encoder_output.mean(dim=0)
        x = self.fc(encoder_output)
        return x

X_train, X_test, y_train, y_test = train_test_split(inputs, labels, test_size=0.2, random_state=42)

X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

train_dataset = EmbeddedClimbingDataset(X_train, y_train)
test_dataset = EmbeddedClimbingDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = inputs[0].shape[1]
output_dim = len(set(labels))

model = TransformerClassifier(input_dim=input_dim, output_dim=output_dim)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

def calculate_accuracy(outputs, labels):
    _, predicted = torch.max(outputs, 1)
    correct = (predicted == labels).sum().item()
    accuracy = correct / labels.size(0)
    
    _, predicted_top3 = outputs.topk(3, 1, largest=True, sorted=True)
    labels = labels.view(-1, 1).expand_as(predicted_top3)
    correct_top3 = (predicted_top3 == labels).sum().item()
    top3_accuracy = correct_top3 / labels.size(0)
    
    return accuracy, top3_accuracy



In [11]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    total_train_accuracy = 0.0
    total_train_top3_accuracy = 0.0
    
    tqdm_train_loader = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
    for embedded_inputs, labels in tqdm_train_loader:
        optimizer.zero_grad()
        outputs = model(embedded_inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        tqdm_train_loader.set_postfix({'Loss': running_loss / len(tqdm_train_loader)})
        
        batch_acc, batch_top3_acc = calculate_accuracy(outputs, labels)
        total_train_accuracy += batch_acc
        total_train_top3_accuracy += batch_top3_acc
    
    avg_train_accuracy = total_train_accuracy / len(train_loader)
    avg_train_top3_accuracy = total_train_top3_accuracy / len(train_loader)
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}, Train Acc: {avg_train_accuracy:.4f}, Train Top-3 Acc: {avg_train_top3_accuracy:.4f}")
    
    model.eval()
    total_accuracy = 0.0
    total_top3_accuracy = 0.0
    
    with torch.no_grad():
        tqdm_test_loader = tqdm(test_loader, desc=f"Evaluating", leave=False)
        for embedded_inputs, labels in tqdm_test_loader:
            outputs = model(embedded_inputs)
            acc, top3_acc = calculate_accuracy(outputs, labels)
            total_accuracy += acc
            total_top3_accuracy += top3_acc
            tqdm_test_loader.set_postfix({'Acc': total_accuracy / len(tqdm_test_loader), 'Top-3 Acc': total_top3_accuracy / len(tqdm_test_loader)})
    
    avg_accuracy = total_accuracy / len(test_loader)
    avg_top3_accuracy = total_top3_accuracy / len(test_loader)
    print(f"Accuracy: {avg_accuracy:.4f}, Top-3 Accuracy: {avg_top3_accuracy:.4f}")


Epoch 1/10:   0%|          | 0/1488 [00:00<?, ?it/s]

                                                                           

Epoch [1/10], Loss: 1.3383, Train Acc: 0.4529, Train Top-3 Acc: 0.8270


                                                                                          

Accuracy: 0.4746, Top-3 Accuracy: 0.8459


                                                                           

KeyboardInterrupt: 