In [5]:
import ast
import sqlite3
import torch
import time
# Connect to the SQLite database
conn = sqlite3.connect('tenhou_dataprocess/dst/2016-2020_after_script_waits.db')
cursor = conn.cursor()
# Query the data
start_time = time.time()
cursor.execute('SELECT X_values, y_values FROM test_table')
rows = cursor.fetchall()

# Process the data
X_extracted = []
y_extracted = []
counter = 0
for row in rows:
    counter += 1
    X_row = ast.literal_eval(row[0])  # Convert string back to list
    y_row = ast.literal_eval(row[1])  # Convert string back to list
    X_extracted.append(X_row)
    y_extracted.append(y_row)
    if(counter//1000 == 500):
        break
    if(counter % 1000 == 0):
        print(f"Processed {counter} rows", end='\r')
    
end = time.time()

# Convert lists back to NumPy arrays or tensors if needed
import numpy as np

X_extracted = np.array(X_extracted)
y_extracted = np.array(y_extracted)

# Optionally, convert to tensors
X = torch.tensor(X_extracted, dtype=torch.float)
y = torch.tensor(y_extracted, dtype=torch.float)

# Close the connection
conn.close()
print(X.shape, y.shape)  # Check shapes of tensors

torch.Size([500000, 368]) torch.Size([500000, 34])


In [6]:
#split the data into train, validation and test
index1 = int(len(X)*0.8)
index2 = int(len(X)*0.9)
X_train = X[:index1]
X_val = X[index1:index2]
X_test = X[index2:]
y_train = y[:index1]
y_val = y[index1:index2]
y_test = y[index2:]

In [11]:
import tensorflow as tf
from transformers import  BertModel
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn as nn
from torch.utils.data import DataLoader
import torch
import random

class MahjongModel(nn.Module):
    def __init__(self):
        super(MahjongModel, self).__init__()
        #self.embedding = nn.Embedding(37, 128)
        #self.projection = nn.Linear(368, 400)
        self.projection = nn.Linear(368, 768)
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.fc2 = nn.Linear(768, 34)
        
    def forward(self, x_batch):
        # Convert categorical features to embeddings
        #x_emb = self.embedding(x_batch)  # Shape: (batch_size, seq_length, embedding_dim)
        
        # Project concatenated features to BERT's hidden size
        #x = self.fc1(torch.cat((x_batch, x_emb)))  # Shape: (batch_size, hidden_size)
        #x = self.fc1(x_batch)
        # Add sequence length dimension for BERT
        #x = x.unsqueeze(1)  # Shape: (batch_size, sequence_length=1, hidden_size)
        x_batch = self.projection(x_batch)
        
        x_batch = x_batch.unsqueeze(1)
        # Use BERT to process the combined features
        outputs = self.bert(inputs_embeds=x_batch)[0]
        
        # Pass through classification head
        outputs = self.fc2(outputs)
        
        y_hat = torch.sigmoid(outputs)
        return y_hat
    
from torch.utils.data import Dataset 
class MahjongDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return {
            'input': torch.tensor(self.data[idx], dtype=torch.float),
            'label': torch.tensor(self.labels[idx], dtype=torch.float)
        }
    
    
def getindices(tensor):
    mylist = []
    for i in range(len(tensor)):
        if tensor[i]:
            mylist.append(i)
    return mylist
    
    

def eval(model, X_val, y_val):
    model.eval()
    with torch.no_grad():
        outputs = model(X_val)
        outputs = (outputs >= 0.5).float()
        ran = random.randint(0, 1070)
        print(getindices(outputs[ran][0]),getindices(y_val[ran]))
        count = 0
        for i in range(y_val.size(0)):
            count += getindices(outputs[i][0]) == getindices(y_val[i])
        total_samples = y_val.size(0)
        misclassification_rate = 1.0 - (count / total_samples)
        return misclassification_rate

# Example Usage
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MahjongModel().to(device)


train_dataset = MahjongDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=512, shuffle=True)

loss_function = nn.BCELoss().to(device)
optimizer = Adam(model.parameters() ,lr=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)
  
for epoch in range(10):
    start = time.time()
    model.train()
    num_batches = 0
    for param in model.bert.parameters():
        param.requires_grad = False
    epoch_loss = 0
    for batch in train_dataloader:
        inputs = batch["input"].to(device)
        labels = batch["label"].to(device)
        outputs = model(inputs)
        y_pred_tf = torch.squeeze(outputs).to(device)

        
        loss = loss_function(y_pred_tf, labels)
        optimizer.zero_grad()
        loss.backward()
        num_batches += 1

        optimizer.step()
        
        epoch_loss += loss.item()  # Accumulate the loss
    print("Epoch ", epoch, " train loss is: ", epoch_loss/num_batches)
    print("Time taken for epoch ", epoch, " is ", time.time()-start)
    miss = eval(model, X_val.to(device), y_val.to(device))
    start = time.time()
    print("miss is ", miss, "Time taken for eval is ", time.time()-start)
    

  'input': torch.tensor(self.data[idx], dtype=torch.float),
  'label': torch.tensor(self.labels[idx], dtype=torch.float)


Epoch  0  train loss is:  0.2259479618400259
[] [3, 22]
miss is  1.0
Epoch  1  train loss is:  0.1978073485405244
[] [19, 22]
miss is  1.0
Epoch  2  train loss is:  0.19754531432676803
[] [14]
miss is  1.0
Epoch  3  train loss is:  0.19738119791078446
[] [6, 28]
miss is  1.0
Epoch  4  train loss is:  0.19725125044812936
[] [10]
miss is  1.0
Epoch  5  train loss is:  0.1971239298391525
[] [18, 21, 24]
miss is  1.0
Epoch  6  train loss is:  0.19699102344796482
[] [10, 27]
miss is  1.0
Epoch  7  train loss is:  0.196775687701257
[] [19, 22]
miss is  1.0
Epoch  8  train loss is:  0.19650366382144602
[] [2, 5, 8]
miss is  1.0
Epoch  9  train loss is:  0.19627205997019473
[] [2, 5]
miss is  1.0


In [7]:
train_dataset = MahjongDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=2048, shuffle=True)

In [None]:
#Real training
optimizer = Adam(model.parameters() ,lr=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)
  
for epoch in range(100): # can train for as long as you want
    start = time.time()
    model.train()
    for param in model.bert.parameters():
        param.requires_grad = False
    for param in model.bert.encoder.layer[-6:].parameters():
        param.requires_grad = True
    num_batches = 0
    epoch_loss = 0
    for batch in train_dataloader:
        inputs = batch["input"].to(device)
        labels = batch["label"].to(device)
        outputs = model(inputs)
        y_pred_tf = torch.squeeze(outputs).to(device)
        loss = loss_function(y_pred_tf, labels)
        optimizer.zero_grad()
        loss.backward()
        num_batches += 1

        optimizer.step()
        
        epoch_loss += loss.item()
    print("Epoch ", epoch, " train loss is: ", epoch_loss/num_batches, "Time taken for epoch ", epoch, " is ", time.time()-start)
    start = time.time()
    if(epoch % 5 == 0):    
        miss = eval(model, X_val.to(device), y_val.to(device))
        print("Miss is ", miss, "Time taken for eval is ", time.time()-start)  


  'input': torch.tensor(self.data[idx], dtype=torch.float),
  'label': torch.tensor(self.labels[idx], dtype=torch.float)


Epoch  0  train loss is:  0.15841774023173716 Time taken for epoch  0  is  112.30009579658508
[] [10, 13]
Miss is  0.99744 Time taken for eval is  45.014870166778564
Epoch  1  train loss is:  0.15118931527332882 Time taken for epoch  1  is  113.00276589393616
Epoch  2  train loss is:  0.14400003837121417 Time taken for epoch  2  is  122.47662568092346
Epoch  3  train loss is:  0.1369739359106554 Time taken for epoch  3  is  126.85388088226318
Epoch  4  train loss is:  0.13022839528558505 Time taken for epoch  4  is  174.46911644935608
Epoch  5  train loss is:  0.12355312764111077 Time taken for epoch  5  is  133.2139208316803
[] [2, 4, 5, 7, 8]
Miss is  0.9954 Time taken for eval is  50.22937297821045
Epoch  6  train loss is:  0.11751224530284363 Time taken for epoch  6  is  129.43603014945984
Epoch  7  train loss is:  0.11175660018230338 Time taken for epoch  7  is  117.39853882789612
Epoch  8  train loss is:  0.10630872547435943 Time taken for epoch  8  is  107.04766798019409
Epoch  

In [9]:
model._save_to_state_dict('model.pth',keep_vars=True,prefix='model')

TypeError: Module._save_to_state_dict() missing 2 required positional arguments: 'prefix' and 'keep_vars'

In [None]:
model.load_state_dict(torch.load('model.pth'))