In [5]:
def create_dataset():
    with open("/content/exam/dutch.txt", 'r') as infile:
        dutch = infile.read().split("\n")
    
    with open("/content/exam/hungarian.txt", 'r') as infile:
        hungarian = infile.read().split("\n")
    
    with open("/content/exam/portugese.txt", 'r') as infile:
         portugese = infile.read().split("\n")
    
    dutch_count = int(0.8*len(dutch))
    dutch_train = dutch[ : dutch_count]
    dutch_test = dutch[dutch_count:]

    hungarian_count = int(0.8*len(dutch))
    hungarian_train = dutch[ : dutch_count]
    hungarian_test = dutch[dutch_count:]

    portugese_count = int(0.8*len(dutch))
    portugese_train = dutch[ : dutch_count]
    portugese_test = dutch[dutch_count:]

    return dutch_train, hungarian_train, portugese_train, dutch_test, hungarian_test, portugese_test

In [6]:
dutch_train, hungarian_train, portugese_train, dutch_test, hungarian_test, portugese_test = create_dataset()  

In [9]:
index_to_word = ["UNK"] + list(set(dutch_train + hungarian_train + portugese_train))
word_to_index = { item : index for index, item in enumerate(index_to_word) }

In [10]:
import torch
import torch.nn as nn
class Net(nn.Module):
    def __init__(self, embedding_input: int):
        super().__init__()
        self.embedding = nn.Embedding(embedding_input, 128)
        self.linear_1 = nn.Linear(128, 32)
        self.linear_2 = nn.Linear(32, 3)
        
    def forward(self, token):
        relu = nn.ReLU()
        output = self.embedding(token)
        output = relu(output)
        output = self.linear_1(output)
        output = relu(output)
        output = self.linear_2(output)
        output = nn.Sigmoid()(output)
        return output

In [11]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, dutch, hungarian, portugese):    
        super().__init__()
        self.data = []
        self.target = []

        for target_index, tokens_list in enumerate([dutch, hungarian, portugese]):
            for token in tokens_list:
                self.data.append(word_to_index.get(token, 0))
                self.target.append(target_index)
        self.data = torch.tensor(self.data)
        self.target = torch.tensor(self.target) 

    def __getitem__(self, index):
        return self.data[index], self.target[index]

    def __len__(self):
        return self.target.shape[0]   

In [15]:
from torch.utils.data import Dataset, DataLoader  

train_dataset = CustomDataset(dutch_train, hungarian_train, portugese_train)
test_dataset = CustomDataset(dutch_train, hungarian_train, portugese_train)

train_loader = DataLoader(train_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

In [20]:
neural_net = Net(len(index_to_word))
optimizer = torch.optim.Adam(neural_net.parameters())
entropy = nn.CrossEntropyLoss()

for epoch in range(10):
    train_history = []
    test_history = []
    neural_net.train()
    for batch in train_loader:
        optimizer.zero_grad()
        data, target = batch 
        prediction = neural_net(data)
        loss = entropy(prediction, target)
        loss.backward()
        optimizer.step()
        train_history.append(loss.item())
    neural_net.eval()
    for batch in test_loader:
        data, target = batch 
        prediction = neural_net(data)
        loss = entropy(prediction, target)
        test_history.append(loss.item())
    print(  f"Epoch {epoch} train history {sum(train_history) / len(train_history)} test history {sum(test_history)/ len(test_history)}")

Epoch 0 train history 0.9390934077051256 test history 1.2171184798482515
Epoch 1 train history 1.1887595976464 test history 1.2137234031467
Epoch 2 train history 1.166942331564186 test history 1.2172166357294627
Epoch 3 train history 1.2989629356764922 test history 1.1949750398009242
Epoch 4 train history 1.2470451705909256 test history 1.1949286786383098
Epoch 5 train history 1.1949639881206864 test history 1.1948041146358084
Epoch 6 train history 1.3131770920169457 test history 1.0986123026619727
Epoch 7 train history 1.0986264746539531 test history 1.0986123162319061
Epoch 8 train history 1.0986360618978825 test history 1.0986123244769292
Epoch 9 train history 1.0986467548337382 test history 1.0986122963064342
