<a href="https://colab.research.google.com/github/NoCodeProgram/CodingTest/blob/main/transformer/name_gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Fetch the repository that holds the dataset; the CSV read later in this
# notebook is loaded from ./deepLearning/rnn/name_gender_filtered.csv.
!git clone https://github.com/NoCodeProgram/deepLearning.git

Cloning into 'deepLearning'...
remote: Enumerating objects: 266, done.[K
remote: Counting objects: 100% (127/127), done.[K
remote: Compressing objects: 100% (125/125), done.[K
remote: Total 266 (delta 48), reused 0 (delta 0), pack-reused 139[K
Receiving objects: 100% (266/266), 12.41 MiB | 20.76 MiB/s, done.
Resolving deltas: 100% (84/84), done.


In [1]:
import torch
print(torch.__version__)
# Pick the compute backend in priority order: Apple MPS, then CUDA, then CPU.
# The CUDA probe only runs when MPS is unavailable.
my_device = torch.device(
    'mps' if torch.backends.mps.is_available()
    else 'cuda' if torch.cuda.is_available()
    else 'cpu'
)
print(my_device)


2.2.1+cu121
cpu


In [3]:
import pandas as pd
df = pd.read_csv('./deepLearning/rnn/name_gender_filtered.csv')

# Collect every distinct character that appears in any entry of the 'Name'
# column, then keep them in sorted order.
unique_chars = set().union(*df['Name'])
sorted_chars = sorted(unique_chars)



In [4]:
# Normalise the alphabet to a sorted list of distinct single characters.
sorted_chars = sorted({ch for ch in ''.join(sorted_chars)})

# Character -> index table; one extra index is appended for the padding token.
stoi = dict(zip(sorted_chars, range(len(sorted_chars))))
stoi['<P>'] = len(stoi)  # padding token

# Index -> character table (the inverse of stoi).
itos = {idx: ch for ch, idx in stoi.items()}

print(stoi)
print(itos)

{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25, '<P>': 26}
{0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f', 6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l', 12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r', 18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w', 23: 'x', 24: 'y', 25: 'z', 26: '<P>'}


In [6]:

char_length = 16  # every encoded name has exactly this many token ids

def encode_name(name):
    """Encode a name as a fixed-length list of token ids.

    Each character is looked up in ``stoi``; the result is right-padded with
    the ``'<P>'`` id until it is ``char_length`` long.

    Names longer than ``char_length`` are truncated to their first
    ``char_length`` characters. Without that, an over-long name would come
    back longer than ``char_length`` (a negative pad count adds nothing),
    which makes batches ragged and indexes past the positional table.

    Args:
        name: the name to encode, as a string of characters present in ``stoi``.

    Returns:
        A list of ``char_length`` integers.

    Raises:
        KeyError: if one of the kept characters is not a key of ``stoi``.
    """
    # Slice first so the pad count below can never be negative.
    token_ids = [stoi[ch] for ch in name[:char_length]]
    token_ids += [stoi['<P>']] * (char_length - len(token_ids))
    return token_ids

def decode_name(name):
    """Turn a sequence of token ids back into a string, dropping padding.

    Every id is looked up in ``itos``; entries that map to ``'<P>'`` are
    skipped and the remaining characters are concatenated in order.
    """
    symbols = (itos[idx] for idx in name)
    return ''.join(sym for sym in symbols if sym != '<P>')

# Gender label <-> class index lookup tables.
gen2num = {'F':0, 'M':1}
num2gen = {idx: label for label, idx in gen2num.items()}

# Round-trip demo: show the padded encoding, then decode it back to text.
print(encode_name('nocope'))
print(decode_name(encode_name('nocope')))


[13, 14, 2, 14, 15, 4, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26]
nocope


In [7]:
# Fixed token length of an encoded name; also used as the model's max_len.
# Same value as the char_length assigned in the encoding cell.
char_length = 16
# Embedding width, passed to the model as embed_dim.
n_embed = 32
# Number of attention heads in each transformer block.
n_head = 4
# Number of transformer blocks stacked in the model.
n_layer = 4

import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    """One head of scaled dot-product self-attention (no masking).

    The input's last dimension must be ``embed_dim``; attention is computed
    across the second-to-last dimension, and the output's last dimension is
    ``atten_dim``.
    """

    def __init__(self, embed_dim, atten_dim):
        super().__init__()

        def projection():
            # Bias-free linear map from the embedding space to the head space.
            return nn.Linear(embed_dim, atten_dim, bias=False)

        self.query = projection()
        self.key = projection()
        self.value = projection()

    def forward(self, x):
        q, k, v = self.query(x), self.key(x), self.value(x)

        # Dot-product similarities, scaled by sqrt(head dimension).
        scale = k.size(-1) ** 0.5
        similarity = (q @ k.transpose(-2, -1)) / scale

        # Each position's weights over all positions sum to one.
        weights = F.softmax(similarity, dim=-1)
        return weights @ v

class MultiheadAttention(nn.Module):
    """Multi-head self-attention built from independent ``SelfAttention`` heads.

    Each head projects to ``embed_dim // num_heads`` features. The head
    outputs are concatenated along the last dimension and passed through a
    final ``embed_dim -> embed_dim`` linear layer.

    Args:
        embed_dim: size of the input and output feature dimension.
        num_heads: number of attention heads; must divide ``embed_dim``.

    Raises:
        ValueError: if ``num_heads`` is not positive or does not divide
            ``embed_dim``.
    """

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        # The concatenated heads must add up to exactly embed_dim features,
        # otherwise self.fc rejects them in forward() with a shape error.
        if num_heads <= 0 or embed_dim % num_heads != 0:
            raise ValueError(
                f"embed_dim ({embed_dim}) must be divisible by a positive num_heads ({num_heads})"
            )
        attention_dim = embed_dim // num_heads
        self.attentions = nn.ModuleList([SelfAttention(embed_dim, attention_dim) for _ in range(num_heads)])
        self.fc = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        """Run every head on ``x``, concatenate on the last dim, then mix with ``fc``."""
        concatenated_heads = torch.cat([attention(x) for attention in self.attentions], dim=-1)
        return self.fc(concatenated_heads)

class FeedFoward(nn.Module):
    """Two-layer MLP applied to the last dimension: Linear -> ReLU -> Linear.

    Expands from ``embed_dim`` to ``ff_dim`` and projects back to ``embed_dim``.
    """

    def __init__(self, embed_dim, ff_dim):
        super().__init__()
        expand = nn.Linear(embed_dim, ff_dim)
        contract = nn.Linear(ff_dim, embed_dim)
        self.net = nn.Sequential(expand, nn.ReLU(), contract)

    def forward(self, x):
        hidden = self.net(x)
        return hidden


class TransformerBlock(nn.Module):
    """Pre-norm transformer block: attention and feed-forward, each with a residual.

    Layer normalisation is applied to the input of each sub-layer, and the
    sub-layer's output is added back onto the un-normalised input. The
    feed-forward hidden width is ``4 * embed_dim``.
    """

    def __init__(self, embed_dim, n_head):
        super().__init__()
        # Attention sub-layer and its input norm.
        self.layer_norm1 = nn.LayerNorm(embed_dim)
        self.multihead_atten = MultiheadAttention(embed_dim, n_head)

        # Feed-forward sub-layer and its input norm.
        self.layer_norm2 = nn.LayerNorm(embed_dim)
        self.feed_forward = FeedFoward(embed_dim, 4*embed_dim)

    def forward(self, x):
        attended = self.multihead_atten(self.layer_norm1(x))
        x = x + attended

        transformed = self.feed_forward(self.layer_norm2(x))
        return x + transformed


In [8]:

class TransformerNameGenderClassifier(nn.Module):
    """Character-level transformer that classifies a token-id sequence.

    Token ids are embedded, summed with learned position embeddings, passed
    through ``n_layers`` ``TransformerBlock`` modules and a final layer norm,
    averaged over the sequence dimension, and mapped to ``num_classes`` logits.

    Args:
        char_size: number of rows in the character embedding table.
        embed_dim: embedding / model width.
        n_heads: attention heads per block.
        n_layers: number of stacked blocks.
        max_len: number of rows in the position embedding table; inputs must
            not be longer than this.
        num_classes: size of the output logit vector.
    """

    def __init__(self, char_size, embed_dim, n_heads, n_layers, max_len, num_classes=2):
        super().__init__()
        self.embed_dim = embed_dim
        self.char_embedding = nn.Embedding(char_size, embed_dim)
        self.positional_encoding = nn.Embedding(max_len, embed_dim)
        self.transformer_blocks = nn.Sequential(
            *(TransformerBlock(embed_dim, n_heads) for _ in range(n_layers))
        )
        self.ln_f = nn.LayerNorm(embed_dim)
        self.classifier = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        seq_len = x.size(1)
        # [1, seq_length] so the position embeddings broadcast over the batch.
        position_ids = torch.arange(seq_len, device=x.device)[None, :]

        # [batch_size, seq_length, embed_dim]
        hidden = self.char_embedding(x) + self.positional_encoding(position_ids)
        hidden = self.ln_f(self.transformer_blocks(hidden))

        # Average over the sequence dimension, then classify.
        pooled = hidden.mean(dim=1)
        return self.classifier(pooled)

# Parameters
# Vocabulary size: one entry per key of stoi (which includes the '<P>' padding key).
char_size = len(stoi)
# Size of the position embedding table; equal to the padded name length.
max_len = char_length  # Max length of name

# Build the classifier from the n_embed / n_head / n_layer hyperparameters;
# num_classes is left at its default of 2.
model = TransformerNameGenderClassifier(char_size=char_size, embed_dim=n_embed, n_heads=n_head, n_layers=n_layer, max_len=max_len)


In [11]:
import numpy as np

def get_batch(df, batch_size):
    """Draw a random batch of rows from ``df`` and return it as two tensors.

    Args:
        df: DataFrame with a 'Name' column and a 'Gender' column whose values
            are keys of ``gen2num``.
        batch_size: number of rows to sample (via ``df.sample``).

    Returns:
        ``(names, genders)``: two ``torch.long`` tensors. Each row of
        ``names`` is ``encode_name`` applied to one sampled name; ``genders``
        holds the matching ``gen2num`` indices.
    """
    sampled = df.sample(n=batch_size)

    # Encode each column row by row.
    name_ids = np.array([encode_name(entry) for entry in sampled['Name'].values])
    gender_ids = np.array([gen2num[label] for label in sampled['Gender'].values])

    return (
        torch.tensor(name_ids, dtype=torch.long),
        torch.tensor(gender_ids, dtype=torch.long),
    )

# Example usage:
# Small batch so the printed tensors stay readable.
batch_size = 4
# Rows are sampled at random, so the printed values differ between runs.
names_tensor, genders_tensor = get_batch(df, batch_size)
print("Names Tensor:", names_tensor)
print("Genders Tensor:", genders_tensor)

Names Tensor: tensor([[10,  4, 13,  0,  3,  8, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26],
        [ 0, 17,  8, 18, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26],
        [18, 19, 17, 24, 10,  4, 17, 26, 26, 26, 26, 26, 26, 26, 26, 26],
        [ 4, 21,  0, 13,  6,  4, 11, 14, 18, 26, 26, 26, 26, 26, 26, 26]])
Genders Tensor: tensor([0, 1, 1, 1])


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

# Assuming df is your DataFrame and the TransformerNameGenderClassifier is defined and ready.

# Model parameters
vocab_size = len(stoi)                  # one embedding row per stoi entry
embed_dim, n_heads, n_layers = 32, 4, 4  # model width, attention heads, stacked blocks
max_len = char_length                   # rows in the position embedding table
num_classes = 2                         # F or M

# Build a fresh model, move it to the selected device and put it in training mode.
model = TransformerNameGenderClassifier(
    vocab_size, embed_dim, n_heads, n_layers, max_len, num_classes
).to(my_device)
model.train()

# Cross-entropy over the two class logits, optimised with Adam at its default settings.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Training parameters
epochs = 50
batch_size = 64
# Number of random batches drawn per epoch.
steps_per_epoch = len(df) // batch_size

for epoch in range(epochs):
    total_loss = 0
    for _ in range(steps_per_epoch):
        # Sample a batch and move both tensors to the training device.
        names_tensor, genders_tensor = (
            t.to(my_device) for t in get_batch(df, batch_size)
        )

        # Standard step: clear old gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        predictions = model(names_tensor)
        loss = criterion(predictions, genders_tensor)
        loss.backward()
        optimizer.step()

        # Accumulate the scalar loss for the epoch average.
        total_loss += loss.item()

    # Report the mean batch loss for this epoch.
    print(f'Epoch {epoch+1}, Loss: {total_loss / steps_per_epoch}')

Epoch 1, Loss: 0.5207702869342433
Epoch 2, Loss: 0.3988763694134023
Epoch 3, Loss: 0.3855010283490022
Epoch 4, Loss: 0.35684975516051054
Epoch 5, Loss: 0.3401740736121105
Epoch 6, Loss: 0.3234824066878193
Epoch 7, Loss: 0.30880234447411364
Epoch 8, Loss: 0.30198006404356825
Epoch 9, Loss: 0.2771549000301295


KeyboardInterrupt: 

In [13]:
# Inference mode, on the CPU.
model.eval().to('cpu')

# A single encoded name wrapped in a list gives the batch dimension: shape [1, len].
names_tensor = torch.tensor([encode_name("nocope")], dtype=torch.long)

# Forward pass without gradient tracking.
with torch.no_grad():
    pred = model(names_tensor)

# The index of the largest logit is the predicted class.
predicted_index = pred.argmax(dim=1).item()
print(f"Predicted class: {num2gen[predicted_index]}")


Predicted class: M


In [14]:
# Same classifier, rebuilt on PyTorch's nn.TransformerEncoder.

class TransformerNameGenderClassifier(nn.Module):
    """Character-level classifier built on ``nn.TransformerEncoder``.

    Token ids are embedded, summed with learned position embeddings, run
    through a batch-first, pre-norm encoder stack whose feed-forward width is
    ``4 * embed_dim``, layer-normalised, averaged over the sequence
    dimension, and mapped to ``num_classes`` logits.

    Args:
        char_size: number of rows in the character embedding table.
        embed_dim: embedding / model width.
        n_heads: attention heads per encoder layer.
        n_layers: number of encoder layers.
        max_len: number of rows in the position embedding table; inputs must
            not be longer than this.
        num_classes: size of the output logit vector.
    """

    def __init__(self, char_size, embed_dim, n_heads, n_layers, max_len, num_classes=2):
        super().__init__()
        self.embed_dim = embed_dim
        self.char_embedding = nn.Embedding(char_size, embed_dim)
        self.positional_encoding = nn.Embedding(max_len, embed_dim)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embed_dim,
                nhead=n_heads,
                dim_feedforward=4 * embed_dim,
                batch_first=True,
                norm_first=True,
            ),
            num_layers=n_layers,
        )

        self.ln_f = nn.LayerNorm(embed_dim)
        self.classifier = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        seq_len = x.size(1)
        # [1, seq_length] so the position embeddings broadcast over the batch.
        position_ids = torch.arange(seq_len, device=x.device)[None, :]

        # [batch_size, seq_length, embed_dim]
        hidden = self.char_embedding(x) + self.positional_encoding(position_ids)
        hidden = self.ln_f(self.transformer_encoder(hidden))

        # Average over the sequence dimension, then classify.
        pooled = hidden.mean(dim=1)
        return self.classifier(pooled)

# Parameters
# Vocabulary size: one entry per key of stoi (which includes the '<P>' padding key).
char_size = len(stoi)
# Size of the position embedding table; equal to the padded name length.
max_len = char_length  # Max length of name

# NOTE: this rebinds `model` to a newly constructed (untrained) network built
# from the class defined in this cell; num_classes is left at its default of 2.
model = TransformerNameGenderClassifier(char_size=char_size, embed_dim=n_embed, n_heads=n_head, n_layers=n_layer, max_len=max_len)


