In [None]:
# Numerical Operations
import numpy as np
import math

# Reading/Writing Data
import pandas as pd
import pyarrow.parquet as pq
import os
import sys
import json

# Plotting 
import matplotlib.pyplot as plt
import plotly.express as px

# For Progress Bar
from tqdm.notebook import tqdm

# Processing data
from multiprocessing import Pool

# Raise pandas' row-display limit to 500 so long tables printed by later cells
# are shown in full instead of being truncated.
pd.set_option('display.max_rows', 500)

# Record the interpreter version in the notebook output.
print(f'Python V{sys.version}')

In [None]:
# Run mode. Set DEV = True to work on a random subsample of train.csv (see the
# reload cell that calls `.sample(..., random_state=SEED)`); False uses the
# full training set. The previous `DEV = True` line was immediately
# overwritten, so only the effective value is kept.
DEV = False
# Seed for the DEV subsample.
SEED = 42

# Load Input

Number of samples in **train.csv**: 94477

- **Each Parquet file has a variable length (number of frames).**
- All the frames are of equal size (size: 543)
    - face: 468
    - left_hand: 21
    - pose: 33
    - right_hand: 21

In [None]:
# Load the training index (train.csv) from the Kaggle dataset folder.
train_df = pd.read_csv("/kaggle/input/asl-signs/train.csv")
print(f"\n... train.csv shape: {train_df.shape}\n")
# Preview the first rows; later cells rely on the `path` and `sign` columns.
display(train_df.head())

In [None]:
def get_full_path(path, base_dir="/kaggle/input/asl-signs/"):
    """Return the location of a dataset file under the dataset root.

    Args:
        path: Relative file path, e.g. a value from the `path` column of
            train.csv.
        base_dir: Root directory of the dataset. Defaults to the Kaggle input
            folder, so existing one-argument calls behave as before; pass a
            different root to run the notebook outside Kaggle.

    Returns:
        `base_dir` and `path` combined with `os.path.join`.
    """
    return os.path.join(base_dir, path)

# Inspect the parquet file referenced by the first row of train.csv.
print(f"\n... parquet path: {get_full_path(train_df['path'].iloc[0])}\n")
tmp_df = pd.read_parquet(get_full_path(train_df['path'].iloc[0]))
print(f"\n... example parquet shape: {tmp_df.shape}\n")
display(tmp_df)

# Which landmark groups does the file contain?
print(f"\n...unique type: {tmp_df['type'].unique()}\n")

# Summary statistics per landmark group, in the same order as before.
for type_query in (
    "type == 'face'",
    "type == 'left_hand'",
    "type == 'right_hand'",
    "type == 'pose'",
):
    display(tmp_df.query(type_query).describe())

# Landmark indices present in a single frame (frame 20).
print(f"\n...show landmark_index of frame:\n")
frame_20 = tmp_df.query("frame == 20")
print(frame_20['landmark_index'].values)

## Statistics of data in train

In [None]:
# Sanity check: True when every row of train.csv points at a distinct parquet file.
print(f"\n... without duplicate path: {train_df['path'].nunique() == train_df.shape[0]}")

# Horizontal histogram: one bar per sign on the y axis, bar length (x axis) is
# the number of rows for that sign, ordered by frequency.
fig = px.histogram(train_df, y="sign", color="sign", orientation="h", height=5000,
    labels={"count":"<b>Total Row Count</b>"}, title="<b>Row Counts by Sign (label)</b>",
    category_orders={"sign": train_df["sign"].value_counts().index}
)
# With orientation="h" the counts run along the x axis, so the count title
# belongs there; the y axis lists the signs. (The original put the count title
# on the y axis, mislabelling the sign axis.)
fig.update_xaxes(title_text="<b>Total Row Count</b>")
fig.update_yaxes(title_text="<b>Sign</b>")
fig.update_layout(showlegend=False)
fig.show()

## Statistics of frames

Frame length
- Min: 2
- Max: 537

```
count    94477.000000
mean        37.935021
std         44.177069
min          2.000000
1%           6.000000
5%           6.000000
25%         12.000000
50%         22.000000
75%         44.000000
95%        135.000000
99%        219.000000
99.9%      300.524000
max        537.000000
```

Missing Frame
- Min: 0
- Min with missing: 1
- Max: 102
- Has Missing Frame: 43

In [None]:
# Load the training index once; in DEV mode keep a reproducible random subset
# (at most 5,000 rows) so the per-file passes below finish quickly. Capping at
# len(train_df) avoids a ValueError if the index ever has fewer rows.
train_df = pd.read_csv('/kaggle/input/asl-signs/train.csv')
if DEV:
    train_df = train_df.sample(min(int(5e3), len(train_df)), random_state=SEED)

# Sign name -> integer class index, as shipped with the dataset.
with open("/kaggle/input/asl-signs/sign_to_prediction_index_map.json", "r") as f:
    SIGN_TO_INDEX = json.load(f)

# NOTE(review): `create_features` looks labels up through the name `dict`,
# which shadows the builtin. The alias is kept so that cell keeps working; new
# code should use SIGN_TO_INDEX, and the alias can go once callers migrate.
dict = SIGN_TO_INDEX

N_SAMPLES = len(train_df)
print(f'N_SAMPLES: {N_SAMPLES}')

In [None]:
def get_unique_missing(path):
    """Count the frames present in, and missing from, one landmark file.

    Args:
        path: Relative parquet path (a value of train.csv's `path` column);
            resolved with `get_full_path`.

    Returns:
        Tuple `(n_unique_frames, n_miss_frames)`:
        - n_unique_frames: number of distinct values in the `frame` column.
        - n_miss_frames: how many frame numbers between the smallest and the
          largest observed frame (inclusive) do not occur in the file.
    """
    # Only the `frame` column is needed, so skip loading the coordinate columns.
    frames = pd.read_parquet(get_full_path(path), columns=['frame'])['frame']
    n_unique_frames = frames.nunique()
    # Size of the inclusive [min, max] frame range minus the frames actually seen.
    n_miss_frames = frames.max() - frames.min() + 1 - n_unique_frames
    return n_unique_frames, n_miss_frames

# Scan every landmark file in parallel. `imap` yields results in the order of
# train_df['path'], so array positions line up with the rows of train_df.
with Pool() as pool:
    results = list(
        tqdm(
            pool.imap(get_unique_missing, train_df['path']),
            total=len(train_df),
        )
    )

# Split the (n_unique_frames, n_miss_frames) pairs into two parallel arrays.
N_UNIQUE_FRAMES = np.array([n_unique for n_unique, _ in results])
N_MISS_FRAMES = np.array([n_miss for _, n_miss in results])

In [None]:
# Total Frame Count
# Distribution of the number of distinct frames per sample (one value per row
# of train_df, computed by get_unique_missing).
fig = px.histogram(N_UNIQUE_FRAMES, title=f"<b>Total Frame Count (#samples={N_SAMPLES})</b>")
fig.update_xaxes(title_text="<b>Frame Count</b>")
fig.update_yaxes(title_text="<b>Frequency</b>")
fig.update_layout(showlegend=False)
fig.show()

# Shortest and longest sample, in frames.
print(f"... Min: {N_UNIQUE_FRAMES.min()}\n")
print(f"... Max: {N_UNIQUE_FRAMES.max()}\n")

# Extra tail percentiles (1%, 99.9%, ...) in addition to describe()'s default
# summary, to see how long the rare very long samples are.
PERCENTILES = [0.01, 0.05, 0.25, 0.50, 0.75, 0.95, 0.99, 0.999]
display(pd.Series(N_UNIQUE_FRAMES).describe(percentiles=PERCENTILES))

In [None]:
# Missing Frame Count
# fig = px.histogram(N_MISS_FRAMES, title=f"<b>Missing Frame Count (#samples={N_SAMPLES})</b>")
# fig.update_xaxes(title_text="<b>Frame Count</b>")
# fig.update_yaxes(title_text="<b>Frequency</b>")
# fig.update_layout(showlegend=False)
# fig.show()

# Restrict to samples that actually have gaps. The selection can be empty
# (e.g. on a small DEV subsample), and calling .min() on an empty array raises
# ValueError, so report "n/a" in that case instead of crashing the cell.
with_missing = N_MISS_FRAMES[N_MISS_FRAMES > 0]
min_with_miss = with_missing.min() if with_missing.size else "n/a"

print(f"... Min(with miss): {min_with_miss}\n")
print(f"... Max: {N_MISS_FRAMES.max()}\n")
print(f"... Has Missing Frame Count: {(N_MISS_FRAMES > 0).sum()}\n")

### (TODO) Inspect samples with >300 or <6 frames

In [None]:
# Positions (in train_df row order) of unusually long samples. The result is a
# one-element tuple of index arrays, the same structure as single-argument
# np.where returns.
idx_gt_300 = np.nonzero(N_UNIQUE_FRAMES > 300)
print(f"\n... #frames greater than 300: \n\t{idx_gt_300}")

# Positions of unusually short samples.
idx_lt_6 = np.nonzero(N_UNIQUE_FRAMES < 6)
print(f"\n... #frames less than 6: \n\t{idx_lt_6}")

# Show frame

## Simple model: Preprocess

Preprocess
- Use the mean and std of the (x, y, z) coordinates over all frames, for the lips (40), hands (21 + 21) and pose (33) landmarks, as training data.
- dataset size (94477, 690)
    - 690 is from (40+21+21+33)*3*2

Training
- input size (N, 64, 690)
- NN model
    - linear
    - dropout
    - batch normalization
    - activation function: gelu
    - output: #class=250
- cross-validation: 5-fold
- 50 epochs (see `EPOCHS` in the training cell)
- loss fn.: CrossEntropy
- optimizer: Adam with LearningRate scheduler

In [None]:
# Landmark indices of the 40 lip points used by create_features; they index
# the landmark axis of the per-frame (543, 3) array.
# NOTE(review): all values are < 468, so they presumably fall inside the face
# block of the landmark layout -- confirm against the dataset description.
LIPS_IDXS = np.array([
        61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
        291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
        78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
        95, 88, 178, 87, 14, 317, 402,318, 324, 308,
    ])
# Slices of the landmark axis used for the remaining groups (end exclusive):
# left_hand: 468:489
# pose: 489:522
# right_hand: 522:543
    
def create_features(row):
    """Build the 690-dim feature vector and the label for one training sample.

    For each landmark group (lips, left hand, pose, right hand) the mean and
    the standard deviation over all frames are computed per landmark and per
    coordinate, flattened, and concatenated.

    Args:
        row: Either an `(index, row)` tuple as yielded by
            `train_df.iterrows()` (the form used with `pool.imap`), or the row
            itself (e.g. `train_df.iloc[0]`). The row must expose `.path` and
            `.sign`.

    Returns:
        Tuple `(features, label)`:
        - features: 1-D array of length 690 =
          2 * 3 * (40 lips + 21 left hand + 33 pose + 21 right hand), ordered
          lip mean, lip std, left-hand mean, left-hand std, pose mean,
          pose std, right-hand mean, right-hand std.
        - label: `dict.get(sign)`, i.e. the value stored for the sign in the
          module-level mapping named `dict`, or None if the sign is absent.
    """
    # `iterrows()` yields (index, Series); unwrap it so a bare row works too.
    record = row[1] if isinstance(row, tuple) else row
    path = record.path
    sign = record.sign

    df = pd.read_parquet(get_full_path(path), columns=['x', 'y', 'z'])
    # NaN coordinates are replaced by 0 before aggregating, so they pull the
    # per-landmark mean/std towards 0.
    # NOTE(review): presumably NaN marks undetected landmarks -- confirm.
    df = df.fillna(0)
    # Every frame contributes 543 landmark rows -> (n_frames, 543, 3).
    sample_arr = df.values.reshape(-1, 543, 3)

    landmark_groups = (
        sample_arr[:, LIPS_IDXS, :],  # lips:       (n_frames, 40, 3)
        sample_arr[:, 468:489, :],    # left hand:  (n_frames, 21, 3)
        sample_arr[:, 489:522, :],    # pose:       (n_frames, 33, 3)
        sample_arr[:, 522:, :],       # right hand: (n_frames, 21, 3)
    )

    # Per group: mean over frames, then std over frames, each flattened.
    stats = []
    for group in landmark_groups:
        stats.append(group.mean(axis=0).reshape(-1))
        stats.append(group.std(axis=0).reshape(-1))

    # (2*40*3 + 2*21*3 + 2*33*3 + 2*21*3,) == (690,)
    features = np.concatenate(stats)

    return features, dict.get(sign)

# when using `train_df.iterrows()`, `row` is different from `pool.map()` so this can't work
#
# row = train_df.iloc[0]
# features, sign = create_features(row)
# print(features.shape)
# print(sign)

In [None]:
"""
... train_df.iterrows()

using multiprocessing and pool.map() to apply a function to each row of a pandas DataFrame, 
pool.map() passes each row as a tuple
"""

# Build the features for every sample in parallel. `iterrows()` hands each
# worker an (index, row) tuple; chunksize=500 batches the rows sent to a worker.
# Alternative kept for reference (needs the positional variant of create_features):
#     results = list(tqdm(pool.imap(create_features, train_df.values, chunksize=250)))
with Pool() as pool:
    results = list(
        tqdm(
            pool.imap(create_features, train_df.iterrows(), chunksize=500),
            total=len(train_df),
        )
    )

# Separate the (features, label) pairs into the model inputs and targets.
data_X = np.array([features for features, _ in results])
data_y = np.array([label for _, label in results])

print(data_X.shape)
print(data_y.shape)

# for i, row in train_df[:10].iterrows():
#     features, label = create_features(row)
#     print(features.shape)
#     print(label)

# Training

In [None]:
import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

In [None]:
class ASLData(Dataset):
    """Map-style dataset pairing each feature row with its label."""

    def __init__(self, datax, datay):
        """Keep references to the feature matrix `datax` and label vector `datay`."""
        self.datax, self.datay = datax, datay

    def __len__(self):
        """Number of samples, taken from the label vector."""
        return len(self.datay)

    def __getitem__(self, index):
        """Return `(features, label)` for the sample at `index`."""
        features = self.datax[index, :]
        label = self.datay[index]
        return features, label

    
class ASLModel(nn.Module):
    """Fully connected classifier over the aggregated landmark features.

    Architecture: three hidden blocks (Linear -> BatchNorm1d -> GELU), with
    dropout after the first two, followed by a linear output layer that
    produces one logit per class.

    Changes from the previous version:
    - `bn2` was created but never applied in `forward`; it now normalizes the
      third hidden layer like `bn0`/`bn1` do for the first two.
    - The output layer emitted 500 logits although the notebook documents 250
      classes; the width is now `n_classes` (default 250).

    Args:
        p: Dropout probability.
        n_features: Size of the input feature vector (default 690).
        n_classes: Number of output logits (default 250).
    """

    def __init__(self, p, n_features=690, n_classes=250):
        super().__init__()
        self.layer0 = nn.Linear(n_features, 2048)
        self.bn0 = nn.BatchNorm1d(2048)  # Batch normalization layer
        self.dropout = nn.Dropout(p=p)  # Dropout layer (shared by both uses)
        self.layer1 = nn.Linear(2048, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.layer2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.layer3 = nn.Linear(512, n_classes)

    def forward(self, x):
        """Map a `(batch, n_features)` float tensor to `(batch, n_classes)` logits."""
        x = self.layer0(x)
        x = self.bn0(x)
        x = F.gelu(x)
        x = self.dropout(x)

        x = self.layer1(x)
        x = self.bn1(x)
        x = F.gelu(x)
        x = self.dropout(x)

        x = self.layer2(x)
        x = self.bn2(x)
        x = F.gelu(x)

        # Raw logits; CrossEntropyLoss applies the softmax itself.
        return self.layer3(x)

In [None]:
from sklearn.model_selection import KFold

# K-fold cross-validation training loop: for every fold a fresh ASLModel is
# trained for EPOCHS epochs and the per-epoch loss/accuracy on the training
# and validation splits are stored in the (N_SPLITS, EPOCHS) arrays below.
EPOCHS = 50
BATCH_SIZE = 128
N_SPLITS = 5  # Number of cross-validation folds

# Metric history, indexed as [fold, epoch].
train_loss = np.zeros((N_SPLITS, EPOCHS))
train_acc = np.zeros((N_SPLITS, EPOCHS))
val_loss = np.zeros((N_SPLITS, EPOCHS))
val_acc = np.zeros((N_SPLITS, EPOCHS))

# NOTE(review): random_state is hard-coded to 42 here rather than using SEED;
# no torch seed is set in the visible cells, so weight initialisation and
# batch shuffling are presumably not reproducible -- confirm.
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Iterate over cross-validation folds
fold = 0
for train_idx, val_idx in kf.split(data_X):
    print(f"\n\n... Training on Fold: {fold+1}\n")
    trainx, valx = data_X[train_idx], data_X[val_idx]
    trainy, valy = data_y[train_idx], data_y[val_idx]

    train_data = ASLData(trainx, trainy)
    val_data = ASLData(valx, valy)

    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, num_workers=4, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, num_workers=4, shuffle=False)

    # New model, optimizer and scheduler per fold so folds do not share weights.
    # `.cuda()` means this cell requires a CUDA device.
    model = ASLModel(0.4).cuda()
    opt = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    # The scheduler is stepped once per batch (see below), so the learning rate
    # is multiplied by 0.9 every 300 optimizer steps, not every 300 epochs.
    sched = torch.optim.lr_scheduler.StepLR(opt, step_size=300, gamma=0.9)

    for epoch in range(EPOCHS):
        model.train()
        train_loss_sum = 0.
        train_correct = 0
        train_total = 0

        for x, y in train_loader:
            x = torch.Tensor(x).float().cuda()
            y = torch.Tensor(y).long().cuda()

            y_pred = model(x)

            loss = criterion(y_pred, y)
            loss.backward()
            opt.step()
            # Gradients are cleared after the update, ready for the next batch.
            opt.zero_grad()

            train_loss_sum += loss.item()
            # Count correct top-1 predictions in this batch.
            train_correct += np.sum((np.argmax(y_pred.detach().cpu().numpy(), axis=1) == y.cpu().numpy()))
            # train_total counts batches (not samples); it is the divisor for the mean batch loss.
            train_total += 1
            sched.step()

        val_loss_sum = 0.
        val_correct = 0
        val_total = 0

        # Evaluation mode: dropout disabled, batch norm uses running statistics.
        model.eval()
        for x, y in val_loader:
            x = torch.Tensor(x).float().cuda()
            y = torch.Tensor(y).long().cuda()

            with torch.no_grad():
                y_pred = model(x)
                loss = criterion(y_pred, y)
                val_loss_sum += loss.item()
                val_correct += np.sum((np.argmax(y_pred.cpu().numpy(), axis=1) == y.cpu().numpy()))
                val_total += 1

        # Loss: mean of the per-batch losses. Accuracy: correct predictions / samples in the split.
        train_loss[fold, epoch] = train_loss_sum / train_total
        train_acc[fold, epoch] = train_correct / len(train_data)
        val_loss[fold, epoch] = val_loss_sum / val_total
        val_acc[fold, epoch] = val_correct / len(val_data)

        print(f"Epoch:{epoch} > Train Loss: {(train_loss_sum / train_total):.04f}, Train Acc: {train_correct / len(train_data):0.04f}")
        print(f"Epoch:{epoch} > Val Loss: {(val_loss_sum / val_total):.04f}, Val Acc: {val_correct / len(val_data):0.04f}")
        print("=" * 50)

    fold += 1

# Loss and Accuracy

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Requires the (N_SPLITS, EPOCHS) metric arrays filled by the training cell.

# Per-epoch mean and spread of the loss across folds (axis 0 = fold).
mean_train_loss, std_train_loss = train_loss.mean(axis=0), train_loss.std(axis=0)
mean_val_loss, std_val_loss = val_loss.mean(axis=0), val_loss.std(axis=0)

# 1-based epoch numbers for the x axis.
epochs = range(1, EPOCHS + 1)

# One curve per split; error bars show the standard deviation across folds.
for curve_mean, curve_std, curve_label in (
    (mean_train_loss, std_train_loss, 'Train'),
    (mean_val_loss, std_val_loss, 'Validation'),
):
    plt.errorbar(epochs, curve_mean, yerr=curve_std, label=curve_label)

# Axis labels, title and legend.
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Loss Curve (Cross-Validation)")
plt.legend()

plt.show()


In [None]:
# Requires the (N_SPLITS, EPOCHS) metric arrays filled by the training cell.

# Per-epoch mean and spread of the accuracy across folds (axis 0 = fold).
mean_train_acc, std_train_acc = train_acc.mean(axis=0), train_acc.std(axis=0)
mean_val_acc, std_val_acc = val_acc.mean(axis=0), val_acc.std(axis=0)

# 1-based epoch numbers for the x axis.
epochs = range(1, EPOCHS + 1)

# One accuracy curve per split; error bars show the standard deviation across folds.
for curve_mean, curve_std, curve_label in (
    (mean_train_acc, std_train_acc, 'Train'),
    (mean_val_acc, std_val_acc, 'Validation'),
):
    plt.errorbar(epochs, curve_mean, yerr=curve_std, label=curve_label)

# Axis labels, title and legend.
plt.xlabel("Epochs")
plt.ylabel("Acc")
plt.title("Acc Curve (Cross-Validation)")
plt.legend()

plt.show()

## old


In [None]:
import seaborn as sns

# Legacy plotting cell (kept under the "old" heading).
# NOTE(review): train_loss / val_loss / train_acc / val_acc are now 2-D arrays
# of shape (N_SPLITS, EPOCHS). This cell looks like it was written for 1-D
# per-epoch histories, so the curves it draws are probably not one line per
# metric -- confirm, or prefer the cross-validation plots above.

# Plotting loss
sns.lineplot(train_loss, label='train_loss')
sns.lineplot(val_loss, label='val_loss')

# Set plot title and labels
plt.title('Plot of train_loss and val_loss')
plt.xlabel('epoch')
plt.ylabel('loss')

# Display the legend
plt.legend()

# Show the plot
plt.show()


# Plotting accuracy
sns.lineplot(train_acc, label='train_acc')
sns.lineplot(val_acc, label='val_acc')

# Set plot title and labels
plt.title('Plot of train_acc and val_acc')
plt.xlabel('epoch')
plt.ylabel('acc')

# Display the legend
plt.legend()

# Show the plot
plt.show()