In [None]:
import pandas as pd
import os
import glob 
import shutil

# Load the annotation table; its 'id' and 'class' columns are used further
# down to sort the training recordings into class folders.
annotations_csv = "/kaggle/input/ultra-wide-band-pose-prediction/annotations.csv"
annotation_df = pd.read_csv(annotations_csv)
annotation_df

In [None]:
# Start from an empty working directory: this deletes everything under
# /kaggle/working, including outputs of earlier runs.
!rm -rf /kaggle/working/*

In [None]:
# Create the output root and one sub-folder per class id (0-6).
# exist_ok=True keeps the cell re-runnable without hiding real failures
# (permissions, read-only filesystem, ...) the way a bare `except` would.
dir_name = "/kaggle/working/raw_data"
for i in range(7):
    os.makedirs(os.path.join(dir_name, str(i)), exist_ok=True)

# Copy files into their classes
indir = "/kaggle/input/ultra-wide-band-pose-prediction/train/train"
outdir = "/kaggle/working/raw_data"
in_testdir = "/kaggle/input/ultra-wide-band-pose-prediction/test/test"

# Index the annotations once (id -> list of classes) so every file needs a
# single dictionary lookup instead of a scan over all annotation rows.
# A list is kept per id so that an id annotated more than once is still
# copied into every listed class folder.
classes_by_id = {}
for sample_id, class_id in zip(annotation_df['id'], annotation_df['class']):
    classes_by_id.setdefault(sample_id, []).append(class_id)

for file_ in os.listdir(indir):
    # The sample id is the part of the file name in front of ".npy"
    sample_id = file_.split(".npy")[0]
    for class_id in classes_by_id.get(sample_id, []):
        file_old = os.path.join(indir, file_)
        file_new = os.path.join(outdir, str(class_id))
        shutil.copy(file_old, file_new)

# Check number of files in each directory
for i in range(7):
    print(f"Class {i} =", len(glob.glob(os.path.join(dir_name, str(i), "*"))))
print("Sum =", len(glob.glob(os.path.join(dir_name, "*", "*"))))

# File preparation

In [None]:
import os
import numpy as np
import glob

baseURL = "/kaggle/working/raw_data"
base_testURL = "/kaggle/input/ultra-wide-band-pose-prediction/test/test/*.npy"

# Test recordings, in whatever order glob returns them
file_testlist = glob.glob(base_testURL)

# Training recordings grouped by class:
# file_list[i][j] -> path of the j-th (name-sorted) file of class i
file_list = [sorted(glob.glob(baseURL+'/'+str(i)+'/*')) for i in range(7)]


# Visualization

In [None]:
# Use the first recording of class 0 as the sample for the visualisations
# below; it is transposed right after loading.
test_img_path = file_list[0][0]
test_img = np.load(test_img_path).T
test_img.shape

## Distance

### Surface visualization

In [None]:
import numpy as np
import plotly.graph_objs as go
import cmath

# Amplitude view: the magnitude of each complex sample is what the plot
# labels as "Distance".
Z = np.abs(test_img)

# Index grids with the same shape as Z (columns -> x, rows -> y)
x = np.arange(test_img.shape[1])
y = np.arange(test_img.shape[0])
X, Y = np.meshgrid(x, y)

# Interactive 3-D surface of the magnitude
surface = go.Surface(x=X, y=Y, z=Z)
fig = go.Figure(data=[surface])

# Axis titles
axis_titles = {
    "xaxis_title": 'Time (1/256 sec)',
    "yaxis_title": 'Sampler index',
    "zaxis_title": 'Distance',
}
fig.update_layout(scene=axis_titles)

fig.show()

In [None]:
import matplotlib.pyplot as plt

# 2-D heat-map of the sample's magnitude
magnitude_img = np.abs(test_img)
plt.imshow(magnitude_img, aspect='auto')

## Velocity

### Surface visualization

In [None]:
import numpy as np
import plotly.graph_objs as go
import cmath

# Phase view: np.angle gives the argument (phase) of each complex sample,
# which the plot labels as "Velocity".
Z = np.angle(test_img)

# Index grids with the same shape as Z (columns -> x, rows -> y)
x = np.arange(test_img.shape[1])
y = np.arange(test_img.shape[0])
X, Y = np.meshgrid(x, y)

# Interactive 3-D surface of the phase
surface = go.Surface(x=X, y=Y, z=Z)
fig = go.Figure(data=[surface])

# Axis titles
axis_titles = {
    "xaxis_title": 'Time (1/256 sec)',
    "yaxis_title": 'Sampler index',
    "zaxis_title": 'Velocity',
}
fig.update_layout(scene=axis_titles)

fig.show()

In [None]:
import matplotlib.pyplot as plt

# 2-D heat-map of the sample's phase
phase_img = np.angle(test_img)
plt.imshow(phase_img, aspect='auto')

In [None]:
# Set class name for displaying.
# The list index is the class id: class_name[j] is used as the title for
# samples taken from file_list[j] in the plotting cells.
class_name = [
    'Stumble',
    'Jump',
    'Lay down',
    'Run',
    'Stand -> Sit',
    'Sit -> Stand/Walk',
    'Walk'
]

## Method1: Using filter


In [None]:
import numpy as np
import scipy.signal as signal

# Shared FFT-domain band-pass used by all three filter variants below
def _radial_bandpass(img, low, high):
    """Keep only the 2-D frequency components whose radius lies in (low, high).

    The radius is sqrt(u**2 + v**2), where u and v are the per-axis
    frequencies returned by ``np.fft.fftfreq`` (cycles per sample).
    Both bounds are exclusive, so the DC component (radius 0) is removed
    whenever ``low >= 0``.

    Returns the real part of the inverse transform, with the same shape as
    ``img``.
    """
    spectrum = np.fft.fft2(img)

    # Radial frequency of every FFT bin
    ny, nx = spectrum.shape
    u, v = np.meshgrid(np.fft.fftfreq(nx), np.fft.fftfreq(ny))
    radius = np.sqrt(u**2 + v**2)
    mask = np.logical_and(radius > low, radius < high)

    # Zero the rejected bins and go back to the sample domain
    return np.fft.ifft2(spectrum * mask).real


# 3rd-order Butterworth high-pass shared by the two combined variants
def _butter_highpass(img, high_cut, filter_order=3):
    """Zero-phase Butterworth high-pass applied with ``signal.filtfilt``.

    ``high_cut`` is passed to ``signal.butter`` as the critical frequency.
    ``filtfilt`` is called with its default axis.
    """
    b, a = signal.butter(filter_order, high_cut, 'high')
    return signal.filtfilt(b, a, img)


# Band-pass filter
def bp_filter_signal(complex_img, low=0.008, high=0.1):
    """Band-pass a 2-D image in the Fourier domain.

    Parameters
    ----------
    complex_img : 2-D array
        Image to filter.
    low, high : float
        Exclusive lower / upper bound on the radial frequency to keep.

    Returns
    -------
    numpy.ndarray
        Real-valued filtered image with the shape of the input.
    """
    return _radial_bandpass(complex_img, low, high)

# High-pass then Band-pass filter
def hpbp_filter_signal(complex_img, high_cut=0.0005, low=0.008, high=0.1):
    """High-pass the image, then band-pass the result in the Fourier domain.

    Parameters
    ----------
    complex_img : 2-D array
        Image to filter.
    high_cut : float
        Critical frequency of the 3rd-order Butterworth high-pass.
    low, high : float
        Exclusive radial-frequency bounds of the band-pass.

    Returns
    -------
    numpy.ndarray
        Real-valued filtered image with the shape of the input.
    """
    hp_img = _butter_highpass(complex_img, high_cut)
    return _radial_bandpass(hp_img, low, high)

# Band-pass then High-pass filter
def bphp_filter_signal(complex_img, high_cut=0.0005, low=0.008, high=0.1):
    """Band-pass the image in the Fourier domain, then high-pass the result.

    Parameters
    ----------
    complex_img : 2-D array
        Image to filter.
    high_cut : float
        Critical frequency of the 3rd-order Butterworth high-pass.
    low, high : float
        Exclusive radial-frequency bounds of the band-pass.

    Returns
    -------
    numpy.ndarray
        Filtered image with the shape of the input.
    """
    bp_img = _radial_bandpass(complex_img, low, high)
    return _butter_highpass(bp_img, high_cut)

### Result

Filter with Band-pass filter

In [None]:
fig, axs = plt.subplots(5, 7, figsize=(20, 10))

# Row i shows the i-th sample of a class, column j shows class j.
# np.ndenumerate walks the axes grid row by row.
for (i, j), ax in np.ndenumerate(axs):
    magnitude = np.abs(np.load(file_list[j][i]).T)
    # Band-pass the magnitude image, then drop negative values
    filtered = bp_filter_signal(magnitude, 0.008, 0.1).clip(min=0)
    ax.imshow(filtered, cmap='jet', aspect='auto')
    if i == 0:
        ax.set_title(class_name[j])
plt.tight_layout()
plt.show()

Filter with Band-pass then High-pass filter

In [None]:
fig, axs = plt.subplots(5, 7, figsize=(20, 10))

# One row per sample index, one column per class
for i, ax_row in enumerate(axs):
    for j, ax in enumerate(ax_row):
        raw = np.load(file_list[j][i])
        magnitude = np.abs(raw.T)
        # Band-pass followed by high-pass, negatives clipped to 0
        filtered = bphp_filter_signal(magnitude, 0.004, 0.0001, 1)
        ax.imshow(filtered.clip(min=0), cmap='jet', aspect='auto')
        if i == 0:
            ax.set_title(class_name[j])
plt.tight_layout()
plt.show()

## Method2: Using Wavelet transform

### Normal size

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pywt
from PIL import Image

def wavelet_transform(complex_img):
    """Return the magnitude of the Haar approximation coefficients of an image.

    A single-level 2-D discrete wavelet transform (``pywt.dwt2`` with the
    'haar' wavelet) is applied; only the approximation band is used and the
    three detail bands are discarded.

    The result is NOT rescaled. A 0-255 normalisation used to be computed
    here but its result was never returned, so it has been removed; the
    returned values are unchanged. ``wavelet_upsampling`` is the variant
    that returns the normalised image.

    Parameters
    ----------
    complex_img : 2-D array
        Image to transform.

    Returns
    -------
    numpy.ndarray
        ``abs`` of the approximation coefficients.
    """
    # dwt2 returns (approximation, (horizontal, vertical, diagonal) details)
    cA, _details = pywt.dwt2(complex_img, 'haar')
    return np.abs(cA)


In [None]:
fig, axs = plt.subplots(5, 7, figsize=(20, 10))

# Iterate over the images and plot each one
# (row i = i-th sample of a class, column j = class id)
for i in range(5):
    for j in range(7):
        sample_img = np.load(file_list[j][i])
        distance_img = np.abs(sample_img)
        filter_img = wavelet_transform(distance_img)
        filter_img = np.transpose(filter_img)
        filter_img = filter_img.clip(min=0)
        axs[i][j].imshow(filter_img, cmap='jet', aspect='auto')
        # The title must be set inside the inner loop: when this check sat
        # one level out, only the last column of the first row got a title.
        if i==0:
            axs[i][j].set_title(class_name[j])
plt.tight_layout()
plt.show()

### Up-sampling

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pywt
from PIL import Image

def wavelet_upsampling(complex_img, upsamp_factor=2):
    """Haar approximation band, scaled to a peak of 255 and block-upsampled.

    Steps: single-level ``pywt.dwt2`` with the 'haar' wavelet, magnitude of
    the approximation coefficients, division by the maximum so the peak
    becomes 255, then every value is repeated into an
    ``upsamp_factor`` x ``upsamp_factor`` block via a Kronecker product.

    Parameters
    ----------
    complex_img : 2-D array
        Image to transform.
    upsamp_factor : int
        Side length of the block each coefficient is expanded into.

    Returns
    -------
    numpy.ndarray
        The upsampled, normalised approximation image.
    """
    # Only the approximation band is kept; the detail bands are ignored
    approx, _details = pywt.dwt2(complex_img, 'haar')
    magnitude = np.abs(approx)

    # Rescale so the largest magnitude maps to 255
    scaled = 255.0 * magnitude / magnitude.max()

    # Kronecker product with a block of ones repeats each value
    block = np.ones((upsamp_factor, upsamp_factor))
    return np.kron(scaled, block)


In [None]:
fig, axs = plt.subplots(5, 7, figsize=(20, 10))

# Row i = i-th sample of a class, column j = class id
for (i, j), ax in np.ndenumerate(axs):
    magnitude = np.abs(np.load(file_list[j][i]))
    # Transform first, then transpose for display and clip negatives
    upsampled = wavelet_upsampling(magnitude).T
    ax.imshow(upsampled.clip(min=0), cmap='jet', aspect='auto')
    if i == 0:
        ax.set_title(class_name[j])
plt.tight_layout()
plt.show()

## Method3: Using sliding-window FFT (range-frequency) transform

In [None]:
def range_time(IQ_data, n_rd_history=256):
    """Cut a recording into overlapping windows of consecutive rows.

    Window ``k`` holds rows ``k .. k + n_rd_history - 1``; consecutive
    windows are shifted by one row.

    Parameters
    ----------
    IQ_data : array-like or iterable of rows
        Samples ordered along the first axis.
    n_rd_history : int, optional
        Number of rows per window (default 256, the previously hard-coded
        value).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(IQ_data) - n_rd_history, n_rd_history, ...)``.

    Raises
    ------
    ValueError
        If ``n_rd_history`` is not positive or the recording does not have
        more than ``n_rd_history`` rows (no window can be produced).

    Notes
    -----
    NOTE(review): the window ending on the very last row is not produced
    (``len - n_rd_history`` windows rather than ``len - n_rd_history + 1``).
    This matches the existing behaviour; confirm whether it is intended.
    """
    if n_rd_history < 1:
        raise ValueError("n_rd_history must be a positive integer")

    # Accept plain iterables of rows as well as arrays
    if not isinstance(IQ_data, np.ndarray):
        IQ_data = np.asarray(list(IQ_data))

    n_frames = len(IQ_data) - n_rd_history
    if n_frames <= 0:
        raise ValueError(
            f"need more than {n_rd_history} rows to build a window, got {len(IQ_data)}"
        )

    # Slice each window directly instead of growing and re-slicing a list
    # for every incoming row; np.stack copies the slices into a new array.
    return np.stack([IQ_data[start:start + n_rd_history] for start in range(n_frames)])

def range_frequency(datas):
    """Convert every window in ``datas`` to a centred log-magnitude spectrum.

    For each window an FFT is taken along its first axis, the zero-frequency
    bin is shifted to the centre, and the magnitude is converted to decibels
    as ``20 * log10(|X| + 1e-10)`` (the small offset avoids ``log10(0)``).

    Parameters
    ----------
    datas : iterable of arrays
        The windows to transform.

    Returns
    -------
    numpy.ndarray
        The per-window dB spectra stacked along a new first axis.
    """
    def _to_db(window):
        # Range-Doppler: FFT over the window's first axis, centred
        spectrum = np.fft.fftshift(np.fft.fft(window, axis=0), axes=0)
        return 20 * np.log10(np.abs(spectrum) + 1e-10)

    return np.stack([_to_db(window) for window in datas])

def srf_transform(complex_img, half=False):
    """Build a 2-D image from the sliding-window spectra of a recording.

    The recording is windowed with ``range_time``, each window is converted
    to a dB spectrum with ``range_frequency``, and the resulting stack is
    flattened by merging its first two axes. The transposed real part of
    that 2-D array is returned.

    Parameters
    ----------
    complex_img : array
        Recording passed on to ``range_time``.
    half : bool
        When True, only the first half of each window's second-axis bins is
        kept before flattening.

    Returns
    -------
    numpy.ndarray
        2-D array whose rows correspond to the last axis of the spectra.
    """
    spectra = range_frequency(range_time(complex_img))
    if half:
        spectra = spectra[:, :spectra.shape[1]//2, :]

    # Merge (window, bin) into one axis, then put that axis second
    flat = spectra.reshape(spectra.shape[0]*spectra.shape[1], spectra.shape[2])
    return flat.real.T

In [None]:
fig, axs = plt.subplots(2, 7, figsize=(20, 4))

# Two samples (rows) for each of the seven classes (columns)
for i, ax_row in enumerate(axs):
    for j, ax in enumerate(ax_row):
        magnitude = np.abs(np.load(file_list[j][i]))
        ax.imshow(srf_transform(magnitude), cmap='jet', aspect='auto')
        if i == 0:
            ax.set_title(class_name[j])
plt.tight_layout()
plt.show()

# Save image

In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib
from tqdm import tqdm
import os
matplotlib.use('Agg')         # non-interactive backend: figures are only written to disk
output_dir = "/kaggle/working/out_pre"    # : EDIT HERE : Add your output directory where the full output path is <your path>/train/class_id/filename.png
dpi = 100                     # : EDIT HERE : Select your dpi of output image
height = 224                  # : EDIT HERE : Select your image height
width = 224                   # : EDIT HERE : Select your image width

os.makedirs(output_dir, exist_ok=True)
for i in range(7):
    os.makedirs(output_dir+"/train/"+str(i), exist_ok=True)

# Export images: one PNG per training recording, written to
# <output_dir>/train/<class id>/<recording name>.png
for i, class_files in enumerate(file_list):
    for file_path in tqdm(class_files):
        # select image
        image = np.load(file_path)
        distance_img = np.abs(image)                              # : EDIT HERE : select your amplitude (abs: distance, angle: velocity)
        filter_img = srf_transform(distance_img)                  # : EDIT HERE : select your function

        # SAVE FIG
        fig, axs = plt.subplots(1, 1, figsize=(width/dpi, height/dpi))
        axs.imshow(filter_img, cmap='jet', aspect='auto')         # : EDIT HERE : select your cmap
        axs.axis('off')
        filename = os.path.basename(file_path).replace('.npy', '.png')
        # Use the configured dpi (it was hard-coded to 100 here, so editing
        # `dpi` above changed the figure size but not the saved resolution).
        # NOTE(review): bbox_inches='tight' crops the figure, so the saved
        # PNG may not be exactly width x height pixels.
        fig.savefig(os.path.join(output_dir, "train", str(i), filename), dpi=dpi, bbox_inches='tight', pad_inches=0)
        plt.close(fig)            # free the figure; many are created in this loop

In [None]:
# Export one PNG per test recording to <output_dir>/test/<recording name>.png
os.makedirs(output_dir+"/test/", exist_ok=True)
for file_path in tqdm(file_testlist):
    # Load THIS test recording. The result used to be bound to a misspelled
    # name (`age`), so every test image was rendered from the stale `image`
    # left over from the training export loop.
    image = np.load(file_path)
    distance_img = np.abs(image)                              # : EDIT HERE : select your amplitude (abs: distance, angle: velocity)
    filter_img = srf_transform(distance_img)                  # : EDIT HERE : select your function

    # SAVE FIG
    fig, axs = plt.subplots(1, 1, figsize=(width/dpi, height/dpi))
    axs.imshow(filter_img, cmap='jet', aspect='auto')         # : EDIT HERE : select your cmap
    axs.axis('off')
    filename = os.path.basename(file_path).replace('.npy', '.png')
    # Save with the configured dpi rather than a hard-coded 100
    fig.savefig(os.path.join(output_dir, "test", filename), dpi=dpi, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

In [None]:
# Install the training dependencies. Only pillow is pinned; torch,
# transformers, datasets and evaluate resolve to whatever is current.
!pip install torch transformers datasets evaluate pillow==9.2.0
# timm is installed from the head of its GitHub repository (unpinned).
!pip install git+https://github.com/rwightman/pytorch-image-models.git


In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as T
from torch.utils.data import DataLoader, random_split

# Pytorch Image model (TIMM) library: a library for state-of-the-art image classification
import timm
import timm.optim
import timm.scheduler
from timm.data import ImageDataset, create_dataset, create_loader
from timm.data.transforms_factory import create_transform

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from PIL import Image

import evaluate

import numpy as np
import pandas as pd
from scipy import stats

from tqdm.notebook import tqdm

import glob

from sklearn.model_selection import KFold

from copy import copy

import shutil

In [None]:
# Collect the exported PNG paths. glob is called without recursive=True, so
# "**" matches exactly one directory level here (the class-id folder).
train_pattern = "/kaggle/working/out_pre/train/**/*.png"
test_pattern = "/kaggle/working/out_pre/test/*.png"
train_files = glob.glob(train_pattern)
test_files = glob.glob(test_pattern)

In [None]:
# Show the first exported training image as a sanity check
example_path = train_files[0]
example = Image.open(example_path).convert("RGB")
display(example)

In [None]:
# Pre-processing pipelines, normalised with ImageNet's mean and std.
# "train" and "test" get identical (but separate) pipelines: bicubic resize
# to 224x224, conversion to a tensor, per-channel normalisation.
transforms = {
    split: T.Compose([
        T.Resize((224, 224), interpolation=T.InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=torch.tensor([0.4850, 0.4560, 0.4060]), std=torch.tensor([0.2290, 0.2240, 0.2250]))
    ])
    for split in ("train", "test")
}

In [None]:
# Converter from a tensor back to a PIL image, used to preview transformed samples
toImage = T.ToPILImage()

In [None]:
# Display an example of a transformed image.
# The tensor has already been mean/std-normalised by the pipeline, so the
# preview is expected to look different from the original PNG.
toImage(transforms["train"](example))

In [None]:
# Build the training dataset from the exported PNGs under out_pre/train
# (one sub-folder per class id), applying the "train" pipeline to each image.
dataset = ImageDataset("/kaggle/working/out_pre/train", transform=transforms["train"])

In [None]:
# Select device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Select the model by its timm identifier; the "hf_hub:" prefix points at a
# checkpoint hosted on the Hugging Face Hub.
# NOTE(review): no list of available models is shown in this notebook;
# presumably `timm.list_models()` was meant - confirm.
model_name = "hf_hub:timm/maxvit_base_tf_224.in1k"    # : EDIT HERE : Change model name

In [None]:
# Training length (epochs per fold) and loss function
num_epochs = 25
criterion = nn.CrossEntropyLoss()

# Cross Validation Configuration
k_splits = 5                    # number of folds
metric = evaluate.load("f1")    # F1 metric; macro-averaged when computed in the training loop

In [None]:
# Cross validation: shuffled K-fold splitter. The fixed random_state makes
# the fold assignment reproducible across calls to kf.split().
kf = KFold(n_splits=k_splits, shuffle=True, random_state=42)

In [None]:
# Gradient Accumulation Settings
# Set num_accumulate to 1 for no accumulation
train_batch_size = 8    # samples per forward/backward pass
eval_batch_size = 16    # samples per validation batch
num_accumulate = 4      # batches accumulated per optimizer step (8 * 4 = 32 samples per step)

In [None]:
# K-fold training: for every fold a fresh pretrained model is fine-tuned, the
# checkpoint with the best validation macro-F1 is saved to
# checkpoint_fold<fold>.pt, and that best score is appended to all_eval_scores.
all_eval_scores = []

for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    # Report against the configured number of folds (was hard-coded to 5)
    print(f"Fold {fold+1} of {k_splits}")

    # Load Model
    model = timm.create_model(model_name, pretrained=True, num_classes=7).to(device)

    # Load Optimizer and Scheduler
    optimizer = timm.optim.create_optimizer_v2(model, opt="AdamW", lr=1e-3)
    # Lookahead wrapper: the slow weights are updated every k steps by moving
    # them towards the fast weights with factor alpha.
    optimizer = timm.optim.Lookahead(optimizer, alpha=0.5, k=6)

    scheduler = timm.scheduler.create_scheduler_v2(optimizer, num_epochs=num_epochs)[0]

    # Load Data: split train and validation set based on kfold
    train_dataset = torch.utils.data.Subset(dataset, train_idx)
    val_dataset = torch.utils.data.Subset(dataset, val_idx)

    train_dataloader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=eval_batch_size, shuffle=False)

    # Reset Model Info (per-epoch history for this fold)
    info = {
        "metric_train": [],
        "metric_val": [],
        "train_loss": [],
        "val_loss": [],
        "best_metric_val": -999,
    }

    for epoch in range(num_epochs):
        train_loss_epoch = []
        val_loss_epoch = []

        train_preds = []
        train_targets = []

        val_preds = []
        val_targets = []

        # NOTE(review): this counter is not advanced inside the epoch, so every
        # step_update() call in an epoch receives the same value - confirm
        # whether a per-update count was intended.
        num_updates = epoch * len(train_dataloader)

        ### === Train Loop === ###

        model.train()
        for idx, batch in enumerate(tqdm(train_dataloader)):
            inputs, targets = batch
            outputs = model(inputs.to(device))
            loss = criterion(outputs, targets.to(device))

            # NOTE(review): the loss is not divided by num_accumulate, so the
            # accumulated gradient is a sum over batches, not a mean.
            loss.backward()

            # === Gradient Accumulation === #
            # Step every num_accumulate batches, and on the last batch so a
            # trailing partial group is not carried into the next epoch.
            if ((idx + 1) % num_accumulate == 0) or (idx + 1 == len(train_dataloader)):
                optimizer.step()
                scheduler.step_update(num_updates=num_updates)
                optimizer.zero_grad()
            # ============================= #

            train_loss_epoch.append(loss.item())
            train_preds += outputs.argmax(-1).detach().cpu().tolist()
            train_targets += targets.tolist()
        ### ==================== ###

        optimizer.sync_lookahead()              # Sync slow weight and fast weight
        scheduler.step(epoch + 1)

        ### === Evaluation Loop === ###
        model.eval()
        with torch.no_grad():
            for batch in tqdm(val_dataloader):
                inputs, targets = batch
                outputs = model(inputs.to(device))
                loss = criterion(outputs, targets.to(device))

                # Log Values
                val_loss_epoch.append(loss.item())
                val_preds += outputs.argmax(-1).detach().cpu().tolist()
                val_targets += targets.tolist()
        ### ======================= ###

        # Log Data
        metric_train = metric.compute(predictions=train_preds, references=train_targets, average="macro")["f1"]
        metric_val = metric.compute(predictions=val_preds, references=val_targets, average="macro")["f1"]

        info["metric_train"].append(metric_train)
        info["metric_val"].append(metric_val)

        info["train_loss"].append(np.average(train_loss_epoch))
        info["val_loss"].append(np.average(val_loss_epoch))

        # Keep the checkpoint with the best validation score. The whole model
        # object is saved; it is in eval mode at this point.
        if metric_val > info["best_metric_val"]:
            print("New Best Score!")
            info["best_metric_val"] = metric_val
            torch.save(model, f"checkpoint_fold{fold}.pt")

        print(f"Fold: {fold} | Epoch: {epoch} | Metric: {metric_val} | Training Loss: {np.average(train_loss_epoch)} | Validation Loss: {np.average(val_loss_epoch)}")

    # save all best metric val
    all_eval_scores.append(info["best_metric_val"])

In [None]:
# A bare expression is only displayed when it is the last statement of a
# cell, so the fold scores must be printed explicitly here.
print(all_eval_scores)
print(model)

# หลังจากนี้จะเป็นโค้ดที่นำไปทำต่อใน Local (The code below was continued on a local machine)

**Error Analysis**

In [None]:
# Confusion matrix of every fold's best checkpoint on ITS OWN validation split.
# `kf` was created with a fixed random_state, so calling split() again yields
# the same indices as during training. Re-using the `val_dataloader` left over
# from the training cell would score every checkpoint on the last fold's
# validation set, i.e. on samples the other folds were trained on.
for fold, (_, val_idx) in enumerate(kf.split(dataset)):
    predictions = []
    references = []

    fold_val_dataloader = DataLoader(
        torch.utils.data.Subset(dataset, val_idx),
        batch_size=eval_batch_size,
        shuffle=False,
    )

    # load model
    loaded_model = torch.load(f"checkpoint_fold{fold}.pt")
    # Evaluation
    loaded_model.eval()
    with torch.no_grad():
        for batch in tqdm(fold_val_dataloader):
            inputs, targets = batch
            outputs = loaded_model(inputs.to(device))

            # Log Values
            predictions += outputs.argmax(-1).detach().cpu().tolist()
            references += targets.tolist()

    print(f"Fold: {fold}")

    # Confusion matrix; explicit labels keep it 7x7 even if a class is
    # missing from a fold's validation split.
    cm = confusion_matrix(references, predictions, labels=list(range(7)))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot()
    plt.show()

# Technique1: Voting Classifier

In [None]:
# Hard voting: every fold's checkpoint predicts a class for each test image
# and the most frequent class wins.
answers_final = dict()

# Loop for each fold
for fold in range(k_splits):
    # load model
    loaded_model = torch.load(f"/kaggle/working/checkpoint_fold{fold}.pt")

    # Evaluation: switch the model that actually predicts to eval mode
    # (this used to call eval() on the leftover training `model`).
    loaded_model.eval()
    with torch.no_grad():
        for f in tqdm(test_files):
            # key = file name without directory and extension
            key = f.split("/")[-1].split(".")[0]

            img = Image.open(f).convert("RGB")
            transformed = transforms["test"](img).unsqueeze(0).to(device)

            # Collect the predicted class of each fold
            vote = loaded_model(transformed).argmax(-1).item()
            answers_final.setdefault(key, []).append(vote)
print(answers_final)
for key in answers_final:
    # Take the most frequent class; ties go to the smallest class id.
    # np.unique is used instead of stats.mode(...)[0][0] because the shape of
    # stats.mode's result differs between SciPy versions.
    values, counts = np.unique(answers_final[key], return_counts=True)
    answers_final[key] = values[np.argmax(counts)].item()

In [None]:
# Write the voting predictions as a two-column submission file
submission_path = f"solution_maxvit_kfold{k_splits}_voting_tech1.csv"
with open(submission_path, "w") as f:
    f.write("id,class\n")
    f.writelines(f"{name},{label}\n" for name, label in answers_final.items())

# Technique2: Weighted Ensemble

In [None]:
# Weighted ensemble: each fold's raw model outputs are scaled by that fold's
# best validation score and summed per test image.
answers_final = dict()

# Loop for each fold
for fold in range(k_splits):
    # load model
    loaded_model = torch.load(f"checkpoint_fold{fold}.pt")

    # evaluation: switch the model that actually predicts to eval mode
    # (this used to call eval() on the leftover training `model`).
    loaded_model.eval()
    with torch.no_grad():
        for f in tqdm(test_files):
            # key = file name without directory and extension
            key = f.split("/")[-1].split(".")[0]

            img = Image.open(f).convert("RGB")
            transformed = transforms["test"](img).unsqueeze(0).to(device)

            # Scale this fold's outputs by its evaluation score and accumulate
            weighted = loaded_model(transformed).cpu().numpy() * all_eval_scores[fold]
            if fold == 0:
                answers_final[key] = weighted
            else:
                answers_final[key] = answers_final[key] + weighted

# Keep the accumulated scores; the shallow copy is enough because the loop
# below rebinds the entries of answers_final instead of mutating the arrays.
answers_raw = copy(answers_final)

for key in answers_final:
    # Pick the class with the highest accumulated score
    answers_final[key] = np.argmax(answers_final[key], -1).item()

In [None]:

# Write the weighted-ensemble predictions as a two-column submission file
submission_path = f"solution_maxvit_kfold{k_splits}_weighted.csv"
with open(submission_path, "w") as f:
    f.write("id,class\n")
    f.writelines(f"{name},{label}\n" for name, label in answers_final.items())

# Technique3: Pseudo Labeling

In [None]:
# Prepare softmax layer. The class axis is given explicitly: constructing
# nn.Softmax() without `dim` relies on a deprecated implicit choice.
softmax = nn.Softmax(dim=-1)

In [None]:
# Collect, per predicted class, the test samples the ensemble is confident about
to_move = {x: [] for x in range(7)}
threshold = 0.9   # minimum softmax confidence for a sample to be kept

for key in answers_raw:
    # Each entry keeps the model's batch axis (one row of class scores), so it
    # is flattened to a plain vector first. Indexing the un-flattened softmax
    # output with the class id would address the batch axis instead.
    scores = torch.tensor(answers_raw[key]).reshape(-1)

    # Predicted class = index of the highest score
    predicted_class = int(scores.argmax())

    # NOTE(review): the scores are score-weighted sums of per-fold model
    # outputs, so this softmax value is not a calibrated probability.
    confidence = torch.softmax(scores, dim=-1)[predicted_class].item()

    # Keep the sample when the confidence exceeds the threshold
    if confidence > threshold:
        to_move[predicted_class].append(key)
     

In [None]:
# Copy the confidently predicted test images into the training set.
# `to_move` holds the ensemble keys (file name without directory or
# extension), not paths, so each key is resolved back to its PNG first.
test_path_by_key = {path.split("/")[-1].split(".")[0]: path for path in test_files}

for pred_class in to_move:
    if not to_move[pred_class]:
        continue
    # NOTE(review): destination layout kept as written; confirm it matches
    # the training directory actually used.
    dest_dir = f"signal/signal_train/{pred_class}"
    os.makedirs(dest_dir, exist_ok=True)
    for item in to_move[pred_class]:
        # Fall back to the item itself in case it already is a path
        src = test_path_by_key.get(item, item)
        file_name = src.split("/")[-1]
        shutil.copyfile(src, f"{dest_dir}/{file_name}")

# Technique4: CSV Ensemble

In [None]:
# Gather, for every sample id, the class predicted by each submission CSV
all_answers = dict()

# NOTE(review): `all_submissions` (the list of CSV paths) is not defined in
# the visible part of this notebook; it must be assigned before this cell runs.
for submission in all_submissions:
    # read result csv
    df = pd.read_csv(submission)
    # convert csv to dictionary (id -> class; the last row wins for repeated ids)
    mapper = dict(zip(df['id'], df['class']))
    # add each predicted value to the id's vote list; setdefault also covers
    # ids that are missing from the first CSV, which used to raise KeyError.
    for key, label in mapper.items():
        all_answers.setdefault(key, []).append(label)
     

In [None]:
real_answers = {}
# loop over all answers
for key in all_answers:
    # Take the most frequent class; ties go to the smallest value.
    # np.unique always yields a plain scalar here, whereas stats.mode(...)[0]
    # is an array on older SciPy releases and would be written as "[3]".
    values, counts = np.unique(all_answers[key], return_counts=True)
    real_answers[key] = values[np.argmax(counts)].item()

In [None]:
# Write the majority-vote predictions as the final submission file
with open("no1-3.0.csv", "w") as f:
    f.write("id,class\n")
    f.writelines(f"{name},{label}\n" for name, label in real_answers.items())


In [None]:
# Manual override of the first row's prediction in the final submission.
# Three chained-index assignments (6, then 4, then 2) used to target the same
# cell, so only the last value ever took effect; a single .loc assignment
# writes it without relying on chained indexing.
df = pd.read_csv("no1-3.0.csv")
df.loc[0, 'class'] = 2
df.to_csv("no1-3.0.csv", index=False)