# Visualization

This is a visualization of our project.

In [1]:
# import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from collections import Counter
%matplotlib inline

sns.set_palette('pastel')

# import custom libaries
from parameters_settings import *
from my_lib import *

## Dataset Visualization

In [14]:
# Load dataset
train_set, class_names = load_dataset("./datasets/merged_train.pt")
test_set, class_names = load_dataset("./datasets/merged_test.pt")

# Display basic information
print("="*50)
print(f"- Length of Train Set: {len(train_set)}")
print(f"- Length of Test Set: {len(test_set)}")
print(f"- Number of Classes: {len(class_names)}")
print(f"- Shape of the Images: {train_set[0][0].shape}")

# Display a dataframe
train_counts = Counter()
for _, label in train_set:
    train_counts[label.item()] += 1

test_counts = Counter()
for _, label in train_set:
    test_counts[label.item()] += 1

dist_df = pd.DataFrame({
    'Classes': class_names,
    'Num of Train Set': [train_counts[i] for i in range(len(class_names))],
    'Num of Test Set': [test_counts[i] for i in range(len(class_names))]
})

dist_df['Percentage of Train Set'] = dist_df['Num of Train Set'] / len(train_set) * 100
dist_df['Percentage of Test Set'] = dist_df['Num of Test Set'] / len(train_set) * 100

display(dist_df.style.background_gradient(
    subset=['Num of Train Set', 'Num of Test Set'],
    cmap='Blues').format({
        'Percentage of Train Set': '{:.1f}%',
        'Percentage of Test Set': '{:.1f}%'
    }))

- Length of Train Set: 67557
- Length of Test Set: 11010
- Number of Classes: 16
- Shape of the Images: torch.Size([1, 28, 28])


Unnamed: 0,Classes,Num of Train Set,Num of Test Set,Percentage of Train Set,Percentage of Test Set
0,0,6349,6349,9.4%,9.4%
1,1,7174,7174,10.6%,10.6%
2,2,6388,6388,9.5%,9.5%
3,3,6560,6560,9.7%,9.7%
4,4,6273,6273,9.3%,9.3%
5,5,5852,5852,8.7%,8.7%
6,6,6347,6347,9.4%,9.4%
7,7,6695,6695,9.9%,9.9%
8,8,6280,6280,9.3%,9.3%
9,9,6379,6379,9.4%,9.4%


In [13]:
train_counts

Counter({1: 7174,
         7: 6695,
         3: 6560,
         2: 6388,
         9: 6379,
         0: 6349,
         6: 6347,
         8: 6280,
         4: 6273,
         5: 5852,
         12: 555,
         14: 554,
         11: 549,
         10: 545,
         13: 544,
         15: 513})