# Random Guess for Classification
In this part, we take the most frequent class in the pooled train+test splits as a constant guess and measure its accuracy on the validation split.

# Import library

In [1]:
import importlib
import pickle
import os
import torch
import sys
import matplotlib.pyplot as plt
import numpy as np
import random
import config
# Append <cwd>/Aging_Model to the module search path so that modules stored
# there can be imported (none are imported in the cells visible here).
sys.path.append(os.path.join(os.getcwd(), 'Aging_Model'))

# Prepare data

## Dataset

In [2]:
# Select the dataset name: use the entry chosen in `config` when one is set,
# otherwise fall back to the first entry of `config.datasets`.
dataset_key = 0 if config.current_dataset is None else config.current_dataset
ds = config.datasets[dataset_key]

ds

'VertebralColumn_c3'

## Load data

In [3]:
# Location of the pre-processed pickle belonging to the selected dataset `ds`.
datapath = os.path.join(
    os.getcwd(), 'Datasets', 'dataset_processed', f'Dataset_{ds}.p')

# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open(datapath, 'rb') as fh:
    dataset = pickle.load(fh)

X = dataset['X'].float()
y = dataset['y']

# Sample count, feature count and class count (largest label value + 1).
E = X.shape[0]
N_features = X.shape[1]
N_class = torch.max(y).item() + 1

X.shape, y.shape, E, N_features, N_class

(torch.Size([310, 6]), torch.Size([310]), 310, 6, 3)

## Pseudo-electrical Signal

In [4]:
# Min-max normalise X along dimension 0 so every feature column lies in [0, 1].
feature_min = torch.min(X, dim=0).values
X = X - feature_min
feature_range = torch.max(X, dim=0).values
# A constant column has zero range; dividing by it would produce NaN (0/0).
# Divide such columns by 1 instead so they simply stay at 0.
feature_range = torch.where(
    feature_range > 0, feature_range, torch.ones_like(feature_range))
X = X / feature_range
torch.min(X), torch.max(X)

(tensor(0.), tensor(1.))

In [5]:
# Preview the first 10 rows of the normalised X; the `:9` column slice shows
# at most the first 9 feature columns.
X[:10, :9]

tensor([[0.3557, 0.5198, 0.2292, 0.2509, 0.3075, 0.0252],
        [0.1245, 0.2967, 0.0986, 0.1446, 0.4767, 0.0364],
        [0.4117, 0.5139, 0.3230, 0.3076, 0.3862, 0.0175],
        [0.4162, 0.5573, 0.2713, 0.2894, 0.3419, 0.0518],
        [0.2272, 0.2894, 0.1282, 0.2470, 0.4096, 0.0442],
        [0.1360, 0.3657, 0.0995, 0.1199, 0.6479, 0.0309],
        [0.2631, 0.4003, 0.2074, 0.2239, 0.5430, 0.0397],
        [0.1854, 0.3092, 0.1346, 0.1966, 0.5075, 0.0009],
        [0.1701, 0.3587, 0.2568, 0.1563, 0.5906, 0.0567],
        [0.1017, 0.2065, 0.2501, 0.1694, 0.1523, 0.0273]])

In [6]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Only the labels are wrapped: this baseline never looks at the features.
dataset = TensorDataset(y)

# Split fractions; every split size is additionally capped at a fixed maximum.
train_rate = 0.6
test_rate = 0.2
valid_rate = 0.2
E_train = min(8192, int(E * train_rate))
E_test = min(2048, int(E * test_rate))
E_valid = min(2048, int(E * valid_rate))

# Seed the Python, NumPy and torch RNGs before splitting.
split_seed = config.data_split_seed
random.seed(split_seed)
np.random.seed(split_seed)
torch.manual_seed(split_seed)

# Carve the splits off one after another; `rest_data` always holds the samples
# that are still unassigned. The order of these calls determines the random
# draws, so it must stay train -> test -> valid.
train_data, rest_data = random_split(dataset, [E_train, E - E_train])
test_data, rest_data = random_split(
    rest_data, [E_test, len(rest_data) - E_test])
valid_data, rest_data = random_split(
    rest_data, [E_valid, len(rest_data) - E_valid])

len(train_data), len(test_data), len(valid_data)

(186, 62, 62)

In [7]:
# Pool the train and test splits; the majority class is estimated from these
# labels, while `valid_data` is kept aside for the evaluation further down.
pooled_data = train_data + test_data
# Every sample is a 1-tuple holding a scalar label tensor; unpack it to a number.
train_data = np.array([target.item() for (target,) in pooled_data])
train_data

array([2, 2, 2, 2, 1, 0, 2, 0, 1, 1, 2, 0, 1, 2, 2, 2, 2, 0, 2, 1, 2, 0,
       1, 1, 2, 1, 1, 0, 2, 0, 0, 1, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 0, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 0, 1, 2, 2, 0, 0, 0,
       2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2,
       2, 0, 0, 2, 1, 0, 2, 1, 2, 2, 0, 2, 0, 0, 1, 2, 1, 0, 0, 2, 2, 1,
       2, 1, 2, 2, 1, 2, 2, 0, 1, 1, 1, 2, 2, 1, 0, 2, 1, 1, 2, 1, 2, 1,
       2, 1, 2, 0, 1, 2, 0, 0, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 0,
       1, 2, 2, 2, 0, 0, 1, 1, 2, 2, 1, 2, 2, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 2, 1, 0, 2, 2, 2, 1, 1, 2, 0, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2,
       2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 0, 1, 2, 2,
       2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 0, 0, 2, 2, 2, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 2])

In [8]:
# Histogram of the pooled labels with one bin per class; np.histogram returns
# (counts, bin_edges) and only the counts are kept here.
label_histogram = np.histogram(train_data, bins=N_class)
frequence = label_histogram[0]
frequence

array([ 46,  77, 125])

In [9]:
# Bin edges of a histogram with N_class-1 bins, i.e. N_class evenly spaced
# values between the smallest and the largest label found in the data.
edge_histogram = np.histogram(train_data, bins=N_class - 1)
label = edge_histogram[1]
label

array([0., 1., 2.])

In [10]:
# Majority class of the pooled labels, used as the constant prediction.
# It is computed directly from the labels: looking the class up through the
# histogram bin index (label[np.argmax(frequence)]) only lines up with the
# class ids when the bins happen to coincide with the integer labels, which
# is not guaranteed when some classes are missing from the data.
present_classes, class_counts = np.unique(train_data, return_counts=True)
# np.argmax returns the first maximum, so ties go to the smallest class id.
guess = int(present_classes[np.argmax(class_counts)])
guess

2

In [11]:
# Constant prediction vector: one entry per validation sample, all equal to
# the majority class (stored as floats).
n_valid = len(valid_data)
Guess = np.full(n_valid, guess, dtype=float)
Guess

array([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])

In [12]:
# Ground-truth labels of the validation split as a plain NumPy array; every
# sample is a 1-tuple holding a scalar label tensor.
Label = np.array([target.item() for (target,) in valid_data])
Label

array([2, 0, 1, 2, 0, 0, 1, 2, 1, 2, 1, 2, 0, 1, 1, 0, 1, 1, 2, 0, 2, 0,
       1, 2, 2, 1, 2, 0, 0, 1, 2, 0, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1,
       0, 0, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 2, 1, 2, 1, 2])

In [13]:
# Accuracy of the constant guess: share of validation labels it matches.
n_correct = (Label == Guess).sum()
ACC = n_correct / len(Label)
ACC

0.4032258064516129

In [14]:
# Directory that receives the baseline result file.
path = os.path.join(os.getcwd(), 'result', 'figures', 'RandomGuess')
# Create it (including parents) if it is missing; otherwise writing the result
# file into it fails on a fresh checkout. exist_ok makes re-running harmless.
os.makedirs(path, exist_ok=True)

In [15]:
# The file name encodes the dataset name and the accuracy in per-mille
# (truncated to an integer); the file content is the accuracy value itself.
result_file = f'{path}/{ds}_{int(ACC*1000)}.txt'
np.savetxt(result_file, [ACC])