# Preprocessing Data

## Import Libraries

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sktime.datasets import load_from_tsfile_to_dataframe
from sktime.transformations.panel.rocket import MiniRocket


## Dataset Paths

In [2]:
# Directory that holds the Colposcopy .ts files.
# The default is the original local folder; set the COLPOSCOPY_DATA_DIR
# environment variable to run this notebook on another machine without
# editing the cell.
DATA_DIR = Path(
    os.environ.get(
        "COLPOSCOPY_DATA_DIR",
        r"D:\KULIAH\SEMESTER 5\Program Saint Data\Uranus\myfirstbook\Klasifikasi_Colposcopy\Dataset",
    )
)

# Kept as plain strings under their original names so any other cell that
# reads TRAIN_DIR / TEST_DIR keeps working.
TRAIN_DIR = str(DATA_DIR / "Colposcopy_TRAIN.ts")
TEST_DIR = str(DATA_DIR / "Colposcopy_TEST.ts")

# Fail early with a clear message if either dataset file is missing.
for ts_path in (TRAIN_DIR, TEST_DIR):
    if not Path(ts_path).is_file():
        raise FileNotFoundError(f"Dataset file not found: {ts_path}")

X_train, y_train = load_from_tsfile_to_dataframe(TRAIN_DIR)
X_test,  y_test  = load_from_tsfile_to_dataframe(TEST_DIR)

print("Train path :", TRAIN_DIR)
# TEST_DIR points at the *_TEST.ts file, so label it as the test path.
print("Test path  :", TEST_DIR)

Train path : D:\KULIAH\SEMESTER 5\Program Saint Data\Uranus\myfirstbook\Klasifikasi_Colposcopy\Dataset\Colposcopy_TRAIN.ts
Val path   : D:\KULIAH\SEMESTER 5\Program Saint Data\Uranus\myfirstbook\Klasifikasi_Colposcopy\Dataset\Colposcopy_TEST.ts


## Gabungkan Data

In [3]:
# Merge the provided train and test splits into one pool; the folds are
# re-drawn later with stratified K-fold.
X_all = np.vstack([X_train, X_test])
y_all = np.concatenate([y_train, y_test])

for label, merged in (("X_all", X_all), ("y_all", y_all)):
    print(f"{label} shape:", merged.shape)

# Tuple of (unique class labels, number of samples per class).
class_distribution = np.unique(y_all, return_counts=True)
print("Distribusi kelas:", class_distribution)


X_all shape: (200, 1)
y_all shape: (200,)
Distribusi kelas: (array(['0', '1', '2', '3', '4', '5'], dtype='<U1'), array([15, 24, 20, 48, 37, 56], dtype=int64))


In [4]:
# Column 0 of X_all holds one series object per sample; turn each into a
# 1-D array and stack them row-wise into a single 2-D array.
series_rows = [np.asarray(series) for series in X_all[:, 0]]
X_all_arr = np.stack(series_rows, axis=0)

print("X_all_arr shape:", X_all_arr.shape)
print("Contoh 1 baris:", X_all_arr[0, :10])

X_all_arr shape: (200, 180)
Contoh 1 baris: [4.49790795 4.49790795 7.79288703 2.14435146 0.73221757 5.91004184
 2.14435146 0.73221757 0.73221757 2.14435146]


## Normalisasi

In [5]:
def z_normalize_ts(X, eps=1e-8):
    """
    Z-normalize each time series (row) independently.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_timesteps)
        One series per row. If X has more than two dimensions, every
        sample (index along axis 0) is normalized over all of its
        remaining axes together.
    eps : float, default 1e-8
        A series whose standard deviation is <= eps is treated as constant.

    Returns
    -------
    numpy.ndarray of float, same shape as X
        (X - mean) / std computed per sample. Constant series are only
        mean-centered, so they come out as (approximately) zeros instead of
        keeping their raw, un-normalized values.
    """
    X = np.asarray(X, dtype=float)

    # Reduce over every axis except the sample axis; keepdims lets the
    # per-sample statistics broadcast back against X.
    reduce_axes = tuple(range(1, X.ndim))
    mean = X.mean(axis=reduce_axes, keepdims=True)
    std = X.std(axis=reduce_axes, keepdims=True)

    # Divide (near-)constant series by 1 instead of by ~0: this avoids
    # division by zero and avoids amplifying floating-point noise.
    safe_std = np.where(std > eps, std, 1.0)

    return (X - mean) / safe_std

X_norm = z_normalize_ts(X_all_arr)

# Sanity check on the first series: after z-normalization its mean should
# be ~0 and its standard deviation ~1.
first_series = X_norm[0]
print("X_norm shape:", X_norm.shape)
print("Mean sample 0:", first_series.mean())
print("Std sample 0:", first_series.std())

X_norm shape: (200, 180)
Mean sample 0: 4.194175870806147e-17
Std sample 0: 1.0


## Stratified K-Fold

In [6]:
# Shuffled 5-fold stratified splitter with a fixed seed so the folds are
# reproducible across runs.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Report the size and class balance of every fold (folds numbered from 1).
for fold, (train_idx, val_idx) in enumerate(skf.split(X_norm, y_all), start=1):
    print(f"\nFold {fold}")
    print("Train size:", len(train_idx))
    print("Val size:", len(val_idx))
    for split_name, split_idx in (("Train", train_idx), ("Val", val_idx)):
        print(
            f"Distribusi kelas {split_name}:",
            np.unique(y_all[split_idx], return_counts=True),
        )


Fold 1
Train size: 160
Val size: 40
Distribusi kelas Train: (array(['0', '1', '2', '3', '4', '5'], dtype='<U1'), array([12, 19, 16, 38, 30, 45], dtype=int64))
Distribusi kelas Val: (array(['0', '1', '2', '3', '4', '5'], dtype='<U1'), array([ 3,  5,  4, 10,  7, 11], dtype=int64))

Fold 2
Train size: 160
Val size: 40
Distribusi kelas Train: (array(['0', '1', '2', '3', '4', '5'], dtype='<U1'), array([12, 19, 16, 38, 30, 45], dtype=int64))
Distribusi kelas Val: (array(['0', '1', '2', '3', '4', '5'], dtype='<U1'), array([ 3,  5,  4, 10,  7, 11], dtype=int64))

Fold 3
Train size: 160
Val size: 40
Distribusi kelas Train: (array(['0', '1', '2', '3', '4', '5'], dtype='<U1'), array([12, 19, 16, 39, 29, 45], dtype=int64))
Distribusi kelas Val: (array(['0', '1', '2', '3', '4', '5'], dtype='<U1'), array([ 3,  5,  4,  9,  8, 11], dtype=int64))

Fold 4
Train size: 160
Val size: 40
Distribusi kelas Train: (array(['0', '1', '2', '3', '4', '5'], dtype='<U1'), array([12, 19, 16, 39, 29, 45], dtype=int64

## Fit MiniRocket (Feature Extraction)

In [7]:
# Use only the first stratified fold as a single train/validation split.
train_idx, val_idx = next(skf.split(X_norm, y_all))

# NOTE: X_train / y_train are rebound here to the fold subsets of the
# normalized 2-D array; they no longer hold the data loaded from the .ts
# files, so re-run the loading cell before re-running the merge cell.
X_train = X_norm[train_idx]
X_val   = X_norm[val_idx]
y_train = y_all[train_idx]
y_val   = y_all[val_idx]

# sktime panel transformers expect a 3-D array shaped
# (n_instances, n_channels, n_timepoints). X_norm is 2-D
# (n_samples, n_timesteps), so add an explicit single-channel axis;
# a bare 2-D array is not read as "one univariate series per row".
# float32 is the dtype MiniRocket works in internally (TODO confirm for the
# installed sktime version); casting up front keeps fit/transform consistent.
X_train_3d = X_train[:, np.newaxis, :].astype(np.float32)
X_val_3d   = X_val[:, np.newaxis, :].astype(np.float32)

rocket = MiniRocket(
    num_kernels=1000,
    random_state=42
)

# Fit on the training fold only so the validation fold does not influence
# the fitted transform.
rocket.fit(X_train_3d)

X_train_rocket = rocket.transform(X_train_3d)
X_val_rocket   = rocket.transform(X_val_3d)

print("X_train_rocket shape:", X_train_rocket.shape)
print("X_val_rocket shape:", X_val_rocket.shape)

SystemError: _PyEval_EvalFrameDefault returned a result with an exception set