In [2]:
# BUAT SCALING SELURUH DATA

import os
import pandas as pd

# Path to the directory containing EAR and MAR data files
data_dir = './MbaTia/Interpolasi/MarEar'
output_folder = './Dataset'

# Function to limit EAR and MAR values according to given constraints
def limit_ear_mar(data):
    # Limit EAR values
    data['ear_x'] = data['ear_x'].apply(lambda x: 0 if x == 0 else max(0.18, min(x, 0.38)))

    # Limit MAR values
    data['mar_x'] = data['mar_x'].apply(lambda x: 0 if x == 0 else max(0.15, min(x, 0.6)))


    return data

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Iterate over each file in the directory
for filename in os.listdir(data_dir):
    if filename.endswith('.csv'):
        filepath = os.path.join(data_dir, filename)

        # Read the CSV file
        try:
            data = pd.read_csv(filepath)
            # Check if required columns are present
            if 'ear_x' in data.columns and 'mar_x' in data.columns:
                # Process the data
                processed_data = limit_ear_mar(data)

                # Save the processed data to the output folder
                output_path = os.path.join(output_folder, filename)
                processed_data.to_csv(output_path, index=False)
            else:
                print(f"Warning: {filename} does not contain the required columns.")
        except Exception as e:
            print(f"Error processing {filename}: {e}")

print("Processing complete!")


Processing complete!


In [4]:
# BUAT NORMALISASI SELURUH DATA
import os
import pandas as pd

# Path to the directory containing EAR and MAR data files
data_dir = './Dataset'
output_folder = './Scaling'

# Define the maximum values for scaling
earMax = 0.38
marMax = 0.6

# Function to scale EAR and MAR values
def scaling_ear_mar(data):
    # Scale EAR values
    data['ear_x'] = data['ear_x'].apply(lambda x: x / earMax if x != 0 else 0)
    
    # Scale MAR values
    data['mar_x'] = data['mar_x'].apply(lambda x: x / marMax if x != 0 else 0)

    return data

# Iterate over each file in the directory
for filename in os.listdir(data_dir):
    if filename.endswith('.csv'):
        filepath = os.path.join(data_dir, filename)
        
        # Read the CSV file
        data = pd.read_csv(filepath)
        
        # Process data
        processed_data = scaling_ear_mar(data)
        
        # Save the processed data to the output folder
        output_path = os.path.join(output_folder, filename)
        processed_data.to_csv(output_path, index=False)


In [8]:
# overlapping 20%

import numpy as np
import os
import pandas as pd

def sliding_window(arr: np.ndarray, window_size: int, overlap: float = 0.2) -> np.ndarray:
    """
    Create a sliding window over a 2D NumPy array.

    Parameters:
    arr (np.ndarray): Input 2D array with shape (L, F)
    window_size (int): Size of the sliding window
    overlap (float): Overlap between windows as a percentage (0 to 1)

    Returns:
    np.ndarray: 3D array where each slice along the first dimension is a window
    """
    L, F = arr.shape
    step_size = int(window_size * (1 - overlap))
    num_windows = (L - window_size) // step_size + 1

    windows = np.array([arr[i:i + window_size] for i in range(0, num_windows * step_size, step_size)])
    return windows

def process_files_in_folder(folder_path: str, window_size: int, overlap: float = 0.2, output_folder: str = "output"):
    """
    Memproses setiap file dalam folder menggunakan sliding window dan menyimpan hasilnya.

    Parameters:
    folder_path (str): Path ke folder yang berisi file data
    window_size (int): Ukuran window
    overlap (float): Overlap antar window
    output_folder (str): Folder output untuk menyimpan hasil
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)

        # Memproses file CSV
        if file_name.endswith('.csv'):
            try:
                # Membaca file CSV dan mengabaikan baris pertama (header)
                data = pd.read_csv(file_path)

                print(f"File CSV dibaca: {file_name}")
                print("Data pertama:", data.head())  # Menampilkan 5 baris pertama untuk debugging

                # Pastikan kolom-kolom yang diinginkan ada
                if 'ear_x' in data.columns and 'mar_x' in data.columns and 'label' in data.columns:
                    ear_x = data['ear_x'].values
                    mar_x = data['mar_x'].values
                    labels = data['label'].values

                    # Mengecek apakah ada baris kosong dalam data
                    if len(ear_x) == 0 or len(mar_x) == 0 or len(labels) == 0:
                        print(f"File {file_name} memiliki data kosong atau tidak lengkap. Dilewati.")
                        continue

                    # Gabungkan ear_x, mar_x, dan labels menjadi array 2D (3 kolom)
                    data_combined = np.column_stack((ear_x, mar_x, labels))

                    # Terapkan sliding window pada data gabungan
                    windows = sliding_window(data_combined, window_size, overlap)

                    # Mengecek kelas berdasarkan nama file
                    if file_name.endswith('-2.csv') or file_name.endswith('-3.csv'):
                        # Mengambil hanya setengah sampel untuk kelas 2 dan kelas 3
                        print(f"Jumlah sampel sebelum slicing: {windows.shape[0]}")
                        windows = windows[::2]  # Mengambil sampel dengan indeks genap (0, 2, 4, ...)
                        print(f"Jumlah sampel setelah slicing: {windows.shape[0]}") 

                    # Menyimpan hasil windows ke dalam satu file npy
                    output_file_path = os.path.join(output_folder, f"windows_{file_name.replace('.csv', '.npy')}")

                    # Menyimpan hasil sliding window dengan label
                    np.save(output_file_path, windows)
                    print(f"Hasil sliding window disimpan di: {output_file_path}")
                    print(f"File: {file_name} -> Shape data gabungan: {data_combined.shape}, Shape windows: {windows.shape}")
                else:
                    print(f"File {file_name} tidak memiliki kolom yang dibutuhkan ('ear_x', 'mar_x', 'label').")
            except pd.errors.EmptyDataError:
                print(f"File {file_name} kosong.")
            except Exception as e:
                print(f"Error membaca file {file_name}: {e}")
        else:
            print(f"File {file_name} bukan file CSV, dilewati.")

# Parameter
folder_path = 'F:/Tugas Akhir/Scaling'  # Ganti dengan folder yang sesuai
window_size = 5400  # Sesuaikan dengan ukuran window
overlap = 0.2
output_folder = 'F:/Tugas Akhir/Output_2_kelas'

# Memproses semua file dalam folder dan menyimpan hasil sebagai file npy
process_files_in_folder(folder_path, window_size, overlap, output_folder)


File CSV dibaca: 1-1.csv
Data pertama:       ear_x  mar_x  label
0  0.987948   0.25      1
1  1.000000   0.25      1
2  0.996988   0.25      1
3  0.887383   0.25      1
4  0.905816   0.25      1
Hasil sliding window disimpan di: F:/Tugas Akhir/Output_2_kelas\windows_1-1.npy
File: 1-1.csv -> Shape data gabungan: (16200, 3), Shape windows: (3, 5400, 3)
File CSV dibaca: 1-2.csv
Data pertama:       ear_x  mar_x  label
0  0.918184   0.25      2
1  0.857007   0.25      2
2  0.840064   0.25      2
3  0.836288   0.25      2
4  0.745355   0.25      2
Jumlah sampel sebelum slicing: 3
Jumlah sampel setelah slicing: 2
Hasil sliding window disimpan di: F:/Tugas Akhir/Output_2_kelas\windows_1-2.npy
File: 1-2.csv -> Shape data gabungan: (16200, 3), Shape windows: (2, 5400, 3)
File CSV dibaca: 1-3.csv
Data pertama:       ear_x  mar_x  label
0  0.911955   0.25      3
1  0.812083   0.25      3
2  0.839784   0.25      3
3  0.797411   0.25      3
4  0.820700   0.25      3
Jumlah sampel sebelum slicing: 3


In [12]:
# Menggabungkan dan memperkecil sampel kelas 2 dan 3

import numpy as np
import os
from collections import defaultdict

def load_data(file_path):
    """
    Membaca file CSV atau NPY dan mengembalikan array NumPy.
    """
    if file_path.endswith('.csv'):
        return np.loadtxt(file_path, delimiter=',', skiprows=1)
    elif file_path.endswith('.npy'):
        return np.load(file_path)
    else:
        return None

def save_combined_data(data, output_file_path):
    """
    Menyimpan data yang sudah digabungkan ke file NPY.
    """
    np.save(output_file_path, data)
    print(f"Data gabungan disimpan di: {output_file_path}")

def process_files_by_suffix(folder_path, output_folder):
    """
    Menggabungkan file berdasarkan akhiran yang sama, mengurangi jumlah sampel menjadi 63 untuk kelas 2 dan 3,
    dan menyimpan hasilnya.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Mengelompokkan file berdasarkan akhiran
    grouped_files = defaultdict(list)

    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        if not (file_name.endswith('.csv') or file_name.endswith('.npy')):
            continue

        # Mendapatkan akhiran file (misalnya "14-1" dari "14-1.csv")
        suffix = file_name.split('-')[-1].split('.')[0]
        grouped_files[suffix].append(file_path)

    # Menggabungkan file yang memiliki akhiran yang sama
    for suffix, files in grouped_files.items():
        combined_data = []

        for file_path in files:
            data = load_data(file_path)
            if data is not None:
                combined_data.append(data)

        # Menggabungkan data dari semua file dengan akhiran yang sama
        if combined_data:
            combined_data = np.vstack(combined_data)
            
            # Untuk kelas 2 dan 3, kurangi jumlah sampel menjadi 63
            if suffix == '2' or suffix == '3':
                if len(combined_data) > 63:
                    # Lakukan random sampling jika jumlah sampel lebih dari 63
                    indices = np.random.choice(len(combined_data), size=63, replace=False)
                    combined_data = combined_data[indices]

            output_file_path = os.path.join(output_folder, f"combined_{suffix}.npy")
            save_combined_data(combined_data, output_file_path)

def check_combined_data_shapes(output_folder):
    """
    Memeriksa shape dari setiap file NPY yang sudah digabungkan.
    """
    for file_name in sorted(os.listdir(output_folder)):
        if file_name.endswith('.npy'):
            file_path = os.path.join(output_folder, file_name)
            try:
                data = np.load(file_path)
                print(f"File: {file_name} -> Shape: {data.shape}")
            except Exception as e:
                print(f"Error memuat file {file_name}: {e}")

# Parameter
folder_path = 'F:/Tugas Akhir/Output_2_kelas'  # Ganti dengan folder yang sesuai
output_folder = 'F:/Tugas Akhir/2_ClassCombined'  # Folder output yang sesuai

# Memeriksa shape dari file yang sudah digabungkan sebelum proses penggabungan
check_combined_data_shapes(output_folder)

# Memproses file dan menggabungkannya
process_files_by_suffix(folder_path, output_folder)

# Memeriksa shape dari file yang sudah digabungkan setelah proses penggabungan
check_combined_data_shapes(output_folder)


File: combined_1.npy -> Shape: (126, 5400, 3)
File: combined_2.npy -> Shape: (63, 5400, 3)
File: combined_3.npy -> Shape: (63, 5400, 3)
Data gabungan disimpan di: F:/Tugas Akhir/2_ClassCombined\combined_1.npy
Data gabungan disimpan di: F:/Tugas Akhir/2_ClassCombined\combined_2.npy
Data gabungan disimpan di: F:/Tugas Akhir/2_ClassCombined\combined_3.npy
File: combined_1.npy -> Shape: (126, 5400, 3)
File: combined_2.npy -> Shape: (63, 5400, 3)
File: combined_3.npy -> Shape: (63, 5400, 3)


In [15]:
# mengacak

import numpy as np
import os

def shuffle_first_dimension(file_path, output_file_path):
    """
    Mengacak dimensi pertama dari data NumPy dalam file dan mencetak bentuk data sebelum dan setelah pengacakan.
    
    Parameters:
    file_path (str): Path file input yang berisi data NumPy.
    output_file_path (str): Path untuk menyimpan data yang sudah diacak.
    """
    # Memuat data dari file
    data = np.load(file_path)
    
    # Mencetak shape sebelum pengacakan
    print(f"Shape sebelum pengacakan: {data.shape}")
    
    # Mengacak dimensi pertama
    np.random.shuffle(data)
    
    # Mencetak shape setelah pengacakan
    print(f"Shape setelah pengacakan: {data.shape}")
    
    # Membuat folder jika belum ada
    output_folder = os.path.dirname(output_file_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    # Menyimpan data yang sudah diacak
    np.save(output_file_path, data)
    print(f"Data yang sudah diacak disimpan di: {output_file_path}")
    
# Parameter
file_path = 'F:/Tugas Akhir/2_ClassCombined/combined_1.npy'  # Ganti dengan path file Anda
output_file_path = 'F:/Tugas Akhir/2_kelas_Shuffled/shuffled_1.npy'  # Path untuk menyimpan hasil

# Mengacak dimensi pertama dan menyimpan hasilnya
shuffle_first_dimension(file_path, output_file_path)


Shape sebelum pengacakan: (126, 5400, 3)
Shape setelah pengacakan: (126, 5400, 3)
Data yang sudah diacak disimpan di: F:/Tugas Akhir/2_kelas_Shuffled/shuffled_1.npy


In [16]:
import numpy as np
import os

def split_data(data, train_ratio=0.6, val_ratio=0.1):
    """
    Membagi data menjadi train, validation, dan test.

    Parameters:
    data (np.ndarray): Data yang akan dibagi.
    train_ratio (float): Proporsi data untuk training.
    val_ratio (float): Proporsi data untuk validation.

    Returns:
    tuple: (train_data, val_data, test_data)
    """
    # Mengacak data
    np.random.shuffle(data)

    # Menghitung jumlah sampel
    total_samples = data.shape[0]
    train_size = int(total_samples * train_ratio)
    val_size = int(total_samples * val_ratio)

    # Membagi data
    train_data = data[:train_size]
    val_data = data[train_size:train_size + val_size]
    test_data = data[train_size + val_size:]

    return train_data, val_data, test_data

def process_and_split_files(folder_path, output_folder):
    """
    Memproses dan membagi setiap file di folder berdasarkan proporsi.

    Parameters:
    folder_path (str): Path folder input yang berisi file data.
    output_folder (str): Path folder output untuk menyimpan hasil split.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Membuat folder output untuk train, val, dan test
    train_folder = os.path.join(output_folder, 'train')
    val_folder = os.path.join(output_folder, 'val')
    test_folder = os.path.join(output_folder, 'test')
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(val_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # Memproses setiap file di folder input
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.npy'):
            continue

        file_path = os.path.join(folder_path, file_name)
        data = np.load(file_path)

        # Membagi data menjadi train, val, dan test
        train_data, val_data, test_data = split_data(data)

        # Menyimpan hasil split
        base_name = os.path.splitext(file_name)[0]
        np.save(os.path.join(train_folder, f"{base_name}_train.npy"), train_data)
        np.save(os.path.join(val_folder, f"{base_name}_val.npy"), val_data)
        np.save(os.path.join(test_folder, f"{base_name}_test.npy"), test_data)

        print(f"Data {file_name} -> Train: {train_data.shape}, Val: {val_data.shape}, Test: {test_data.shape}")

# Parameter
folder_path = 'F:/Tugas Akhir/2_kelas_Shuffled'  # Folder input dengan file combined_X.npy
output_folder = 'F:/Tugas Akhir/2_kelas_Split'  # Folder output

# Memproses dan membagi data
process_and_split_files(folder_path, output_folder)


Data shuffled_1.npy -> Train: (75, 5400, 3), Val: (12, 5400, 3), Test: (39, 5400, 3)
Data shuffled_2.npy -> Train: (37, 5400, 3), Val: (6, 5400, 3), Test: (20, 5400, 3)
Data shuffled_3.npy -> Train: (37, 5400, 3), Val: (6, 5400, 3), Test: (20, 5400, 3)


In [1]:
# DATA DIBAGI MENJADI 5400 FRAME DENGAN CARA DIAMBIL TIAP 3 FRAME
import numpy as np
import pandas as pd
import os
import glob

# Parameter setup
WINDOW_LEN = 5400
BATCH_SIZE = 540
SAVE_PATH = 'F:/Tugas Akhir/Itqan/dataset/preprocess/'

# Ensure save path exists
os.makedirs(SAVE_PATH, exist_ok=True)

# Get all CSV files in the specified directory
data_files = glob.glob('./Scaling/*.csv')

# Function to process and save batches for a single CSV file
def process_and_save_batches(data, label, mode):
    batch_num = 0
    print(f"Processing {mode} data with {len(data)} rows...")

    # Use the actual label value
    label_value = label

    # Iterate over three different patterns: start from 0, 1, and 2
    for offset in range(3):
        segments = []
        indices = range(offset, len(data), 3)

        print(f"Offset {offset}, total indices: {len(indices)}")

        for start in range(0, len(indices) - WINDOW_LEN + 1, WINDOW_LEN):
            window_indices = indices[start:start + WINDOW_LEN]

            # Ensure we have enough indices for a complete window
            if len(window_indices) < WINDOW_LEN:
                print(f"Skipping window at start {start} due to insufficient length.")
                continue

            # Extract EAR and MAR data
            ear = data['ear_x'].values[window_indices]
            mar = data['mar_x'].values[window_indices]
            
            segments.append(np.array([ear, mar]).T)

            # Save batch if it reaches batch size
            if len(segments) == BATCH_SIZE:
                x_batch = np.array(segments)
                y_batch = np.full((x_batch.shape[0], 1), label_value)  # use the actual label value

                # Save batch to file
                file_path = f"{SAVE_PATH}{mode}_batch_{batch_num}_offset_{offset}.npz"
                np.savez_compressed(file_path, x=x_batch, y=y_batch)
                print(f"Saved batch {batch_num} with offset {offset} for {mode} data to {file_path}")
                
                # Reset for next batch
                segments = []
                batch_num += 1

        # Save remaining segments as a final batch
        if segments:
            x_batch = np.array(segments)
            y_batch = np.full((len(segments), 1), label_value)  # same label for remaining segments
            file_path = f"{SAVE_PATH}{mode}_batch_{batch_num}_offset_{offset}_last.npz"
            np.savez_compressed(file_path, x=x_batch, y=y_batch)
            print(f"Saved final partial batch {batch_num} with offset {offset} for {mode} data to {file_path}")

# Process each CSV file separately
for file in data_files:
    data = pd.read_csv(file)
    mode = os.path.splitext(os.path.basename(file))[0]  # Get the base filename without extension
    label = data['label'].iloc[0]  # Get the single label for the file
    process_and_save_batches(data, label, mode)


Processing 1-1 data with 16200 rows...
Offset 0, total indices: 5400
Saved final partial batch 0 with offset 0 for 1-1 data to F:/Tugas Akhir/Itqan/dataset/preprocess/1-1_batch_0_offset_0_last.npz
Offset 1, total indices: 5400
Saved final partial batch 0 with offset 1 for 1-1 data to F:/Tugas Akhir/Itqan/dataset/preprocess/1-1_batch_0_offset_1_last.npz
Offset 2, total indices: 5400
Saved final partial batch 0 with offset 2 for 1-1 data to F:/Tugas Akhir/Itqan/dataset/preprocess/1-1_batch_0_offset_2_last.npz
Processing 1-2 data with 16200 rows...
Offset 0, total indices: 5400
Saved final partial batch 0 with offset 0 for 1-2 data to F:/Tugas Akhir/Itqan/dataset/preprocess/1-2_batch_0_offset_0_last.npz
Offset 1, total indices: 5400
Saved final partial batch 0 with offset 1 for 1-2 data to F:/Tugas Akhir/Itqan/dataset/preprocess/1-2_batch_0_offset_1_last.npz
Offset 2, total indices: 5400
Saved final partial batch 0 with offset 2 for 1-2 data to F:/Tugas Akhir/Itqan/dataset/preprocess/1-2_

In [2]:
# GABUNGKAN FILE DIATAS
import numpy as np
import os
import glob

# Directory containing the .npz files
DATASET_DIR = 'F:/Tugas Akhir/Itqan/dataset/preprocess/'
COMBINED_FILE_PATH = 'F:/Tugas Akhir/Itqan/combined_data.npz'

# Get all .npz files in the dataset directory and sort them
npz_files = sorted(glob.glob(os.path.join(DATASET_DIR, '*.npz')))

# Initialize lists to hold combined data
all_x = []
all_y = []

# Iterate over the sorted .npz files
for file in npz_files:
    print(f"Loading {file}...")
    data = np.load(file)
    
    # Append the data from each file to the lists
    all_x.append(data['x'])
    all_y.append(data['y'])

# Concatenate all loaded arrays along the first axis (0-axis)
combined_x = np.concatenate(all_x, axis=0)
combined_y = np.concatenate(all_y, axis=0)

# Save the combined arrays to a new .npz file
np.savez_compressed(COMBINED_FILE_PATH, x=combined_x, y=combined_y)
print(f"Combined data saved to {COMBINED_FILE_PATH}")

# Optional: Check the shape of the combined arrays
print("Shape of combined x:", combined_x.shape)
print("Shape of combined y:", combined_y.shape)


Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-1_batch_0_offset_0_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-1_batch_0_offset_1_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-1_batch_0_offset_2_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-2_batch_0_offset_0_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-2_batch_0_offset_1_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-2_batch_0_offset_2_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-3_batch_0_offset_0_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-3_batch_0_offset_1_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-3_batch_0_offset_2_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\10-1_batch_0_offset_0_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\10-1_batch_0_offset_1_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\10-1_batch_0_offset_2_last.npz...
Loading F:/Tugas Akhir/It

In [47]:
# MIRIP KYK DIATAS
import numpy as np
import os
import glob

# Directory containing the .npz files
DATASET_DIR = 'F:/Tugas Akhir/Itqan/dataset/preprocess/'
COMBINED_FILE_PATH = 'F:/Tugas Akhir/Itqan/dataset/combined_data.npz'

# Get all .npz files in the dataset directory and sort them
npz_files = sorted(glob.glob(os.path.join(DATASET_DIR, '*.npz')))

# Initialize lists to hold combined data and labels
all_x = []
all_y = []

# Iterate over the sorted .npz files
for file in npz_files:
    print(f"Loading {file}...")
    data = np.load(file)
    
    # Extract x and y
    x_data = data['x']
    y_data = data['y'][0, 0]  # Assuming y has shape (1, 1)

    # Append x data
    all_x.append(x_data)

    # Create a label array for the current file, with the same number of rows as x_data
    y_labels = np.full((x_data.shape[0], 1), y_data)  # Repeat the label for each segment
    all_y.append(y_labels)

# Concatenate all loaded arrays along the first axis (0-axis)
combined_x = np.concatenate(all_x, axis=0)
combined_y = np.concatenate(all_y, axis=0)

# Save the combined arrays to a new .npz file
np.savez_compressed(COMBINED_FILE_PATH, x=combined_x, y=combined_y)
print(f"Combined data saved to {COMBINED_FILE_PATH}")

# Optional: Check the shape of the combined arrays
print("Shape of combined x:", combined_x.shape)
print("Shape of combined y:", combined_y.shape)

# Optionally, you can print the first few labels to verify
print("First few samples of y:", combined_y[:5])


Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-1_batch_0_offset_0_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-1_batch_0_offset_1_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-1_batch_0_offset_2_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-2_batch_0_offset_0_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-2_batch_0_offset_1_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-2_batch_0_offset_2_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-3_batch_0_offset_0_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-3_batch_0_offset_1_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\1-3_batch_0_offset_2_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\10-1_batch_0_offset_0_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\10-1_batch_0_offset_1_last.npz...
Loading F:/Tugas Akhir/Itqan/dataset/preprocess\10-1_batch_0_offset_2_last.npz...
Loading F:/Tugas Akhir/It

In [5]:
# DATA DIBAGI MENJADI 5400 FRAME DENGAN CARA TIAP SEPANJANG 3 FRAME DI RATA - RATAKAN
import numpy as np
import pandas as pd
import os
import glob

# Parameter setup
WINDOW_LEN = 5400  # Total number of frames to consider for each file
SAVE_PATH = 'F:/Tugas Akhir/Itqan/dataset/preprocessedAvg/'  # Path to save the processed files

# Ensure save path exists
os.makedirs(SAVE_PATH, exist_ok=True)

# Get all CSV files in the specified directory
data_files = glob.glob('./Scaling/*.csv')

# Function to process and save averaged data for a single CSV file
def process_and_save_data(data, label, mode):
    print(f"Processing {mode} data with {len(data)} rows...")

    segments = []

    # Average every three frames
    for start in range(0, len(data), 3):
        if start + 2 < len(data):  # Check for complete group of 3
            # Calculate the averages for EAR and MAR
            ear_avg = np.mean(data['ear_x'].values[start:start + 3])
            mar_avg = np.mean(data['mar_x'].values[start:start + 3])
            segments.append(np.array([ear_avg, mar_avg]))

    # Convert segments to a NumPy array
    segments = np.array(segments)

    # Ensure the total number of frames does not exceed WINDOW_LEN
    if len(segments) > WINDOW_LEN:
        segments = segments[:WINDOW_LEN]
        
    # Print the shape of the segments before saving
    print(f"Shape of segments for {mode}: {segments.shape}")
    print(f"Shape of labels for {mode}: {(segments.shape[0], 1)}")

    # Save the averaged data directly to a file
    file_path = f"{SAVE_PATH}{mode}_averaged.npz"
    np.savez_compressed(file_path, x=segments, y=np.full((segments.shape[0], 1), label))  # Save all data at once
    print(f"Saved averaged data for {mode} to {file_path}")

# Process each CSV file separately
for file in data_files:
    data = pd.read_csv(file)
    mode = os.path.splitext(os.path.basename(file))[0]  # Get the base filename without extension
    label = data['label'].iloc[0]  # Get the single label for the file
    process_and_save_data(data, label, mode)


Processing 1-1 data with 16200 rows...
Shape of segments for 1-1: (5400, 2)
Shape of labels for 1-1: (5400, 1)
Saved averaged data for 1-1 to F:/Tugas Akhir/Itqan/dataset/preprocessedAvg/1-1_averaged.npz
Processing 1-2 data with 16200 rows...
Shape of segments for 1-2: (5400, 2)
Shape of labels for 1-2: (5400, 1)
Saved averaged data for 1-2 to F:/Tugas Akhir/Itqan/dataset/preprocessedAvg/1-2_averaged.npz
Processing 1-3 data with 16200 rows...
Shape of segments for 1-3: (5400, 2)
Shape of labels for 1-3: (5400, 1)
Saved averaged data for 1-3 to F:/Tugas Akhir/Itqan/dataset/preprocessedAvg/1-3_averaged.npz
Processing 10-1 data with 16200 rows...
Shape of segments for 10-1: (5400, 2)
Shape of labels for 10-1: (5400, 1)
Saved averaged data for 10-1 to F:/Tugas Akhir/Itqan/dataset/preprocessedAvg/10-1_averaged.npz
Processing 10-2 data with 16200 rows...
Shape of segments for 10-2: (5400, 2)
Shape of labels for 10-2: (5400, 1)
Saved averaged data for 10-2 to F:/Tugas Akhir/Itqan/dataset/prep

In [6]:
# GABUNGKAN FILE DIATAS
import numpy as np
import os
import glob

# Directory containing the .npz files
DATASET_DIR = 'F:/Tugas Akhir/Itqan/dataset/preprocessedAvg/'
COMBINED_FILE_PATH = 'F:/Tugas Akhir/Itqan/combined_data_avg.npz'

# Get all .npz files in the dataset directory and sort them
npz_files = sorted(glob.glob(os.path.join(DATASET_DIR, '*.npz')))

# Initialize lists to hold combined data
all_x = []
all_y = []

# Iterate over the sorted .npz files
for file in npz_files:
    print(f"Loading {file}...")
    data = np.load(file)
    
    # Append the data from each file to the lists
    all_x.append(data['x'][np.newaxis, :, :])  # Add a new axis to each x
    all_y.append(data['y'][0, 0])  # Take only the first label for each file

    # Check shape of each file to ensure consistency
    print(f"Shape of x in {file}: {data['x'].shape}")
    print(f"Shape of y in {file}: {data['y'].shape}")

# Concatenate along the first axis (number of samples)
combined_x = np.concatenate(all_x, axis=0)
combined_y = np.array(all_y).reshape(-1, 1)  # Reshape to (126, 1) if you have 126 samples

# Save the combined arrays to a new .npz file
np.savez_compressed(COMBINED_FILE_PATH, x=combined_x, y=combined_y)
print(f"Combined data saved to {COMBINED_FILE_PATH}")

# Optional: Check the shape of the combined arrays
print("Shape of combined x:", combined_x.shape)
print("Shape of combined y:", combined_y.shape)


Loading F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-1_averaged.npz...
Shape of x in F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-1_averaged.npz: (5400, 2)
Shape of y in F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-1_averaged.npz: (5400, 1)
Loading F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-2_averaged.npz...
Shape of x in F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-2_averaged.npz: (5400, 2)
Shape of y in F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-2_averaged.npz: (5400, 1)
Loading F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-3_averaged.npz...
Shape of x in F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-3_averaged.npz: (5400, 2)
Shape of y in F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\1-3_averaged.npz: (5400, 1)
Loading F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\10-1_averaged.npz...
Shape of x in F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\10-1_averaged.npz: (5400, 2)
Shape of y in F:/Tugas Akhir/Itqan/dataset/preprocessedAvg\10-1_averaged.npz: (5400, 1)
Loading F

In [7]:
# PEMBAGIAN DATA TRAINING, VALIDASI DAN TEST
import numpy as np
import pandas as pd

# Constants
WINDOW_LEN = 5400
N_FEATURE = 2
DATA_PATH = './Itqan/combined_data_avg.npz'
AUGMENTED_DATA_PATH = 'F:/Tugas Akhir/Itqan/augmented_data_avg.npz'

# Load the combined data
data = np.load(DATA_PATH)
x_data = data['x']
y_data = data['y'].flatten()

# Count the number of samples per class
class_counts = np.unique(y_data, return_counts=True)

# Prepare the datasets
x_train, y_train = [], []
x_val, y_val = [], []
x_test, y_test = [], []

# Split the data
for class_label in class_counts[0]:
    class_indices = np.where(y_data == class_label)[0]
    class_x = x_data[class_indices]
    class_y = y_data[class_indices]
    
    # Shuffle the data indices
    indices = np.random.permutation(len(class_x))

    # Select samples for each set
    x_train.append(class_x[indices[:30]])  # First 30 for train
    y_train.append(class_y[indices[:30]])  # Corresponding labels for train
    
    x_val.append(class_x[indices[30:36]])  # Next 6 for validation
    y_val.append(class_y[indices[30:36]])  # Corresponding labels for validation
    
    x_test.append(class_x[indices[36:42]]) # Last 6 for test
    y_test.append(class_y[indices[36:42]]) # Corresponding labels for test

# Convert lists to arrays
x_train = np.vstack(x_train)  # Shape will be (90, 5400, 2)
y_train = np.concatenate(y_train)  # Shape will be (90,)
x_val = np.vstack(x_val)
y_val = np.concatenate(y_val)
x_test = np.vstack(x_test)
y_test = np.concatenate(y_test)

# One-hot encode the labels
y_train = pd.get_dummies(y_train).values  # Shape will be (90, 3)
y_val = pd.get_dummies(y_val).values
y_test = pd.get_dummies(y_test).values

# Reshape the data for model input
x_train = x_train.reshape(-1, WINDOW_LEN, N_FEATURE)  # Shape will be (90, 5400, 2)
x_val = x_val.reshape(-1, WINDOW_LEN, N_FEATURE)
x_test = x_test.reshape(-1, WINDOW_LEN, N_FEATURE)

# Save the combined arrays to a new .npz file
np.savez_compressed(AUGMENTED_DATA_PATH, x_train=x_train, y_train=y_train, x_val=x_val, y_val=y_val, x_test=x_test, y_test=y_test)
print(f"Combined data saved to {AUGMENTED_DATA_PATH}")

# Optional: Check the shape of the combined arrays
print("Shape of x train:", x_train.shape)
print("Shape of y train:", y_train.shape)
print("Shape of x val:", x_val.shape)
print("Shape of y val:", y_val.shape)
print("Shape of x test:", x_test.shape)
print("Shape of y test:", y_test.shape)

Combined data saved to F:/Tugas Akhir/Itqan/augmented_data_avg.npz
Shape of x train: (90, 5400, 2)
Shape of y train: (90, 3)
Shape of x val: (18, 5400, 2)
Shape of y val: (18, 3)
Shape of x test: (18, 5400, 2)
Shape of y test: (18, 3)


In [6]:
import numpy as np
import pandas as pd
import h5py

# Constants
WINDOW_LEN = 5400
N_FEATURE = 2
DATA_PATH = './Itqan/combined_data_avg.npz'
AUGMENTED_DATA_PATH = 'F:/Tugas Akhir/Itqan/augmentasi/augmented_data.h5'

# Load the combined data
data = np.load(DATA_PATH)
x_data = data['x']
y_data = data['y'].flatten()

# Count the number of samples per class
class_counts = np.unique(y_data, return_counts=True)

# Prepare the datasets
x_train, y_train = [], []
x_val, y_val = [], []
x_test, y_test = [], []

# Split the data
for class_label in class_counts[0]:
    class_indices = np.where(y_data == class_label)[0]
    class_x = x_data[class_indices]
    class_y = y_data[class_indices]
    
    # Shuffle the data indices
    indices = np.random.permutation(len(class_x))

    # Select samples for each set
    x_train.append(class_x[indices[:30]])  # First 30 for train
    y_train.append(class_y[indices[:30]])  # Corresponding labels for train
    
    x_val.append(class_x[indices[30:36]])  # Next 6 for validation
    y_val.append(class_y[indices[30:36]])  # Corresponding labels for validation
    
    x_test.append(class_x[indices[36:42]]) # Last 6 for test
    y_test.append(class_y[indices[36:42]]) # Corresponding labels for test

# Convert lists to arrays
x_train = np.vstack(x_train)  # Shape will be (90, 5400, 2)
y_train = np.concatenate(y_train)  # Shape will be (90,)
x_val = np.vstack(x_val)
y_val = np.concatenate(y_val)
x_test = np.vstack(x_test)
y_test = np.concatenate(y_test)

# One-hot encode the labels
y_train = pd.get_dummies(y_train).values  # Shape will be (90, 3)
y_val = pd.get_dummies(y_val).values
y_test = pd.get_dummies(y_test).values

# Reshape the data for model input
x_train = x_train.reshape(-1, WINDOW_LEN, N_FEATURE)  # Shape will be (90, 5400, 2)
x_val = x_val.reshape(-1, WINDOW_LEN, N_FEATURE)
x_test = x_test.reshape(-1, WINDOW_LEN, N_FEATURE)

# Augmentation parameters
noise_intensity = 0.01

# Create HDF5 file
with h5py.File(AUGMENTED_DATA_PATH, 'w') as h5file:
    # Create datasets for training, validation, and test data
    h5file.create_dataset('x_train', data=x_train, compression='gzip')
    h5file.create_dataset('y_train', data=y_train, compression='gzip')
    h5file.create_dataset('x_val', data=x_val, compression='gzip')
    h5file.create_dataset('y_val', data=y_val, compression='gzip')
    h5file.create_dataset('x_test', data=x_test, compression='gzip')
    h5file.create_dataset('y_test', data=y_test, compression='gzip')

    # List to hold augmented data
    augmented_x_train = []
    augmented_y_train = []

    # Number of batches to save in each part
    num_batches_per_part = 5000 // 4  # Divide 5000 batches into 4 parts

    for part in range(4):
        for i in range(num_batches_per_part):
            # Tambahkan Gaussian noise ke setiap sampel dalam batch
            x_train_noisy_batch = x_train + np.random.normal(0, noise_intensity, x_train.shape)

            # Simpan setiap batch ke dalam dataset dengan nama yang unik
            h5file.create_dataset(f'augmented/x_train_batch_part{part+1}_batch_{i+1}', data=x_train_noisy_batch, compression='gzip')
            h5file.create_dataset(f'augmented/y_train_batch_part{part+1}_batch_{i+1}', data=y_train, compression='gzip')

            # Tambahkan batch yang diaugmentasi ke list
            augmented_x_train.append(x_train_noisy_batch)
            augmented_y_train.append(y_train)

            print(f"Part {part+1}, Batch {i+1} saved: x_train shape {x_train_noisy_batch.shape}, y_train shape {y_train.shape}")

    # Menggabungkan semua augmented data menjadi satu array
    augmented_x_train = np.concatenate(augmented_x_train)  # Gabungkan semua batch
    augmented_y_train = np.concatenate(augmented_y_train)  # Gabungkan label untuk semua batch

    # Simpan data augmented ke dalam dataset utama
    h5file.create_dataset('augmented/x_train', data=augmented_x_train, compression='gzip')
    h5file.create_dataset('augmented/y_train', data=augmented_y_train, compression='gzip')

print("All augmented data has been saved into a single HDF5 file.")


Part 1, Batch 1 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 2 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 3 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 4 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 5 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 6 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 7 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 8 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 9 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 10 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 11 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 12 generated: x_train shape (90, 5400, 2), y_train shape (90, 3)
Part 1, Batch 13 generated: x_train shape (90, 5400, 2), y_tr

KeyboardInterrupt: 

In [2]:
import numpy as np
import os

# Path to the directory containing the 5000 augmented files
data_directory = 'F:/Tugas Akhir/Itqan/augmentasi/'
intermediate_directory = 'F:/Tugas Akhir/Itqan/augmentasi/intermediate_files/'

# Ensure intermediate directory exists
os.makedirs(intermediate_directory, exist_ok=True)

# Get all file names sorted by name (assuming they are ordered by creation)
file_names = sorted([f for f in os.listdir(data_directory) if f.endswith('.npz')])

# Function to merge two files and save as an intermediate file
def merge_two_files(file1_path, file2_path, output_path):
    # Load the first file
    data1 = np.load(file1_path)
    x1, y1 = data1['x_train'], data1['y_train']
    
    # Load the second file
    data2 = np.load(file2_path)
    x2, y2 = data2['x_train'], data2['y_train']
    
    # Concatenate data along the first axis
    x_merged = np.concatenate((x1, x2), axis=0)
    y_merged = np.concatenate((y1, y2), axis=0)
    
    # Save the merged data
    np.savez(output_path, x_train=x_merged, y_train=y_merged)
    print(f"Merged {file1_path} and {file2_path} into {output_path}")
    
    # Delete the original files after merging
    os.remove(file1_path)
    os.remove(file2_path)
    print(f"Deleted {file1_path} and {file2_path}")

# Initial merging loop (pairwise)
pair_index = 0
intermediate_files = []

while len(file_names) > 1:
    # Take the first two files in the list and merge them
    file1 = file_names.pop(0)
    file2 = file_names.pop(0)
    
    # Set paths for each file and the intermediate output
    file1_path = os.path.join(data_directory, file1)
    file2_path = os.path.join(data_directory, file2)
    output_path = os.path.join(intermediate_directory, f'merged_{pair_index}.npz')
    
    # Merge and save the result as an intermediate file
    merge_two_files(file1_path, file2_path, output_path)
    
    # Add the intermediate file to the list for the next round
    intermediate_files.append(f'merged_{pair_index}.npz')
    pair_index += 1

# Replace the file_names list with intermediate files for further merging
file_names = intermediate_files
intermediate_files = []

# Repeat the merging process until one file remains
round_count = 1
while len(file_names) > 1:
    for i in range(0, len(file_names) - 1, 2):
        # Merge every two intermediate files
        file1 = file_names[i]
        file2 = file_names[i + 1]
        
        # Set paths for each intermediate file and the new output file
        file1_path = os.path.join(intermediate_directory, file1)
        file2_path = os.path.join(intermediate_directory, file2)
        output_path = os.path.join(intermediate_directory, f'merged_round_{round_count}_{i//2}.npz')
        
        # Merge and save the result
        merge_two_files(file1_path, file2_path, output_path)
        
        # Keep track of new intermediate files
        intermediate_files.append(f'merged_round_{round_count}_{i//2}.npz')
    
    # Update the file_names list with the new set of intermediate files
    file_names = intermediate_files
    intermediate_files = []
    round_count += 1

# The final file should be the only one left in file_names
final_merged_file = file_names[0]
print(f"Final merged file is located at {os.path.join(intermediate_directory, final_merged_file)}")


Merged F:/Tugas Akhir/Itqan/augmentasi/augmented_data_batch_1.npz and F:/Tugas Akhir/Itqan/augmentasi/augmented_data_batch_10.npz into F:/Tugas Akhir/Itqan/augmentasi/intermediate_files/merged_0.npz


PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'F:/Tugas Akhir/Itqan/augmentasi/augmented_data_batch_1.npz'

In [3]:
import numpy as np

# Path to the merged dataset
merged_data_path = 'F:/Tugas Akhir/Itqan/combined_data_avg.npz'  # Update with your final merged file path

# Load the data
data = np.load(merged_data_path)
y_train = data['y']

# Convert one-hot encoded labels back to single labels if necessary
# Assuming y_train is one-hot encoded, convert it to a single label per sample
if y_train.ndim > 1 and y_train.shape[1] > 1:
    y_train = np.argmax(y_train, axis=1)

# Count each class
unique_classes, counts = np.unique(y_train, return_counts=True)

# Display class distribution
for class_label, count in zip(unique_classes, counts):
    print(f"Class {class_label}: {count} samples")


Class 1: 42 samples
Class 2: 42 samples
Class 3: 42 samples


In [1]:
import tensorflow as tf
print(tf.__path__)  # Gives the installation path of TensorFlow


['c:\\Users\\Asus\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\keras\\api\\_v2', 'c:\\Users\\Asus\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\tensorflow_estimator\\python\\estimator\\api\\_v2', 'c:\\Users\\Asus\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\tensorboard\\summary\\_tf', 'c:\\Users\\Asus\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\tensorflow', 'c:\\Users\\Asus\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\tensorflow\\_api\\v2']


In [2]:
import scipy
print(scipy.__path__)


['c:\\Users\\Asus\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\scipy']


In [3]:
import dlib
print(dlib.__path__)


['c:\\Users\\Asus\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\dlib']


In [None]:


def sliding_window(arr : np.ndarray, window_size : int, overlap = 0) -> np.ndarray:
    """
    Create a sliding window over a 2D NumPy array.

    Parameters:
    arr (np.ndarray): Input 2D array with shape (L, F)
    window_size (int): Size of the sliding window
    overlap (float): Overlap between windows as a percentage (0 to 1)

    Returns:
    np.ndarray: 3D array where each slice along the first dimension is a window
    """
    L, F = arr.shape
    step_size = int(window_size * (1 - overlap))
    num_windows = (L - window_size) // step_size + 1

    windows = np.array([arr[i:i + window_size] for i in range(0, num_windows * step_size, step_size)])
    return windows


In [25]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0
