**KLASIFIKASI**


0.   BELOK KANAN TAJAM
1.   BELOK KIRI TAJAM
2.   BELOK KANAN SIKU
3.   BELOK KIRI SIKU



In [None]:
# Import library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle

## 1. Informasi Dasar Dataset


In [None]:
# Mount Google Drive at /content/drive; this is the step that grants the
# notebook permission to access files stored on Drive.
# The banner's emoji was stored as mojibake and is restored here.
print("## 🔄 Mount Google Drive")
drive.mount('/content/drive')

In [None]:
import os

# Folder on Google Drive that holds the dataset and receives generated files.
SAVE_DIR = '/content/drive/MyDrive/Colab Notebooks/MLP-Learning-LineFollower/data'
print(f"✅ Drive berhasil di-mount. File akan disimpan di: {SAVE_DIR}")
print("-" * 40)

INPUT_FILENAME = 'linefollower_dataset.csv'
# SAVE_DIR has no trailing slash, so plain string concatenation produced
# ".../datalinefollower_dataset.csv"; join the parts with a path separator.
INPUT_PATH = os.path.join(SAVE_DIR, INPUT_FILENAME)

# Report whether the dataset file exists at the computed path.
print("File ditemukan ✅" if os.path.exists(INPUT_PATH) else "⚠️ File tidak ditemukan.")

In [None]:
# Load the dataset: read the CSV file located at INPUT_PATH into a DataFrame.
df = pd.read_csv(INPUT_PATH)

In [None]:
# Basic information about the loaded dataset.
print(df.shape)  # (number of rows, number of columns)
df.info()  # prints the per-column dtype / non-null summary
df.head()  # last expression of the cell: previews the first rows

## 2. Analisis Statistik Deskriptif


In [None]:
# Numeric summary statistics, one row per column, extended with skewness.
skewness = df.skew(numeric_only=True)
desc_stats = df.describe().transpose()
desc_stats['skewness'] = skewness
desc_stats

In [None]:
# Value frequencies for each listed categorical column present in the data.
for col in ['gerakan']:
    if col not in df.columns:
        continue
    print(f"\nFrekuensi nilai pada kolom {col}:")
    print(df[col].value_counts())

In [None]:
# Share of missing values in every column, expressed as a percentage.
missing_pct = df.isna().mean().mul(100)
missing_pct

In [None]:
# outlier IQR
def detect_outliers_iqr(df, columns):
    """Collect, per column, the rows lying outside the 1.5 * IQR fences.

    Args:
        df: DataFrame holding the data to inspect.
        columns: Iterable of column names; names absent from ``df`` are
            skipped.

    Returns:
        dict mapping each inspected column name to a single-column DataFrame
        containing the rows whose value is below ``Q1 - 1.5 * IQR`` or above
        ``Q3 + 1.5 * IQR`` (original index kept; empty when nothing is
        flagged).
    """
    def _outside_fences(values):
        # Boolean mask of the values beyond either fence.
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        spread = q3 - q1
        return values.lt(q1 - 1.5 * spread) | values.gt(q3 + 1.5 * spread)

    return {
        name: df.loc[_outside_fences(df[name]), [name]]
        for name in columns
        if name in df.columns
    }

In [None]:
# outlier Z-Score
def detect_outliers_zscore(df, columns, threshold=3):
    """Collect, per column, the rows whose absolute z-score exceeds a limit.

    Args:
        df: DataFrame holding the data to inspect.
        columns: Iterable of column names; names absent from ``df`` are
            skipped.
        threshold: Absolute z-score above which a row is flagged.

    Returns:
        dict mapping each inspected column name to a single-column DataFrame
        with the flagged rows (original index kept; empty when nothing is
        flagged). The z-score uses the column's ``mean()`` and ``std()``.
    """
    flagged = {}
    for name in columns:
        if name not in df.columns:
            continue
        values = df[name]
        centred = values - values.mean()
        scores = centred / values.std()
        flagged[name] = df.loc[scores.abs() > threshold, [name]]
    return flagged

# Run both detectors over every numeric column and show what each flagged.
numeric_cols = df.select_dtypes(include=np.number).columns.tolist()

anomali_iqr = detect_outliers_iqr(df, numeric_cols)
anomali_z = detect_outliers_zscore(df, numeric_cols)

for col in numeric_cols:
    reports = (
        (f"\nOutlier {col} (IQR):", anomali_iqr[col]),
        (f"Outlier {col} (Z-score):", anomali_z[col]),
    )
    for heading, flagged_rows in reports:
        print(heading)
        display(flagged_rows)

In [None]:
# One figure per numeric column: every point first, then the flagged points
# drawn on top (IQR in red, z-score in orange).
for col in numeric_cols:
    plt.figure(figsize=(10, 6))
    plt.scatter(df.index, df[col], label='Normal Data', alpha=0.6)

    overlays = (
        (anomali_iqr[col], 'red', 'IQR Outliers'),
        (anomali_z[col], 'orange', 'Z-score Outliers'),
    )
    for flagged, colour, tag in overlays:
        if flagged.empty:
            continue  # nothing flagged by this method, so no legend entry
        plt.scatter(flagged.index, flagged[col], color=colour, label=tag, zorder=5)

    plt.title(f'Outlier Visualization for {col}')
    plt.xlabel('Index')
    plt.ylabel(col)
    plt.legend()
    plt.show()

## 3. Distribusi dan Visualisasi Univariat


In [None]:
# Names of all numeric columns in df, as a plain list.
num_cols = df.select_dtypes(include=np.number).columns.tolist()

In [None]:
# Histogram (30 bins) with a KDE overlay for every numeric column.
for col in num_cols:
    plt.figure(figsize=(8, 5))
    ax = sns.histplot(df[col].dropna(), bins=30, kde=True)
    ax.set_title(f'Distribusi {col}')
    ax.set_xlabel(col)
    ax.set_ylabel('Frekuensi')
    plt.show()

In [None]:
# Horizontal box plot for each numeric column (more detailed view).
for col in num_cols:
    plt.figure(figsize=(6, 4))
    ax = sns.boxplot(x=df[col])
    ax.set_title(f'Boxplot {col}')
    plt.show()

In [None]:
# Correlation table: pairwise correlation between the numeric columns.
corr = df.corr(numeric_only=True)
corr

## 4. Hubungan Antar Variabel

In [None]:
# Visualise the correlation table as an annotated heatmap centred on zero.
heatmap_options = {'annot': True, 'cmap': 'coolwarm', 'center': 0}

plt.figure(figsize=(18, 16))
sns.heatmap(corr, **heatmap_options)
plt.title("Heatmap Korelasi")
plt.show()

In [None]:
# Scatter plot for every unique pair of numeric columns.
from itertools import combinations

numeric_cols = df.select_dtypes(include=np.number).columns.tolist()

# Build each display label once instead of re-formatting it for every plot.
pretty_labels = {name: name.replace("_", " ").title() for name in numeric_cols}

# combinations() yields each unordered pair exactly once, in the same order
# as nested index loops over (i, j) with i < j.
for col1, col2 in combinations(numeric_cols, 2):
    label1 = pretty_labels[col1]
    label2 = pretty_labels[col2]
    plt.figure(figsize=(8, 6))
    sns.scatterplot(x=col1, y=col2, data=df)
    plt.title(f'Scatter Plot: {label1} vs {label2}')
    plt.xlabel(label1)
    plt.ylabel(label2)
    plt.show()

In [None]:
# Pair plot of the numeric columns, capped at a maximum column count so the
# grid stays quick to render.
numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
max_cols_for_pairplot = 11

too_many_columns = len(numeric_cols) > max_cols_for_pairplot
if too_many_columns:
    print(f"Warning: There are {len(numeric_cols)} numeric columns. Displaying pairplot for the first {max_cols_for_pairplot} columns for performance.")
cols_to_pairplot = numeric_cols[:max_cols_for_pairplot] if too_many_columns else numeric_cols

# Rows with a missing value in any plotted column are dropped before plotting.
sns.pairplot(df[cols_to_pairplot].dropna(), diag_kind="kde")
plt.show()

In [None]:
# Box plots of every numeric column grouped by each categorical column.
categorical_cols_for_boxplot = ['gerakan']
numeric_cols_for_boxplot = df.select_dtypes(include=np.number).columns.tolist()  # all numeric columns

for cat_col in categorical_cols_for_boxplot:
    if cat_col not in df.columns:
        continue
    cat_label = cat_col.replace('_', ' ').title()
    for num_col in numeric_cols_for_boxplot:
        if num_col == cat_col:
            # When the grouping column is itself numeric it also appears in
            # the numeric list; plotting it against itself is degenerate.
            continue
        num_label = num_col.replace('_', ' ').title()
        plt.figure(figsize=(8, 5))
        sns.boxplot(x=cat_col, y=num_col, data=df, palette='Set2')
        plt.title(f"Distribusi {num_label} berdasarkan {cat_label}")
        plt.xlabel(cat_label)
        plt.ylabel(num_label)
        plt.xticks(rotation=0)
        plt.show()

In [None]:
def _columns_with_outliers(results):
    """Return the column names whose outlier frame is not empty."""
    return [name for name, frame in results.items() if not frame.empty]


# Columns in which the IQR method flagged at least one row.
iqr_outlier_cols = _columns_with_outliers(anomali_iqr)
print("Kolom dengan outlier (IQR):", iqr_outlier_cols)

# Columns in which the z-score method flagged at least one row.
z_score_outlier_cols = _columns_with_outliers(anomali_z)
print("Kolom dengan outlier (Z-score):", z_score_outlier_cols)

# Transform Data Robot

In [None]:
import os

sensor_cols = ['IR 1', 'IR 2', 'IR 3', 'IR 4', 'IR 5', 'IR 6', 'IR 7', 'IR 8', 'IR 9', 'IR 10']
target_col = 'gerakan'

print("### Mulai Data Pre-processing untuk MLP ###")
print("-" * 40)

# Step 1: logarithmic transform, intended to reduce skewness and the
# influence of outliers in the sensor readings.
print("\n1. Transformasi Log1p untuk sensor IR...")

cols_log = []  # names of the log-transformed columns
for col in sensor_cols:
    new_col = f'{col}_log'
    df[new_col] = np.log1p(df[col])   # log(1 + x)
    cols_log.append(new_col)

print(f"   -> Selesai. Data sensor log: {cols_log}")

# Step 2: Min-Max scaling of the log columns to the [0, 1] range.
print("\n2. Normalisasi Min-Max...")
scaler = MinMaxScaler()
df[cols_log] = scaler.fit_transform(df[cols_log])
print("   -> Scaling selesai (range 0–1).")

# Step 3: shuffle the rows with a fixed seed and rebuild a clean index.
print("\n3. Mengacak data (Shuffling)...")

df = shuffle(df, random_state=42).reset_index(drop=True)
print("   -> Dataset berhasil diacak!")

# Step 4: split the processed features (X_full) from the target (y_full).
print("\n4. Pemisahan Fitur yang Diolah (X) dan Target (y) dari Data Utuh...")
X_full = df[cols_log]
y_full = df[target_col]

print(f"  -> X (Fitur Olahan) Utuh: {X_full.shape}")
print(f"  -> y (Target) Utuh: {y_full.shape}")

# Step 5: write features and target together to a single CSV on Drive.
print("\n5. Menyimpan Data Hasil Olahan Utuh ke Google Drive...")

processed_full_df = pd.concat([X_full.reset_index(drop=True), y_full.reset_index(drop=True)], axis=1)
save_name = 'linefollower_dataset_transformed.csv'

# SAVE_DIR has no trailing slash, so string concatenation wrote the file next
# to the folder (".../datalinefollower_dataset_transformed.csv") instead of
# inside it. Join with a separator and make sure the folder exists.
os.makedirs(SAVE_DIR, exist_ok=True)
processed_full_df.to_csv(os.path.join(SAVE_DIR, save_name), index=False)
print(f"   -> File berhasil disimpan di folder '{SAVE_DIR}'.")

In [None]:
# --- Scaler value extraction ---
# Prints the fitted scaler's per-feature minimum and range as C-style float
# array declarations.
def _c_initializer(values):
    """Return *values* rounded to 6 decimals as a comma-separated string."""
    return ', '.join(str(item) for item in np.round(values, 6).tolist())


print("\n### NILAI SCALER UNTUK IMPLEMENTASI MIKROKONTROLER ###")
print("-" * 50)

print("// Nilai minimum (data_min_) yang ditemukan untuk setiap fitur (setelah di-log):")
print(f"float SCALER_MIN[] = {{ {_c_initializer(scaler.data_min_)} }};")

print("\n// Nilai rentang (Max - Min) yang ditemukan untuk setiap fitur (setelah di-log):")
print(f"float SCALER_RANGE[] = {{ {_c_initializer(scaler.data_range_)} }};")
print("-" * 50)

In [None]:
import os

import joblib

# Destination of the pickled scaler object on Google Drive.
drive_path_pkl = '/content/drive/MyDrive/Colab Notebooks/MLP-Learning-LineFollower/models/scaler.pkl'

# joblib.dump does not create missing folders, so create the target folder
# first to avoid failing when it does not exist yet.
os.makedirs(os.path.dirname(drive_path_pkl), exist_ok=True)
joblib.dump(scaler, drive_path_pkl)

print(f"✅ Objek Scaler disimpan di Drive pada: {drive_path_pkl}")