# Proyek ML

## Permasalahan yang dihadapi

### Penjelasan

## Alasan pemilihan data


### Penjelasan

## Library

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder

## EDA

### EDA_age

In [None]:
# Read the feature table (adjust the path if the data lives elsewhere)
csv_path = './data/at/features.csv'
df = pd.read_csv(csv_path)

# Quick look at the first rows
print(df.head())

# Apply seaborn's whitegrid theme to the plots below
sns.set(style="whitegrid")

# --- Plot 1: histogram of the age distribution (with a KDE curve) ---
fig_hist, ax_hist = plt.subplots(figsize=(8, 6))
sns.histplot(df['age'], kde=True, color='skyblue', bins=15, ax=ax_hist)
ax_hist.set_title('Distribusi Usia dalam Dataset', fontsize=14)
ax_hist.set_xlabel('Usia', fontsize=12)
ax_hist.set_ylabel('Frekuensi', fontsize=12)
fig_hist.tight_layout()
plt.show()

# --- Plot 2: box plot of age grouped by gender ---
fig_box, ax_box = plt.subplots(figsize=(8, 6))
sns.boxplot(x='gender', y='age', data=df, palette='Set2', ax=ax_box)
ax_box.set_title('Penyebaran Usia Berdasarkan Gender', fontsize=14)
ax_box.set_xlabel('Gender (M/F)', fontsize=12)
ax_box.set_ylabel('Usia', fontsize=12)
fig_box.tight_layout()
plt.show()


### EDA_gender


In [None]:
# Read the feature table (adjust the path if the data lives elsewhere)
csv_path = './data/at/features.csv'
df = pd.read_csv(csv_path)

# Quick look at the first rows
print(df.head())

# Bar chart with the number of rows (photos) per gender value
fig_gender, ax_gender = plt.subplots(figsize=(6, 4))
sns.countplot(x='gender', data=df, palette='Set2', ax=ax_gender)
ax_gender.set_title('Distribusi Gender pada Dataset Wajah', fontsize=14)
ax_gender.set_xlabel('Gender (M/F)', fontsize=12)
ax_gender.set_ylabel('Jumlah Foto', fontsize=12)
# Dashed horizontal grid lines make the bar heights easier to compare
ax_gender.grid(axis='y', linestyle='--', alpha=0.7)
fig_gender.tight_layout()

plt.show()

### Penjelasan


## Data Preprocessing

### Resize image

In [None]:
# Resize and normalise every image in a folder
def preprocess_images(image_folder, target_size=(200, 200)):
    """Load all ``.pgm`` images in a folder as resized, normalised grayscale arrays.

    Args:
        image_folder: Directory scanned (non-recursively) for ``.pgm`` files.
            The extension is matched case-insensitively.
        target_size: Size handed to ``cv2.resize`` for every image; OpenCV
            interprets it as ``(width, height)``.

    Returns:
        A tuple ``(images, image_paths)``. ``images`` is ``np.array`` built
        from the per-image arrays after dividing pixel values by 255.0;
        ``image_paths`` is the list of file paths in the same order.
        Files are processed in sorted filename order.
    """
    import warnings  # local import so the block does not depend on a new file-level import

    images = []
    image_paths = []

    # os.listdir returns names in arbitrary order; sorting keeps the output
    # order reproducible across runs and platforms.
    for filename in sorted(os.listdir(image_folder)):
        if not filename.lower().endswith('.pgm'):
            continue

        img_path = os.path.join(image_folder, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals failure by returning None instead of raising;
        # skip such files rather than crashing inside cv2.resize.
        if img is None:
            warnings.warn(f"Skipping unreadable image: {img_path}")
            continue

        img = cv2.resize(img, target_size)

        # Scale pixel values from 0-255 down to 0-1
        images.append(img / 255.0)
        image_paths.append(img_path)

    return np.array(images), image_paths

# Preprocess the CSV data (label encoding)
def preprocess_csv(csv_path):
    """Read the feature CSV and return model features plus a fitted ID encoder.

    NOTE: a later cell in this notebook defines another ``preprocess_csv``
    that returns a single DataFrame; whichever cell ran last wins.

    Args:
        csv_path: Path of the CSV file; it must contain the columns ``ID``,
            ``age``, ``gender`` and ``timestamp``.

    Returns:
        A tuple ``(df_features, label_encoder)``. ``df_features`` holds the
        ``age``, ``gender`` and ``timestamp`` columns with ``gender`` mapped
        as ``'M' -> 1`` and ``'F' -> 0``. ``label_encoder`` is the
        ``LabelEncoder`` fitted on the ``ID`` column; the encoded IDs
        themselves are not part of the returned features.
    """
    gender_codes = {'M': 1, 'F': 0}
    raw = pd.read_csv(csv_path)

    # Only the fitted encoder leaves this function, so fitting is sufficient.
    label_encoder = LabelEncoder()
    label_encoder.fit(raw['ID'])

    # Select the training features; assign() works on a new frame, so the
    # raw table is left untouched.
    df_features = raw[['age', 'gender', 'timestamp']].assign(
        gender=lambda frame: frame['gender'].map(gender_codes)
    )

    return df_features, label_encoder

# Path ke folder gambar dan file CSV
# Locations of the image folder and the feature CSV (adjust if needed)
image_folder, csv_path = './data/at/j', './data/at/features.csv'

# Load, resize and normalise the images
images, image_paths = preprocess_images(image_folder)

# Build the additional tabular features from the CSV
features, label_encoder = preprocess_csv(csv_path)

# Report a short summary of both preprocessing results
print(
    f"Jumlah gambar yang diproses: {len(images)}",
    f"Beberapa fitur: {features.head()}",
    sep="\n",
)

# The preprocessed images and features can be saved to a file or kept in
# other variables if needed, e.g. the features and the label encoder for later.


### Histogram Equalization

In [None]:
# Image preprocessing with histogram equalization
def preprocess_images_with_histogram_equalization(image_folder, target_size=(200, 200)):
    """Load ``.pgm`` images, resize them, equalize their histograms and normalise.

    Args:
        image_folder: Directory scanned (non-recursively) for ``.pgm`` files.
            The extension is matched case-insensitively.
        target_size: Size handed to ``cv2.resize`` for every image; OpenCV
            interprets it as ``(width, height)``.

    Returns:
        A tuple ``(images, labels)``. ``images`` is ``np.array`` built from
        the equalized images after dividing pixel values by 255.0;
        ``labels`` is the list of the corresponding file names in the same
        order. Files are processed in sorted filename order.
    """
    import warnings  # local import so the block does not depend on a new file-level import

    images = []
    labels = []

    # os.listdir returns names in arbitrary order; sorting keeps the output
    # order reproducible across runs and platforms.
    for filename in sorted(os.listdir(image_folder)):
        if not filename.lower().endswith('.pgm'):
            continue

        img_path = os.path.join(image_folder, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals failure by returning None instead of raising;
        # skip such files rather than crashing inside cv2.resize.
        if img is None:
            warnings.warn(f"Skipping unreadable image: {img_path}")
            continue

        img = cv2.resize(img, target_size)

        # Histogram equalization to increase contrast
        img_eq = cv2.equalizeHist(img)

        # Scale pixel values from 0-255 down to 0-1
        images.append(img_eq / 255.0)
        labels.append(filename)

    return np.array(images), labels

# CSV preprocessing (a variant of the earlier cell's function)
def preprocess_csv(csv_path):
    """Read the feature CSV, encode ``ID``/``gender`` and fill missing values.

    NOTE: this redefines the ``preprocess_csv`` from the earlier cell and,
    unlike that one, returns only a DataFrame.

    Args:
        csv_path: Path of the CSV file; it must contain the columns ``ID``,
            ``age``, ``gender`` and ``timestamp``.

    Returns:
        The full DataFrame with ``ID`` label-encoded, ``gender`` mapped as
        ``'M' -> 1`` and ``'F' -> 0``, missing ``age`` values replaced by the
        column median and missing ``timestamp`` values by the column mean.
    """
    table = pd.read_csv(csv_path)

    # Turn the ID labels into integer codes
    table['ID'] = LabelEncoder().fit_transform(table['ID'])

    # Turn gender into a numeric flag
    table['gender'] = table['gender'].map({'M': 1, 'F': 0})

    # Impute only when a column actually has gaps, so the statistic is not
    # computed for complete columns.
    age = table['age']
    if age.isnull().any():
        table['age'] = age.fillna(age.median())

    timestamp = table['timestamp']
    if timestamp.isnull().any():
        table['timestamp'] = timestamp.fillna(timestamp.mean())

    return table

# Path ke folder dan CSV
# Locations of the image folder and the feature CSV
image_folder, csv_path = './data/at/j', './data/at/features.csv'

# Image preprocessing with histogram equalization
images, labels = preprocess_images_with_histogram_equalization(image_folder)

# CSV preprocessing
features = preprocess_csv(csv_path)

# Report a short summary of both results
print(
    f"Jumlah gambar setelah histogram equalization: {len(images)}",
    f"Beberapa data fitur: {features.head()}",
    sep="\n",
)


### Penjelasan

## Pelatihan dan Penyetelan Model

### Penjelasan

## Pengukuran kinerja / evaluasi model

### Penjelasan

## Visualisasi hasil

### Penjelasan

## Kesimpulan

### Penjelasan