# 🤖 Smart Cage - LDR Model Training & Evaluation

**Samsung Innovation Campus - Phase 3**

Notebook ini untuk:
- Load dataset CSV LDR
- Train model ML dengan scikit-learn
- Evaluate model (Accuracy, Precision, Recall, Confusion Matrix)
- Save trained model sebagai .pkl

## 📦 Step 1: Install Dependencies

In [1]:
# IPython shell escape: invokes pip to install the third-party packages imported in Step 2.
!pip install pandas numpy scikit-learn matplotlib seaborn joblib



## 📚 Step 2: Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (accuracy_score, precision_score, 
                             recall_score, f1_score, 
                             confusion_matrix, classification_report)

print("‚úÖ Libraries imported!")

‚úÖ Libraries imported!


## 📂 Step 3: Upload Dataset

In [3]:
# Ask for the dataset CSV through Colab's browser upload widget.
from google.colab import files
uploaded = files.upload()

import os

# `uploaded` is keyed by file name. If nothing was uploaded (e.g. the dialog
# was dismissed) it is empty, so stop with a clear message instead of the
# bare IndexError that indexing an empty key list would raise.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded - re-run this cell and select the LDR dataset CSV."
    )

# Only the first uploaded file is treated as the dataset.
csv_file = next(iter(uploaded))
print(f"üìÅ Uploaded: {csv_file}")

KeyboardInterrupt: 

## 📊 Step 4: Load & Explore Dataset

In [None]:
# Read the uploaded CSV into a DataFrame and print a quick overview of it.
df = pd.read_csv(csv_file)

rule = "=" * 50
print(rule)
print("üìä DATASET INFO")
print(rule)

# Size and schema.
row_total = len(df)
column_names = list(df.columns)
print(f"Total Rows: {row_total}")
print(f"Columns: {column_names}")

# Preview of the first rows (rendered by the notebook's display helper).
print("\nüìã First 5 rows:")
preview = df.head()
display(preview)

# How many samples each label has.
print("\nüìà Label Distribution:")
per_label = df["label"].value_counts()
print(per_label)

# Descriptive statistics for the numeric columns.
print("\nüìä Statistical Summary:")
summary = df.describe()
display(summary)

# Count of missing cells per column.
print("\n‚ùì Missing Values:")
missing_per_column = df.isnull().sum()
print(missing_per_column)

## 🎨 Step 5: Data Visualization

In [None]:
# Two side-by-side charts: sample count per label, and LDR histograms per label.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_counts, ax_hist = axes

# Left panel: bar chart of label frequencies, one fixed colour per known label
# (anything unexpected falls back to gray).
colors = {"Terang": "#FFD700", "Redup": "#FFA500", "Gelap": "#4B0082"}
label_counts = df["label"].value_counts()
bar_colors = [colors.get(name, "gray") for name in label_counts.index]
label_counts.plot(kind="bar", ax=ax_counts, color=bar_colors)
ax_counts.set_title("Label Distribution", fontsize=14)
ax_counts.set_xlabel("Label")
ax_counts.set_ylabel("Count")
ax_counts.tick_params(axis='x', rotation=0)

# Right panel: overlaid, semi-transparent histograms of the raw LDR readings,
# one per label, in the order the labels first appear in the data.
for label in df["label"].unique():
    readings = df.loc[df["label"] == label, "ldr_value"]
    ax_hist.hist(
        readings,
        bins=20,
        alpha=0.6,
        label=label,
        color=colors.get(label, "gray"),
    )
ax_hist.set_title("LDR Value Distribution by Label", fontsize=14)
ax_hist.set_xlabel("LDR Value (0-4096)")
ax_hist.set_ylabel("Frequency")
ax_hist.legend()
ax_hist.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 📈 Step 5b: Boxplot Visualization

In [None]:
# Box plot of the LDR reading range for each light-condition label.
plt.figure(figsize=(10, 6))
colors_list = ["#FFD700", "#FFA500", "#4B0082"]
order = ["Terang", "Redup", "Gelap"]

# seaborn >= 0.13 deprecates `palette` without `hue`. Assigning the x variable
# to `hue` (with the same ordering, so each colour stays paired with its label)
# and turning the redundant legend off gives the same figure without the
# FutureWarning.
sns.boxplot(
    x="label",
    y="ldr_value",
    data=df,
    order=order,
    hue="label",
    hue_order=order,
    palette=colors_list,
    legend=False,
)
plt.title("LDR Value Range per Label")
plt.xlabel("Label (Light Condition)")
plt.ylabel("LDR Value (0=Bright, 4096=Dark)")
plt.grid(True, alpha=0.3)
plt.show()

# Numeric counterpart of the plot: count/mean/std/quartiles of ldr_value per label.
print("\nüìä LDR Value Statistics per Label:")
print(df.groupby("label")["ldr_value"].describe())

## ✂️ Step 6: Split Dataset

In [None]:
# The single input feature is the raw LDR reading; the target is the label column.
X = df[["ldr_value"]]
y = df["label"]

# Hold out 20% for testing. Stratifying on y keeps the label proportions the
# same in both partitions; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

separator = "=" * 50
print(separator)
print("‚úÇÔ∏è DATASET SPLIT")
print(separator)
print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")

# Per-label sample counts in each partition.
print("\nTrain labels distribution:")
print(y_train.value_counts())
print("\nTest labels distribution:")
print(y_test.value_counts())

## 🤖 Step 7: Train Model (Decision Tree)

In [None]:
# Fit a depth-limited decision tree on the training partition.
# The fixed random_state keeps the fitted tree reproducible between runs.
model = DecisionTreeClassifier(max_depth=5, random_state=42)
model.fit(X_train, y_train)

print("‚úÖ Model trained!")
print(f"Model: {type(model).__name__}")

## 📊 Step 8: Model Evaluation

In [None]:
# Score the held-out test partition.
y_pred = model.predict(X_test)

# Precision, recall and F1 are averaged across classes, weighted by the number
# of true samples in each class.
weighted = {"average": "weighted"}
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted)
recall = recall_score(y_test, y_pred, **weighted)
f1 = f1_score(y_test, y_pred, **weighted)

bar = "=" * 50
print(bar)
print("üìä MODEL EVALUATION METRICS")
print(bar)
print(f"‚úÖ Accuracy  : {accuracy*100:.2f}%")
print(f"‚úÖ Precision : {precision*100:.2f}%")
print(f"‚úÖ Recall    : {recall*100:.2f}%")
print(f"‚úÖ F1-Score  : {f1*100:.2f}%")
print(bar)

# Per-class breakdown of the same metrics.
print("\nüìã Classification Report:")
report = classification_report(y_test, y_pred)
print(report)

## 📈 Step 9: Confusion Matrix

In [None]:
# Fix the class order once and use it for BOTH the matrix and the tick labels.
# Without `labels=`, confusion_matrix orders the classes by sorted label value
# (Gelap, Redup, Terang), which would not match the hard-coded ticks below and
# would swap the Terang and Gelap rows/columns on the heatmap.
labels = ["Terang", "Redup", "Gelap"]
cm = confusion_matrix(y_test, y_pred, labels=labels)

# Rows are the true classes, columns the predicted classes.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="YlOrBr", 
            xticklabels=labels, yticklabels=labels)
plt.title("Confusion Matrix - LDR Classification")
plt.ylabel("True Label")
plt.xlabel("Predicted Label")
plt.show()

# Raw counts, in the same row/column order as `labels`.
print("\nüìä Confusion Matrix:")
print(cm)

## 💾 Step 10: Save Model

In [None]:
# Serialize the fitted model to disk with joblib.
model_filename = "ldr_light_model.pkl"
joblib.dump(model, model_filename)

divider = "=" * 50
print(divider)
print("üíæ MODEL SAVED!")
print(divider)
print(f"üìÅ Filename: {model_filename}")
print(f"üìä Model Type: {type(model).__name__}")
print(f"‚úÖ Accuracy: {accuracy*100:.2f}%")
print(divider)

# Send the saved file to the browser; `files` is the google.colab helper
# imported in the upload step.
files.download(model_filename)
print("\nüì• Model downloaded! Upload this .pkl file to your dashboard.")

## 🧪 Step 11: Test Prediction

In [None]:
# Spot-check the trained model on new readings.
test_data = [
    [500],    # expected: Terang
    [2000],   # expected: Redup
    [3500]    # expected: Gelap
]

# The model was fitted on a DataFrame whose only column is "ldr_value".
# Predicting on a bare nested list makes scikit-learn warn that X has no valid
# feature names, so wrap the samples in a DataFrame with the same column name.
test_frame = pd.DataFrame(test_data, columns=["ldr_value"])
predictions = model.predict(test_frame)

print("=" * 50)
print("üß™ TEST PREDICTIONS")
print("=" * 50)
for i, (data, pred) in enumerate(zip(test_data, predictions)):
    print(f"Test {i+1}: LDR={data[0]} ‚Üí {pred}")
print("=" * 50)

# Static reference table of the intended value ranges per label, printed for
# the reader to compare against the predictions above (not computed from the model).
print("\nüí° LDR Value Reference:")
print("   0-1365   ‚Üí Terang (Bright)")
print("   1366-2730 ‚Üí Redup (Dim)")
print("   2731-4096 ‚Üí Gelap (Dark)")