# 🤖 Smart Cage - Model Training & Evaluation

**Samsung Innovation Campus - Phase 3**

Notebook ini untuk:
- Load dataset CSV
- Train model ML dengan scikit-learn
- Evaluate model (Accuracy, Precision, Recall, Confusion Matrix)
- Save trained model sebagai .pkl

## 📦 Step 1: Install Dependencies

In [None]:
!pip install pandas numpy scikit-learn matplotlib seaborn joblib

## 📚 Step 2: Import Libraries

In [None]:
# Data handling
import pandas as pd
import numpy as np
# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
# Model persistence (used to dump the trained model to a .pkl file)
import joblib
# scikit-learn: train/test splitting, the classifier, and evaluation metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (accuracy_score, precision_score, 
                             recall_score, f1_score, 
                             confusion_matrix, classification_report)

# Reaching this line means every import above succeeded.
print("‚úÖ Libraries imported!")

## 📂 Step 3: Upload Dataset

In [None]:
# Upload the dataset CSV through the Colab file picker.
from google.colab import files
uploaded = files.upload()

# `os` is not used in this cell; the import is kept in case other cells rely on it.
import os

# `uploaded` is keyed by file name. If nothing was uploaded (for example the
# dialog was dismissed) there is no first key, so fail with a clear message
# instead of an opaque IndexError.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a CSV file.")

# Treat the first uploaded file as the dataset.
csv_file = next(iter(uploaded))
print(f"üìÅ Uploaded: {csv_file}")

## 🔍 Step 4: Load & Explore Dataset

In [None]:
# Read the uploaded CSV into a DataFrame.
df = pd.read_csv(csv_file)

# Header banner followed by the basic shape of the data.
rule = "=" * 50
print(rule, "üìä DATASET INFO", rule, sep="\n")
row_count = len(df)
column_names = list(df.columns)
print(f"Total Rows: {row_count}")
print(f"Columns: {column_names}")

# Preview of the first rows.
print("\nüìã First 5 rows:")
display(df.head(5))

# How many rows carry each label.
label_counts = df["label"].value_counts()
print("\nüìà Label Distribution:")
print(label_counts)

# Descriptive statistics as reported by DataFrame.describe().
print("\nüìä Statistical Summary:")
display(df.describe())

# Per-column count of missing values.
missing_per_column = df.isna().sum()
print("\n‚ùì Missing Values:")
print(missing_per_column)

## 🎨 Step 5: Data Visualization

In [None]:
# One figure with two panels: label counts (left) and a feature scatter (right).
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
bar_ax, scatter_ax = axes

# Left panel: bar chart of how many rows each label has.
label_counts = df["label"].value_counts()
label_counts.plot.bar(ax=bar_ax, color=["green", "red", "blue"])
bar_ax.set(title="Label Distribution", xlabel="Label", ylabel="Count")

# Right panel: temperature against humidity, one scatter series per label
# (labels are visited in order of first appearance in the data).
for label in df["label"].unique():
    subset = df.loc[df["label"] == label]
    scatter_ax.scatter(
        subset["temperature"],
        subset["humidity"],
        label=label,
        alpha=0.6,
    )
scatter_ax.set(
    title="Temperature vs Humidity",
    xlabel="Temperature (¬∞C)",
    ylabel="Humidity (%)",
)
scatter_ax.legend()
scatter_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## ✂️ Step 6: Split Dataset

In [None]:
# Features & Target
X = df[["temperature", "humidity"]]
y = df["label"]

# Split 80/20
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("=" * 50)
print("‚úÇÔ∏è DATASET SPLIT")
print("=" * 50)
print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")
print(f"\nTrain labels distribution:")
print(y_train.value_counts())
print(f"\nTest labels distribution:")
print(y_test.value_counts())

## 🤖 Step 7: Train Model (Decision Tree)

In [None]:
# Train Decision Tree
model = DecisionTreeClassifier(random_state=42, max_depth=5)
model.fit(X_train, y_train)

print("‚úÖ Model trained!")
print(f"Model: {type(model).__name__}")

## 📊 Step 8: Model Evaluation

In [None]:
# Predictions
y_pred = model.predict(X_test)

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average="weighted")
recall = recall_score(y_test, y_pred, average="weighted")
f1 = f1_score(y_test, y_pred, average="weighted")

print("=" * 50)
print("üìä MODEL EVALUATION METRICS")
print("=" * 50)
print(f"‚úÖ Accuracy  : {accuracy*100:.2f}%")
print(f"‚úÖ Precision : {precision*100:.2f}%")
print(f"‚úÖ Recall    : {recall*100:.2f}%")
print(f"‚úÖ F1-Score  : {f1*100:.2f}%")
print("=" * 50)

print("\nüìã Classification Report:")
print(classification_report(y_test, y_pred))

## 📈 Step 9: Confusion Matrix

In [None]:
# Build the confusion matrix over an explicit, sorted list of every label in
# the dataset. Passing the same list to confusion_matrix and to the heatmap
# tick labels keeps rows/columns and axis names aligned even if a class is
# absent from both y_test and y_pred (without `labels=`, confusion_matrix
# would infer a smaller label set and the ticks would no longer match).
labels = sorted(df["label"].unique())
cm = confusion_matrix(y_test, y_pred, labels=labels)

# Heatmap with integer counts: rows are true labels, columns are predictions.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=labels, yticklabels=labels)
plt.title("Confusion Matrix")
plt.ylabel("True Label")
plt.xlabel("Predicted Label")
plt.show()

# Also print the raw matrix so the counts are available as text.
print("\nüìä Confusion Matrix:")
print(cm)

## 💾 Step 10: Save Model

In [None]:
# Serialize the trained model to disk with joblib.
model_filename = "smart_cage_model.pkl"
joblib.dump(model, model_filename)

# Summary of what was written, including the accuracy measured during evaluation.
rule = "=" * 50
summary_lines = [
    rule,
    "üíæ MODEL SAVED!",
    rule,
    f"üìÅ Filename: {model_filename}",
    f"üìä Model Type: {model.__class__.__name__}",
    f"‚úÖ Accuracy: {accuracy*100:.2f}%",
    rule,
]
print("\n".join(summary_lines))

# Hand the saved file to the Colab download helper.
files.download(model_filename)
print("\nüì• Model downloaded! Upload this .pkl file to your dashboard.")

## 🧪 Step 11: Test Prediction

In [None]:
# Test dengan data baru
test_data = [
    [30.5, 65],  # Should be Ideal
    [38, 55],    # Should be Panas
    [25, 70]     # Should be Dingin
]

predictions = model.predict(test_data)

print("=" * 50)
print("üß™ TEST PREDICTIONS")
print("=" * 50)
for i, (data, pred) in enumerate(zip(test_data, predictions)):
    print(f"Test {i+1}: T={data[0]}¬∞C, H={data[1]}% ‚Üí {pred}")
print("=" * 50)