In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.neural_network import MLPClassifier
import joblib

In [49]:
# Load the source CSV: semicolon-delimited, decoded as Latin-1.
df = pd.read_csv(
    '/csvPH.csv',
    sep=';',
    encoding='latin1',
)

In [50]:
# Every column except the one named "index" (compared case-insensitively)
# is treated as a feature column.
num_cols = list(filter(lambda name: name.lower() != 'index', df.columns))

In [51]:
# Clean the feature columns and convert them to numeric values.
# Columns pandas already parsed as numbers are left untouched: pushing them
# through str() and the character filter below would mangle values whose text
# form uses scientific notation (e.g. "1e-05" would lose its "e" and be
# coerced to NaN).
for c in num_cols:
    col = df[c]
    if pd.api.types.is_numeric_dtype(col) and not pd.api.types.is_bool_dtype(col):
        continue
    cleaned = (
        col.astype(str)
        # keep only digits, comma, dot and minus sign
        .str.replace(r'[^\d,.\-]', '', regex=True)
        # decimal comma -> decimal point (literal replacement, not a regex)
        .str.replace(',', '.', regex=False)
    )
    # whatever is still not a valid number becomes NaN
    df[c] = pd.to_numeric(cleaned, errors='coerce')

# Label a row according to the threshold rule
def label_row(row):
    """Return 'ok' when every reading lies strictly inside its range, else 'warning'.

    ``row`` is indexed by column name. All bounds are exclusive, so a value
    equal to a limit yields 'warning'; a NaN fails every comparison and
    therefore also yields 'warning'.
    """
    limits = (
        ('pH', 6, 7),
        ('Temperatur (°C)', 20, 30),
        ('Kadar Air (%)', 20, 40),
        ('Kelembapan (%)', 60, 80),
    )
    # all() stops at the first failing check, in the order listed above
    within = all(low < row[key] < high for key, low, high in limits)
    return 'ok' if within else 'warning'

In [52]:
# Rule-based class for every row (row-wise apply of label_row).
df['label'] = df.apply(label_row, axis=1)

# Feature matrix and string targets.
X = df[num_cols].values
y = df['label'].values

# Map the class names to integer codes.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Keep only the rows whose features contain no NaN.
mask = np.logical_not(np.isnan(X).any(axis=1))
X, y_enc = X[mask], y_enc[mask]
df = df.iloc[mask]

# Index labels of the surviving rows; they go through the split alongside
# the data so predictions can be traced back to the source rows.
indices = df.index.values

# Stratified 80/20 split of features, targets and index labels together.
X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
    X,
    y_enc,
    indices,
    stratify=y_enc,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training split only
# and the same transform is then applied to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ANN (MLP) with two hidden layers (32 and 16 units). Training may end before
# max_iter because early stopping is enabled (n_iter_no_change=15).
mlp = MLPClassifier(
    hidden_layer_sizes=(32, 16),
    activation='relu',
    solver='adam',
    early_stopping=True,
    n_iter_no_change=15,
    max_iter=500,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict on the held-out split and map the integer codes back to class names.
y_pred = mlp.predict(X_test_scaled)
y_pred_labels = le.inverse_transform(y_pred)

# Number of test rows assigned to each class.
print("Jumlah label setelah klasifikasi:")
print(pd.Series(y_pred_labels).value_counts(), "\n")

# Original index labels of the test rows, grouped by predicted class.
ok_indices = idx_test[y_pred_labels == 'ok']
warning_indices = idx_test[y_pred_labels == 'warning']

print("Indeks asli prediksi 'ok'     :", ok_indices.tolist())
print("Indeks asli prediksi 'warning':", warning_indices.tolist(), "\n")

# Model evaluation.
# The label set is pinned to the encoder's classes so the report rows and the
# confusion-matrix shape always line up with le.classes_, even when a class is
# absent from both y_test and y_pred. zero_division=0 reports 0.0 for metrics
# that are undefined (a class that is never predicted) without emitting the
# UndefinedMetricWarning.
class_ids = np.arange(len(le.classes_))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=le.classes_,
    zero_division=0,
))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))

Jumlah label setelah klasifikasi:
Name: count, dtype: int64 

Indeks asli prediksi 'ok'     : []

Accuracy: 0.995

Classification Report:
              precision    recall  f1-score   support

          ok       0.00      0.00      0.00         5

    accuracy                           0.99      1000
   macro avg       0.50      0.50      0.50      1000
weighted avg       0.99      0.99      0.99      1000

Confusion Matrix:
 [[  0   5]
 [  0 995]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [53]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE

# Load the dataset: semicolon-delimited, decoded as Latin-1.
df = pd.read_csv('/csvPH.csv', sep=';', encoding='latin1')

# Every column except the one named "index" (case-insensitive) is a feature.
num_cols = [c for c in df.columns if c.lower() != 'index']

# Clean the feature columns and convert them to numeric values.
# Columns pandas already parsed as numbers are left untouched: pushing them
# through str() and the character filter below would mangle values whose text
# form uses scientific notation (e.g. "1e-05" would lose its "e" and be
# coerced to NaN).
for c in num_cols:
    col = df[c]
    if pd.api.types.is_numeric_dtype(col) and not pd.api.types.is_bool_dtype(col):
        continue
    cleaned = (
        col.astype(str)
        # keep only digits, comma, dot and minus sign
        .str.replace(r'[^\d,.\-]', '', regex=True)
        # decimal comma -> decimal point (literal replacement, not a regex)
        .str.replace(',', '.', regex=False)
    )
    # whatever is still not a valid number becomes NaN
    df[c] = pd.to_numeric(cleaned, errors='coerce')

# Build the label for one row
def label_row(row):
    """Return 'ok' when every reading lies strictly inside its range, else 'warning'.

    ``row`` is indexed by column name. All bounds are exclusive, so a value
    equal to a limit yields 'warning'; a NaN fails every comparison and
    therefore also yields 'warning'.
    """
    limits = (
        ('pH', 6, 7),
        ('Temperatur (°C)', 20, 30),
        ('Kadar Air (%)', 20, 40),
        ('Kelembapan (%)', 60, 80),
    )
    # all() stops at the first failing check, in the order listed above
    within = all(low < row[key] < high for key, low, high in limits)
    return 'ok' if within else 'warning'

# Rule-based class for every row (row-wise apply of label_row).
df['label'] = df.apply(label_row, axis=1)

# Class distribution before any modelling.
print("Distribusi label awal:")
print(df['label'].value_counts(), "\n")

# Feature matrix and string targets.
X = df[num_cols].values
y = df['label'].values

# Map the class names to integer codes.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Keep only the rows whose features contain no NaN.
mask = np.logical_not(np.isnan(X).any(axis=1))
X, y_enc = X[mask], y_enc[mask]
df = df.iloc[mask]

# Index labels of the surviving rows; they go through the split alongside
# the data so predictions can be traced back to the source rows.
indices = df.index.values

# Stratified 80/20 split of features, targets and index labels together.
X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
    X,
    y_enc,
    indices,
    stratify=y_enc,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training split only
# and the same transform is then applied to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- SMOTE oversampling ---
# Only the (scaled) training split is resampled; the test split is untouched.
print("Distribusi sebelum SMOTE:", np.bincount(y_train))
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train_scaled, y_train)
print("Distribusi sesudah SMOTE:", np.bincount(y_train_res), "\n")

# ANN (MLP) with two hidden layers (32 and 16 units), trained on the
# resampled data. Training may end before max_iter because early stopping is
# enabled (n_iter_no_change=15).
mlp = MLPClassifier(
    hidden_layer_sizes=(32, 16),
    activation='relu',
    solver='adam',
    early_stopping=True,
    n_iter_no_change=15,
    max_iter=500,
    random_state=42,
).fit(X_train_res, y_train_res)

# Predict on the held-out split and map the integer codes back to class names.
y_pred = mlp.predict(X_test_scaled)
y_pred_labels = le.inverse_transform(y_pred)

# Number of test rows assigned to each class.
print("Distribusi label setelah klasifikasi:")
print(pd.Series(y_pred_labels).value_counts(), "\n")

# Original index labels of the test rows, grouped by predicted class.
ok_indices = idx_test[y_pred_labels == 'ok']
warning_indices = idx_test[y_pred_labels == 'warning']

print("Indeks asli prediksi 'ok'     :", ok_indices.tolist())
print("Indeks asli prediksi 'warning':", warning_indices.tolist(), "\n")

# Evaluation.
# The label set is pinned to the encoder's classes so the report rows and the
# confusion-matrix shape always line up with le.classes_, even when a class is
# absent from both y_test and y_pred. zero_division=0 reports 0.0 for metrics
# that are undefined (a class that is never predicted) without emitting the
# UndefinedMetricWarning.
class_ids = np.arange(len(le.classes_))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=le.classes_,
    zero_division=0,
))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))

Distribusi label awal:
label
ok           27
Name: count, dtype: int64 

Distribusi sebelum SMOTE: [  22 3978]
Distribusi sesudah SMOTE: [3978 3978] 

Distribusi label setelah klasifikasi:
ok          10
Name: count, dtype: int64 

Indeks asli prediksi 'ok'     : [1285, 2312, 1694, 2756, 1703, 4499, 2071, 4334, 1704, 2373]

Accuracy: 0.993

Classification Report:
              precision    recall  f1-score   support

          ok       0.40      0.80      0.53         5

    accuracy                           0.99      1000
   macro avg       0.70      0.90      0.76      1000
weighted avg       1.00      0.99      0.99      1000

Confusion Matrix:
 [[  4   1]
 [  6 989]]
