In [None]:
import pandas as pd
import numpy as np

In [74]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [75]:
data = pd.read_csv('water_potability.csv')
data

Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
0,,204.890455,20791.318981,7.300212,368.516441,564.308654,10.379783,86.990970,2.963135,0
1,3.716080,129.422921,18630.057858,6.635246,,592.885359,15.180013,56.329076,4.500656,0
2,8.099124,224.236259,19909.541732,9.275884,,418.606213,16.868637,66.420093,3.055934,0
3,8.316766,214.373394,22018.417441,8.059332,356.886136,363.266516,18.436524,100.341674,4.628771,0
4,9.092223,181.101509,17978.986339,6.546600,310.135738,398.410813,11.558279,31.997993,4.075075,0
...,...,...,...,...,...,...,...,...,...,...
3271,4.668102,193.681735,47580.991603,7.166639,359.948574,526.424171,13.894419,66.687695,4.435821,1
3272,7.808856,193.553212,17329.802160,8.061362,,392.449580,19.903225,,2.798243,1
3273,9.419510,175.762646,33155.578218,7.350233,,432.044783,11.039070,69.845400,3.298875,1
3274,5.126763,230.603758,11983.869376,6.303357,,402.883113,11.168946,77.488213,4.708658,1


In [76]:
data.isnull().sum()

ph                 491
Hardness             0
Solids               0
Chloramines          0
Sulfate            781
Conductivity         0
Organic_carbon       0
Trihalomethanes    162
Turbidity            0
Potability           0
dtype: int64

In [77]:
data = data.fillna(data.mean())

In [78]:
data.isnull().sum()

ph                 0
Hardness           0
Solids             0
Chloramines        0
Sulfate            0
Conductivity       0
Organic_carbon     0
Trihalomethanes    0
Turbidity          0
Potability         0
dtype: int64

In [7]:
counts = data['Potability'].value_counts()
proportions = data['Potability'].value_counts(normalize=True) * 100

print("Label Distribution:")
print(counts)
print("\nProportions (%):")
print(proportions.round(2))

Label Distribution:
Potability
0    1998
1    1278
Name: count, dtype: int64

Proportions (%):
Potability
0    60.99
1    39.01
Name: proportion, dtype: float64


In [8]:
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

X = data.drop('Potability', axis=1)
y = data['Potability']

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_imputed, y)

X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, 
    test_size=0.2, 
    stratify=y_resampled, 
    random_state=42
)

svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)

print("Classification Report:\n")
print(classification_report(y_test, y_pred))


Classification Report:

              precision    recall  f1-score   support

           0       0.51      0.81      0.62       400
           1       0.54      0.22      0.31       400

    accuracy                           0.52       800
   macro avg       0.52      0.52      0.47       800
weighted avg       0.52      0.52      0.47       800



In [9]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

X = data.drop('Potability', axis=1)
y = data['Potability']

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled,
    test_size=0.2,
    stratify=y_resampled,
    random_state=42
)

svm = SVC()

# Grid parameter tuning
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [0.01, 0.1, 1],
    'kernel': ['rbf']
}

grid = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    verbose=1,
    n_jobs=-1
)

grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)

y_pred = grid.predict(X_test)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Fitting 5 folds for each of 9 candidates, totalling 45 fits
Best Parameters: {'C': 10, 'gamma': 1, 'kernel': 'rbf'}

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.82      0.76       400
           1       0.79      0.67      0.72       400

    accuracy                           0.74       800
   macro avg       0.75      0.74      0.74       800
weighted avg       0.75      0.74      0.74       800



In [8]:
data.describe()

Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
count,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0,3276.0
mean,7.080795,196.369496,22014.092526,7.122277,333.775777,426.205111,14.28497,66.396293,3.966786,0.39011
std,1.469956,32.879761,8768.570828,1.583085,36.142612,80.824064,3.308162,15.769881,0.780382,0.487849
min,0.0,47.432,320.942611,0.352,129.0,181.483754,2.2,0.738,1.45,0.0
25%,6.277673,176.850538,15666.690297,6.127421,317.094638,365.734414,12.065801,56.647656,3.439711,0.0
50%,7.080795,196.967627,20927.833607,7.130299,333.775777,421.884968,14.218338,66.396293,3.955028,0.0
75%,7.87005,216.667456,27332.762127,8.114887,350.385756,481.792304,16.557652,76.666609,4.50032,1.0
max,14.0,323.124,61227.196008,13.127,481.030642,753.34262,28.3,124.0,6.739,1.0


In [19]:
import optuna
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE

# --- Preprocessing (dari sebelumnya)
X = data.drop('Potability', axis=1)
y = data['Potability']

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, stratify=y_resampled, random_state=42
)

# --- Optuna objective function
def objective(trial):
    C = trial.suggest_loguniform('C', 0.01, 100)
    gamma = trial.suggest_loguniform('gamma', 0.0001, 1.0)
    
    model = SVC(kernel='rbf', C=C, gamma=gamma, random_state=42)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc

# --- Run Optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

# --- Best Result
print("Best trial:")
print(study.best_trial.params)

# --- Evaluate Final Model
best_params = study.best_trial.params
final_model = SVC(kernel='rbf', **best_params)
final_model.fit(X_train, y_train)

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
y_pred = final_model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

[I 2025-05-07 14:06:30,835] A new study created in memory with name: no-name-0a2bb3bf-8794-4d75-95f3-9b2355c851f1


[I 2025-05-07 14:06:31,335] Trial 0 finished with value: 0.70625 and parameters: {'C': 3.537173236585714, 'gamma': 0.33104721555411093}. Best is trial 0 with value: 0.70625.
[I 2025-05-07 14:06:31,764] Trial 1 finished with value: 0.61875 and parameters: {'C': 3.0897912202910747, 'gamma': 0.03703102153607689}. Best is trial 0 with value: 0.70625.
[I 2025-05-07 14:06:32,249] Trial 2 finished with value: 0.54875 and parameters: {'C': 0.5246742268186011, 'gamma': 0.00020326276188697504}. Best is trial 0 with value: 0.70625.
[I 2025-05-07 14:06:32,745] Trial 3 finished with value: 0.5875 and parameters: {'C': 0.10200356435114444, 'gamma': 0.6365409612469272}. Best is trial 0 with value: 0.70625.
[I 2025-05-07 14:06:33,221] Trial 4 finished with value: 0.58625 and parameters: {'C': 0.025189593222438012, 'gamma': 0.06903572764333604}. Best is trial 0 with value: 0.70625.
[I 2025-05-07 14:06:33,888] Trial 5 finished with value: 0.6525 and parameters: {'C': 37.345675697054105, 'gamma': 0.09005

Best trial:
{'C': 1.9655992987118482, 'gamma': 0.9545688582277057}

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.82      0.77       400
           1       0.79      0.68      0.73       400

    accuracy                           0.75       800
   macro avg       0.75      0.75      0.75       800
weighted avg       0.75      0.75      0.75       800



In [18]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
from mealpy.swarm_based.BA import OriginalBA  # Gunakan versi terbaru mealpy

# -------------------------------
# 1. Data Preprocessing
# -------------------------------
# Ganti 'data' dengan DataFrame Anda
X = data.drop('Potability', axis=1)
y = data['Potability']

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, stratify=y_resampled, random_state=42
)

# -------------------------------
# 2. Fungsi Objektif untuk BA
# -------------------------------
def fitness(solution):
    C, gamma = solution
    C = np.clip(C, 0.01, 100)
    gamma = np.clip(gamma, 0.0001, 1.0)
    
    model = SVC(kernel='rbf', C=C, gamma=gamma)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return -acc  # Karena kita minimisasi

# -------------------------------
# 3. Definisi Problem + BA
# -------------------------------
problem = {
    "fit_func": fitness,
    "bounds": [[0.01, 0.0001], [100, 1.0]],  # Lower & Upper bounds
    "minmax": "min"
}

model_ba = OriginalBA(epoch=30, pop_size=20)
best_agent = model_ba.solve(problem)
best_solution = best_agent.solution
best_fitness = best_agent.target

best_C, best_gamma = best_solution
print(f"Best C: {best_C:.4f}, Best gamma: {best_gamma:.4f}, Accuracy: {-best_fitness:.4f}")

# -------------------------------
# 4. Evaluasi Final Model
# -------------------------------
final_model = SVC(kernel='rbf', C=best_C, gamma=best_gamma)
final_model.fit(X_train, y_train)
y_pred = final_model.predict(X_test)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

ValueError: Invalid bounds. All variables in bounds should be an instance of (<class 'mealpy.utils.space.IntegerVar'>, <class 'mealpy.utils.space.FloatVar'>, <class 'mealpy.utils.space.PermutationVar'>, <class 'mealpy.utils.space.StringVar'>, <class 'mealpy.utils.space.BinaryVar'>, <class 'mealpy.utils.space.BoolVar'>, <class 'mealpy.utils.space.MixedSetVar'>)