In [None]:
# 1. Install package
!pip install ucimlrepo scikit-fuzzy

In [None]:
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo
import matplotlib.pyplot as plt
import skfuzzy as fuzz
from skfuzzy import control as ctrl

In [None]:
# 2. Custom train/test split
def custom_split(X, y, test_size=0.2, random_state=None):
    """Shuffle the samples and split them into train and test parts.

    Parameters
    ----------
    X, y : array-like
        Features and targets, indexed along their first axis. Both are
        converted with ``np.asarray`` so lists are accepted as well.
    test_size : float
        Fraction of samples placed in the test part. The test count is
        ``int(num_samples * test_size)``, i.e. it is truncated.
    random_state : int or None
        Seed for a private ``RandomState``. When ``None`` the global NumPy
        generator is used, so results differ between calls.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``test_size`` is outside [0, 1].
    """
    X = np.asarray(X)
    y = np.asarray(y)
    num_samples = len(X)
    if len(y) != num_samples:
        raise ValueError(
            f"X and y must have the same number of samples, got {num_samples} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    # A private RandomState yields the same permutation as seeding the global
    # generator, but does not reseed the global generator as a side effect.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    indices = np.arange(num_samples)
    rng.shuffle(indices)

    num_test_samples = int(num_samples * test_size)
    test_indices = indices[:num_test_samples]
    train_indices = indices[num_test_samples:]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

In [None]:
# 3. Min-max scaler
def min_max_scaler(data, min_val=None, max_val=None):
    """Rescale ``data`` to the [0, 1] range feature by feature.

    The minimum and maximum are taken along axis 0, so every column of a 2-D
    matrix is scaled with its own range (a single global min/max would let the
    largest feature dominate all others). For 1-D input this is the same as
    scaling with the overall min and max.

    Parameters
    ----------
    data : array-like
        Values to scale; converted to a float array.
    min_val, max_val : scalar or array-like, optional
        Statistics to scale with. When omitted they are computed from
        ``data``. Passing the values computed on a training split lets another
        split be scaled consistently with it.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``data``. Features whose range is
        zero are mapped to 0 instead of producing NaN/inf.
    """
    data = np.asarray(data, dtype=float)
    if data.size == 0:
        return data.copy()

    if min_val is None:
        min_val = data.min(axis=0)
    if max_val is None:
        max_val = data.max(axis=0)
    min_val = np.asarray(min_val, dtype=float)
    span = np.asarray(max_val, dtype=float) - min_val

    # Guard against division by zero for constant features.
    safe_span = np.where(span == 0, 1.0, span)
    return (data - min_val) / safe_span

In [None]:
# 4. Load the abalone dataset
abalone = fetch_ucirepo(id=1)
X = abalone.data.features
y = abalone.data.targets

# Show the dataset metadata and variable descriptions
print(abalone.metadata)
print(abalone.variables)

# One-hot encode the 'Sex' column. dtype=float keeps the final matrix purely
# numeric; recent pandas versions emit bool dummies by default, which would
# turn the converted array into an object array.
encoded = pd.get_dummies(X['Sex'], prefix='Sex', dtype=float)
# Drop 'Sex' and append the dummy columns at the end (same column order as
# before), without mutating a frame in place.
X = pd.concat([X.drop(columns="Sex"), encoded], axis=1)

# Convert to numpy arrays
X = X.to_numpy(dtype=float)
y = y.to_numpy()

# Split the data
X_train, X_test, y_train, y_test = custom_split(X, y, test_size=0.2, random_state=42)

# Scale the data
# NOTE(review): the test split is scaled with its own min/max rather than the
# statistics of the training split, so the two splits are not mapped
# identically — confirm whether this is intended.
X_train_scaled = min_max_scaler(X_train)
X_test_scaled = min_max_scaler(X_test)

In [None]:
# 5. Box plots of the continuous features
features_plot = abalone.data.features
# 'Sex' is categorical, so it is left out of the box plots.
fig, ax = plt.subplots(figsize=(12, 8))
features_plot.drop(columns="Sex").boxplot(ax=ax)
ax.set_title('Boxplots of Continuous Features')
ax.set_xlabel('Features')
ax.set_ylabel('Value')
plt.show()

In [None]:
# 6. Fuzzy correction system
# NOTE(review): the original heading calls this a "Sugeno" system, but the
# consequent below is described by fuzzy sets (triangular membership
# functions) rather than crisp output functions — confirm which is intended.

# Output variable: correction in [-2, 2]; inputs are expected in [0, 1].
adjustment = ctrl.Consequent(np.arange(-2, 2.1, 0.1), 'adjustment')
diameter = ctrl.Antecedent(np.arange(0, 1.01, 0.01), 'diameter')
whole_weight = ctrl.Antecedent(np.arange(0, 1.01, 0.01), 'whole_weight')

# Three automatic membership functions per input; the rules below refer to
# them as 'poor', 'average' and 'good'.
for _antecedent in (diameter, whole_weight):
    _antecedent.automf(3)

# Triangular membership functions of the output, given as [left, peak, right].
for _label, _points in (
    ('decrease', [-2, -1, 0]),
    ('none', [-0.5, 0, 0.5]),
    ('increase', [0, 1, 2]),
):
    adjustment[_label] = fuzz.trimf(adjustment.universe, _points)

# Either input being 'poor' lowers the prediction, either being 'good' raises
# it, and both being 'average' leaves it unchanged.
rule1 = ctrl.Rule(diameter['poor'] | whole_weight['poor'], adjustment['decrease'])
rule2 = ctrl.Rule(diameter['average'] & whole_weight['average'], adjustment['none'])
rule3 = ctrl.Rule(diameter['good'] | whole_weight['good'], adjustment['increase'])

adjustment_ctrl = ctrl.ControlSystem([rule1, rule2, rule3])
adjustment_simulator = ctrl.ControlSystemSimulation(adjustment_ctrl)

In [None]:
# 7. KNN functions
def compute_distances(X_train, x):
    """Return the Euclidean distance from ``x`` to every row of ``X_train``.

    Both arguments are converted to float arrays; the result has one entry
    per row of ``X_train``.
    """
    deltas = np.asarray(X_train, dtype=float) - np.asarray(x, dtype=float)
    squared_sums = np.sum(deltas * deltas, axis=1)
    return np.sqrt(squared_sums)

def predict_single_instance(X_train, y_train, x, k):
    """Predict one sample as the mean target of its ``k`` nearest neighbours.

    Neighbours are the ``k`` rows of ``X_train`` with the smallest distance to
    ``x`` as reported by ``compute_distances``; ``y_train`` is indexed with
    their positions.
    """
    ranked = np.argsort(compute_distances(X_train, x))
    nearest = ranked[:k]
    return np.mean(y_train[nearest])

def predict(X_train, y_train, X_test, k, diameter_idx=1, whole_weight_idx=3):
    """Predict every target column with KNN plus a fuzzy correction.

    Parameters
    ----------
    X_train : array-like
        Training features, one row per sample.
    y_train : numpy.ndarray
        2-D training targets; each column is predicted separately.
    X_test : iterable of array-like
        Samples to predict.
    k : int
        Number of neighbours averaged for each prediction.
    diameter_idx, whole_weight_idx : int
        Column positions of the features fed to the fuzzy system. The
        defaults assume the layout produced after 'Sex' is dropped and its
        dummy columns are appended at the end, i.e. Length, Diameter, Height,
        Whole_weight, ... — verify against ``abalone.variables``. (The
        previous hard-coded index 4 would be Whole_weight only while 'Sex'
        still occupies column 0.)

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(X_test), y_train.shape[1])``.
    """
    # The fuzzy correction depends only on the test sample, not on the target
    # column, so it is evaluated once per sample.
    corrections = []
    for x in X_test:
        adjustment_simulator.input['diameter'] = x[diameter_idx]
        adjustment_simulator.input['whole_weight'] = x[whole_weight_idx]
        adjustment_simulator.compute()
        corrections.append(adjustment_simulator.output['adjustment'])
    corrections = np.array(corrections, dtype=float)

    predictions = []
    for i in range(y_train.shape[1]):
        y_train_column = y_train[:, i]
        knn_predictions = np.array(
            [predict_single_instance(X_train, y_train_column, x, k) for x in X_test],
            dtype=float,
        )
        predictions.append(knn_predictions + corrections)

    # One row per target was collected; transpose to one row per sample.
    return np.array(predictions).T

def r2(y_true, y_pred):
    """Coefficient of determination, ``1 - SS_residual / SS_total``.

    The mean used for ``SS_total`` is taken over all entries of ``y_true``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values. When they hold the same number of
        entries but differ in shape (e.g. ``(n, 1)`` vs ``(n,)``), ``y_pred``
        is reshaped to match instead of being silently broadcast to ``(n, n)``.

    Returns
    -------
    float
        The R² score. For a constant ``y_true`` the score is 1.0 when the
        predictions are exact and 0.0 otherwise; for empty input it is NaN.

    Raises
    ------
    ValueError
        If the inputs hold a different number of entries and ``y_pred`` is not
        a single value.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        return float('nan')

    if y_true.shape != y_pred.shape:
        if y_pred.size == y_true.size:
            y_pred = y_pred.reshape(y_true.shape)
        elif y_pred.size != 1:
            raise ValueError(
                f"y_true and y_pred have incompatible shapes {y_true.shape} and {y_pred.shape}"
            )

    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_residual = np.sum((y_true - y_pred) ** 2)
    if ss_total == 0:
        # Constant target: the ratio is undefined, so report a perfect or a
        # zero score instead of dividing by zero.
        return 1.0 if ss_residual == 0 else 0.0
    return float(1 - ss_residual / ss_total)

In [None]:
# 8. Evaluate every candidate value of k
k_list = list(range(1, 20))
r2_list = []

# Score the test split once per k; r2_list stays aligned with k_list.
for k in k_list:
    y_pred = predict(X_train_scaled, y_train, X_test_scaled, k)
    r2_list += [r2(y_test, y_pred)]

In [None]:
# 9. Find the best value of k
best_k, best_r2 = None, float('-inf')
for k, r2_val in zip(k_list, r2_list):
    # Only a strictly higher score replaces the current best, so the first
    # k reaching the maximum is kept.
    if not (r2_val > best_r2):
        continue
    best_k, best_r2 = k, r2_val

print(f"Best k: {best_k}")
print(f"Best R² score: {best_r2}")

In [None]:
# 10. Plot R-squared against k
fig, ax = plt.subplots()
ax.plot(k_list, r2_list)
ax.set_xlabel("Nilai K")
ax.set_ylabel("R-squared (R2)")
ax.set_title("Perbandingan Nilai K terhadap R-squared")
ax.grid(True)

# Label every point with its score, rounded to three decimals.
for k, r2_val in zip(k_list, r2_list):
    ax.annotate(
        f'{r2_val:.3f}',
        (k, r2_val),
        textcoords="offset points",
        xytext=(0, 1),
        ha='center',
        fontsize=6,
    )

# Highlight the first point that reaches the highest score.
max_r2_index = r2_list.index(max(r2_list))
ax.scatter(k_list[max_r2_index], r2_list[max_r2_index], color='red', zorder=5)
plt.show()