In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [26]:
df = pd.read_csv('sleep_health.csv')

In [18]:
df.head()

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [19]:
df.count()

Person ID                  374
Gender                     374
Age                        374
Occupation                 374
Sleep Duration             374
Quality of Sleep           374
Physical Activity Level    374
Stress Level               374
BMI Category               374
Blood Pressure             374
Heart Rate                 374
Daily Steps                374
Sleep Disorder             155
dtype: int64

In [None]:
df = df.drop("Occupation", axis=1)
df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None')

# Pisahkan kolom blood_pressure menjadi dua kolom
df[['Systolic', 'Diastolic']] = df['Blood Pressure'].str.split('/', expand=True)

# Ubah ke tipe data numerik (optional tapi disarankan)
df['Systolic'] = pd.to_numeric(df['Systolic'])
df['Diastolic'] = pd.to_numeric(df['Diastolic'])

df = df.drop("Blood Pressure", axis=1)
df = df.drop("Person ID", axis=1)

In [28]:
df.head()

Unnamed: 0,Gender,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,Systolic,Diastolic
0,Male,27,6.1,6,42,6,Overweight,77,4200,,126,83
1,Male,28,6.2,6,60,8,Normal,75,10000,,125,80
2,Male,28,6.2,6,60,8,Normal,75,10000,,125,80
3,Male,28,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90
4,Male,28,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90


In [29]:
print("\nCek nilai kosong:")
print(df.isnull().sum())


Cek nilai kosong:
Gender                     0
Age                        0
Sleep Duration             0
Quality of Sleep           0
Physical Activity Level    0
Stress Level               0
BMI Category               0
Heart Rate                 0
Daily Steps                0
Sleep Disorder             0
Systolic                   0
Diastolic                  0
dtype: int64


In [30]:
# Kolom-kolom kategorikal yang ingin diubah
categorical_cols = ['Gender', 'Sleep Disorder', 'BMI Category']

# Ubah setiap kolom dengan np.unique dan simpan mapping-nya jika perlu
for col in categorical_cols:
    unique_vals, encoded_vals = np.unique(df[col], return_inverse=True)
    df[col] = encoded_vals
    print(f"Mapping untuk kolom '{col}':")
    for i, val in enumerate(unique_vals):
        print(f"  {i} => {val}")

Mapping untuk kolom 'Gender':
  0 => Female
  1 => Male
Mapping untuk kolom 'Sleep Disorder':
  0 => Insomnia
  1 => None
  2 => Sleep Apnea
Mapping untuk kolom 'BMI Category':
  0 => Normal
  1 => Normal Weight
  2 => Obese
  3 => Overweight


In [31]:
X = df.drop('Sleep Disorder', axis=1)  # fitur
y = df['Sleep Disorder']              # label

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [33]:
scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [34]:
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X.columns)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X.columns)

In [35]:
class FKNN:
    def __init__(self, k=5, m=2):
        """
        Fuzzy K-Nearest Neighbors classifier
        
        Parameters:
        -----------
        k : int, default=5
            Number of neighbors to consider
        m : float, default=2
            Fuzzyness parameter (m > 1)
        """
        self.k = k
        self.m = m
        self.X_train = None
        self.y_train = None
        self.classes_ = None
        self.label_encoder = LabelEncoder()
        
    def fit(self, X, y):
        """
        Fit the model using X as training data and y as target values
        
        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,)
            Target values
        """
        self.X_train = np.array(X)
        self.y_train = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        
    def predict(self, X):
        """
        Predict the class labels for the provided data
        
        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            Test samples
            
        Returns:
        --------
        y_pred : array, shape (n_samples,)
            Predicted class labels
        """
        X = np.array(X)
        n_samples = X.shape[0]
        y_pred = np.empty(n_samples, dtype=self.classes_.dtype)
        
        # Find k nearest neighbors for each test sample
        knn = NearestNeighbors(n_neighbors=self.k)
        knn.fit(self.X_train)
        distances, indices = knn.kneighbors(X)
        
        # Calculate membership values
        for i in range(n_samples):
            neighbor_classes = self.y_train[indices[i]]
            neighbor_distances = distances[i]
            
            # Handle zero distances (avoid division by zero)
            neighbor_distances = np.where(neighbor_distances == 0, 1e-10, neighbor_distances)
            
            # Calculate weights (fuzzy membership)
            weights = 1 / (neighbor_distances ** (2 / (self.m - 1)))
            sum_weights = np.sum(weights)
            normalized_weights = weights / sum_weights
            
            # Calculate class probabilities
            class_prob = np.zeros(len(self.classes_))
            for j in range(self.k):
                class_idx = neighbor_classes[j]
                class_prob[class_idx] += normalized_weights[j]
                
            # Predict the class with highest probability
            y_pred[i] = self.classes_[np.argmax(class_prob)]
            
        return y_pred
    
    def predict_proba(self, X):
        """
        Predict class probabilities for the provided data
        
        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            Test samples
            
        Returns:
        --------
        proba : array, shape (n_samples, n_classes)
            Class probabilities
        """
        X = np.array(X)
        n_samples = X.shape[0]
        proba = np.zeros((n_samples, len(self.classes_)))
        
        # Find k nearest neighbors for each test sample
        knn = NearestNeighbors(n_neighbors=self.k)
        knn.fit(self.X_train)
        distances, indices = knn.kneighbors(X)
        
        # Calculate membership values
        for i in range(n_samples):
            neighbor_classes = self.y_train[indices[i]]
            neighbor_distances = distances[i]
            
            # Handle zero distances
            neighbor_distances = np.where(neighbor_distances == 0, 1e-10, neighbor_distances)
            
            # Calculate weights (fuzzy membership)
            weights = 1 / (neighbor_distances ** (2 / (self.m - 1)))
            sum_weights = np.sum(weights)
            normalized_weights = weights / sum_weights
            
            # Calculate class probabilities
            for j in range(self.k):
                class_idx = neighbor_classes[j]
                proba[i, class_idx] += normalized_weights[j]
                
        return proba

In [36]:
akurasi = []

# Loop untuk nilai k ganjil dari 3 sampai 381
for k in range(3, 187, 2):
    fknn = FKNN(k=k, m=2)
    fknn.fit(X_train_scaled, y_train)
    y_pred = fknn.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    akurasi.append((k, acc))

# Cetak nilai K terbaik
best_k, best_acc = max(akurasi, key=lambda x: x[1])
print(f"Nilai K terbaik: {best_k} dengan akurasi: {best_acc:.4f}")

Nilai K terbaik: 3 dengan akurasi: 0.8800


In [37]:
fknn = FKNN(k=3, m=2)
fknn.fit(X_train_scaled, y_train)

y_pred = fknn.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.88


In [52]:
proba = fknn.predict_proba(X_test)
print("\nPredicted probabilities for first 5 samples:")
print(proba)


Predicted probabilities for first 5 samples:
[[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [42]:
def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2))

def fknn_predict_multiclass(X_train, y_train, X_test, k=3, m=2):
    predictions = []
    membership_degrees = []

    classes = np.unique(y_train)  # Dapatkan semua kelas unik

    for x in X_test:
        distances = []
        for i in range(len(X_train)):
            dist = euclidean_distance(x, X_train[i])
            distances.append((dist, y_train[i]))

        # Urutkan berdasarkan jarak
        distances.sort(key=lambda x: x[0])
        k_nearest = distances[:k]

        # Hitung derajat keanggotaan fuzzy untuk semua kelas
        u = {cls: 0.0 for cls in classes}
        for d, cls in k_nearest:
            weight = 1.0 / (d ** (2 / (m - 1)) + 1e-8)
            u[cls] += weight

        total = sum(u.values())
        degree = {f'class_{cls}': u[cls] / total for cls in classes}
        membership_degrees.append(degree)

        # Prediksi kelas dengan derajat keanggotaan tertinggi
        predicted_class = max(u, key=u.get)
        predictions.append(predicted_class)

    return predictions, membership_degrees


In [43]:
y_pred, memberships = fknn_predict_multiclass(X_train_scaled, y_train.values, X_test_scaled, k=3, m=2)

In [44]:
print("Prediksi:", y_pred)
print("Asli    :", y_test.values.tolist())
print("Akurasi:", accuracy_score(y_test, y_pred))

Prediksi: [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 0, 2, 0, 2, 2, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 2, 2]
Asli    : [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 0, 2, 2, 2, 2, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 2, 2]
Akurasi: 0.88


In [51]:
for i in range(len(y_pred)):
    print(f"Data uji ke-{i+1}: Prediksi={y_pred[i]}, Asli={y_test.values[i]}")
    print(f"  Derajat keanggotaan -> Class 0: {memberships[i]['class_0']:.4f}, Class 1: {memberships[i]['class_1']:.4f}, Class 2: {memberships[i]['class_2']:.4f}\n")

Data uji ke-1: Prediksi=1, Asli=1
  Derajat keanggotaan -> Class 0: 0.0000, Class 1: 1.0000, Class 2: 0.0000

Data uji ke-2: Prediksi=1, Asli=1
  Derajat keanggotaan -> Class 0: 0.0000, Class 1: 1.0000, Class 2: 0.0000

Data uji ke-3: Prediksi=1, Asli=1
  Derajat keanggotaan -> Class 0: 0.0000, Class 1: 0.6667, Class 2: 0.3333

Data uji ke-4: Prediksi=1, Asli=1
  Derajat keanggotaan -> Class 0: 0.0000, Class 1: 1.0000, Class 2: 0.0000

Data uji ke-5: Prediksi=1, Asli=1
  Derajat keanggotaan -> Class 0: 0.0000, Class 1: 1.0000, Class 2: 0.0000

Data uji ke-6: Prediksi=0, Asli=0
  Derajat keanggotaan -> Class 0: 1.0000, Class 1: 0.0000, Class 2: 0.0000

Data uji ke-7: Prediksi=1, Asli=1
  Derajat keanggotaan -> Class 0: 0.3333, Class 1: 0.6667, Class 2: 0.0000

Data uji ke-8: Prediksi=1, Asli=1
  Derajat keanggotaan -> Class 0: 0.0000, Class 1: 1.0000, Class 2: 0.0000

Data uji ke-9: Prediksi=1, Asli=1
  Derajat keanggotaan -> Class 0: 0.0000, Class 1: 1.0000, Class 2: 0.0000

Data uji k