In [19]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
from sklearn.model_selection import train_test_split

In [2]:
data = 'Obesity Classification.csv'

df = pd.read_csv(data)

In [3]:
df.head()

Unnamed: 0,ID,Age,Gender,Height,Weight,BMI,Label
0,1,25,Male,175,80,25.3,Normal Weight
1,2,30,Female,160,60,22.5,Normal Weight
2,3,35,Male,180,90,27.3,Overweight
3,4,40,Female,150,50,20.0,Underweight
4,5,45,Male,190,100,31.2,Obese


In [4]:
col_names = df.columns

col_names

Index(['ID', 'Age', 'Gender', 'Height', 'Weight', 'BMI', 'Label'], dtype='object')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108 entries, 0 to 107
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID      108 non-null    int64  
 1   Age     108 non-null    int64  
 2   Gender  108 non-null    object 
 3   Height  108 non-null    int64  
 4   Weight  108 non-null    int64  
 5   BMI     108 non-null    float64
 6   Label   108 non-null    object 
dtypes: float64(1), int64(4), object(2)
memory usage: 6.0+ KB


In [6]:
df['Label'].value_counts()

Underweight      47
Normal Weight    29
Overweight       20
Obese            12
Name: Label, dtype: int64

In [8]:
print(df['Label'].isna().sum())

print(df['Label'].unique())

0
['Normal Weight' 'Overweight' 'Underweight' 'Obese']


In [9]:
gender_map = {'Male': 0, 'Female': 1}

df['Gender'] = df['Gender'].map(gender_map)

df['BMI'] = round(df['BMI']).astype(int)

label_map = {'Normal Weight': 0, 'Overweight': 1, 'Underweight' : 2, 'Obese' : 3}

df['Label'] = df['Label'].map(label_map).astype(int)

In [10]:
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108 entries, 0 to 107
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   ID      108 non-null    int64
 1   Age     108 non-null    int64
 2   Gender  108 non-null    int64
 3   Height  108 non-null    int64
 4   Weight  108 non-null    int64
 5   BMI     108 non-null    int32
 6   Label   108 non-null    int32
dtypes: int32(2), int64(5)
memory usage: 5.2 KB


Unnamed: 0,ID,Age,Gender,Height,Weight,BMI,Label
0,1,25,0,175,80,25,0
1,2,30,1,160,60,22,0
2,3,35,0,180,90,27,1
3,4,40,1,150,50,20,2
4,5,45,0,190,100,31,3


In [11]:
X = df.drop(['Label'], axis=1)

y = df['Label']

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

X_train.shape, X_test.shape

((86, 6), (22, 6))

In [23]:
from sklearn.svm import SVC


# import metrics to compute accuracy
from sklearn.metrics import accuracy_score


# instantiate classifier with default hyperparameters
svc=SVC() 


# fit classifier to training set
svc.fit(X_train,y_train)


# make predictions on test set
y_pred=svc.predict(X_test)


# compute and print accuracy score
print('Model accuracy score with default hyperparameters: {0:0.4f}'. format(accuracy_score(y_test, y_pred)))

Model accuracy score with default hyperparameters: 0.6818


In [21]:
from sklearn.metrics import confusion_matrix, classification_report


svc=SVC(C=100.0) 

# fit classifier to training set
svc.fit(X_train,y_train)


# make predictions on test set
y_pred=svc.predict(X_test)


# compute and print accuracy score
print('Model accuracy score with rbf kernel and C=100.0 : {0:0.4f}'. format(accuracy_score(y_test, y_pred)))

Model accuracy score with rbf kernel and C=100.0 : 0.9091


In [24]:

# Menghitung dan mencetak matriks kebingungan
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Menampilkan laporan klasifikasi
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Confusion Matrix:
[[3 0 1 0]
 [5 0 0 1]
 [0 0 9 0]
 [0 0 0 3]]

Classification Report:
              precision    recall  f1-score   support

           0       0.38      0.75      0.50         4
           1       0.00      0.00      0.00         6
           2       0.90      1.00      0.95         9
           3       0.75      1.00      0.86         3

    accuracy                           0.68        22
   macro avg       0.51      0.69      0.58        22
weighted avg       0.54      0.68      0.60        22



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
