In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB, CategoricalNB
from sklearn.metrics import classification_report, accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import pandas as pd

In [2]:
# Read the dengue dataset from a CSV file (path is relative to the working directory).
csv_path = 'dengue_dataset.csv'
data = pd.read_csv(csv_path)
# Preview the first five rows as this cell's output.
data.head(n=5)

Unnamed: 0,Temperature,Platelet_Count,White_Blood_Cell_Count,Body_Pain,Rash,Gender,Infected
0,98.24908,138060.514266,6854.650578,0,0,Female,False
1,99.401429,102193.912651,7347.397486,1,0,Female,False
2,98.963988,141755.5851,6011.997182,0,1,Male,False
3,98.697317,168269.47464,6852.807249,1,0,Male,True
4,97.812037,160752.598059,6800.937446,1,0,Female,True


In [3]:
# Target: the 'Infected' column. Predictors: every remaining column.
y = data['Infected']
# drop() returns a new frame, so later assignments into X leave `data` untouched.
X = data.drop('Infected', axis=1)

In [4]:
# Column groups used to route features to the appropriate preprocessing / model.
# Continuous measurements.
numerical_features = [
    'Temperature',
    'Platelet_Count',
    'White_Blood_Cell_Count',
]
# 0/1 symptom flags.
binary_features = [
    'Body_Pain',
    'Rash',
]
# String-valued categories that need integer encoding.
categorical_features = [
    'Gender',
]

In [5]:
# Preprocessing for the continuous columns: fill missing values with the
# column mean, then standardize each column.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
]
numerical_pipeline = Pipeline(steps=numeric_steps)

In [6]:
# Fit the imputer + scaler on the numeric columns and overwrite them in X
# with the transformed values.
# NOTE(review): this fit sees every row of X, and the train/test split only
# happens in a later cell, so held-out rows contribute to the imputation mean
# and scaling statistics. Consider fitting on the training rows only.
scaled_numeric = numerical_pipeline.fit_transform(X[numerical_features])
X[numerical_features] = scaled_numeric

In [7]:
def encode_gender(value):
    """Encode a gender label as an integer.

    Args:
        value: The label to encode.

    Returns:
        0 if ``value`` equals the string 'Female' exactly (case-sensitive),
        otherwise 1. Note that *any* other input, including differently
        cased spellings or the text 'nan', also yields 1.
    """
    return 0 if value == 'Female' else 1

# Replace each categorical column in X with its integer encoding.
for col in categorical_features:
    # Cast to text first so every cell is compared as a string by encode_gender.
    labels_as_text = X[col].astype(str)
    X[col] = labels_as_text.apply(encode_gender)

In [8]:
# Print column dtypes and non-null counts for the raw `data` frame.
# NOTE(review): this inspects `data`, not the preprocessed `X`, so the scaling
# and gender encoding applied to X are not reflected here. Confirm that the raw
# frame is what was meant to be inspected.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Temperature             5000 non-null   float64
 1   Platelet_Count          5000 non-null   float64
 2   White_Blood_Cell_Count  5000 non-null   float64
 3   Body_Pain               5000 non-null   int64  
 4   Rash                    5000 non-null   int64  
 5   Gender                  5000 non-null   object 
 6   Infected                5000 non-null   bool   
dtypes: bool(1), float64(3), int64(2), object(1)
memory usage: 239.4+ KB


In [9]:
# Preview the first five rows of the raw `data` frame (X holds the
# preprocessed copy; `data` itself is not modified by the cells above).
data.head(n=5)

Unnamed: 0,Temperature,Platelet_Count,White_Blood_Cell_Count,Body_Pain,Rash,Gender,Infected
0,98.24908,138060.514266,6854.650578,0,0,Female,False
1,99.401429,102193.912651,7347.397486,1,0,Female,False
2,98.963988,141755.5851,6011.997182,0,1,Male,False
3,98.697317,168269.47464,6852.807249,1,0,Male,True
4,97.812037,160752.598059,6800.937446,1,0,Female,True


In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Gaussian Naive Bayes trained on the continuous columns only.
# fit() returns the estimator itself, so construction and fitting are chained.
gnb = GaussianNB().fit(X_train[numerical_features], y_train)
# Predicted labels for the held-out rows, from the numeric features alone.
numerical_preds = gnb.predict(X_test[numerical_features])

In [20]:
# Accuracy of the Gaussian NB predictions (numeric features only) on the test set,
# printed with two decimal places.
acc = accuracy_score(y_true=y_test, y_pred=numerical_preds)
print(f"Naive Akurasi:{acc:.2f}")

Naive Akurasi:0.74


In [13]:
# Categorical Naive Bayes trained on the binary flags plus the encoded
# categorical column(s); the column list is built once and reused for
# both fitting and prediction so the feature order stays identical.
discrete_columns = binary_features + categorical_features
cat_nb = CategoricalNB()
cat_nb.fit(X_train[discrete_columns], y_train)
categorical_preds = cat_nb.predict(X_test[discrete_columns])

In [14]:
# Combine the two models' hard predictions: cast each to integers, add them,
# and flag a row when the sum is at least 1. For 0/1 predictions this is an
# OR rule -- a row is flagged if either model flags it.
positive_votes = numerical_preds.astype(int) + categorical_preds.astype(int)
final_preds = positive_votes >= 1


In [19]:
# Accuracy of the combined (hybrid) predictions on the held-out rows.
hybrid_accuracy = accuracy_score(y_test, final_preds)
print("Hybrid Accuracy:", hybrid_accuracy)

Hybrid Accuracy: 0.808
