In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Load the dataset from a CSV file located in the current working directory.
df = pd.read_csv(filepath_or_buffer="diabetes.csv")

In [3]:
# Hold out one row of the dataset and remove it from `df`.
#
# The position is drawn from a seeded generator so the same row is held out on
# every run; with an unseeded draw the remaining data (and therefore every
# result computed from it) differed between runs.
test_instance_index = int(np.random.default_rng(42).integers(0, len(df)))  # position in [0, len(df))
test_instance = df.iloc[test_instance_index]  # the held-out row, selected by position

# DataFrame.drop removes rows by index *label*, while `test_instance_index` is a
# *position*. Translate the position into its label so the row that is removed
# is always the row that was just selected, even if the index is not 0..n-1.
df = df.drop(index=df.index[test_instance_index])

In [4]:
# Split the frame into the target column and the remaining predictor columns.
y = df["Outcome"]
X = df.drop(columns=["Outcome"])

In [5]:
# Keep 20% of the rows for evaluation; the fixed random_state makes the
# shuffle repeatable for a given input frame.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Baseline: Gaussian Naive Bayes trained on the untransformed features.
gnb = GaussianNB().fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = gnb.predict(X_test)

# Evaluate the baseline on the held-out test split.
conf_matrix_original = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy_original = accuracy_score(y_true=y_test, y_pred=y_pred)

In [7]:
# Learn the min/max scaling parameters from the training split only, then
# apply the same transformation to both splits.
scaler_minmax = MinMaxScaler().fit(X_train)
X_train_minmax = scaler_minmax.transform(X_train)
X_test_minmax = scaler_minmax.transform(X_test)

In [8]:
# Gaussian Naive Bayes trained on the min-max scaled features.
gnb_minmax = GaussianNB().fit(X_train_minmax, y_train)  # fit() returns the estimator itself
y_pred_minmax = gnb_minmax.predict(X_test_minmax)

# Evaluate the scaled-feature model on the scaled test split.
conf_matrix_minmax = confusion_matrix(y_true=y_test, y_pred=y_pred_minmax)
accuracy_minmax = accuracy_score(y_true=y_test, y_pred=y_pred_minmax)

In [9]:
# Search 100 log-spaced values of var_smoothing between 1e0 and 1e-9 using
# 5-fold cross-validation on the training split, scored by accuracy.
param_grid = {
    'var_smoothing': np.logspace(start=0, stop=-9, num=100),
}
grid_search = GridSearchCV(
    estimator=gnb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

# Evaluate the best estimator found by the search on the test split.
best_model = grid_search.best_estimator_
y_pred_gridsearch = best_model.predict(X_test)
conf_matrix_gridsearch = confusion_matrix(y_true=y_test, y_pred=y_pred_gridsearch)
accuracy_gridsearch = accuracy_score(y_true=y_test, y_pred=y_pred_gridsearch)

In [10]:
# Keep the 4 features with the highest chi-squared score against the target.
# The selector is fitted on the training split and then applied to both splits.
select_features = SelectKBest(score_func=chi2, k=4).fit(X_train, y_train)
X_train_features = select_features.transform(X_train)
X_test_features = select_features.transform(X_test)

In [15]:
# Gaussian Naive Bayes trained on the selected feature subset.
gnb_final = GaussianNB().fit(X_train_features, y_train)  # fit() returns the estimator itself
y_pred_final = gnb_final.predict(X_test_features)

# Evaluate the feature-selected model on the matching test features.
conf_matrix_final = confusion_matrix(y_true=y_test, y_pred=y_pred_final)
accuracy_final = accuracy_score(y_true=y_test, y_pred=y_pred_final)

In [17]:
# Print accuracy and confusion matrix for each of the four models, in the
# order they were trained. Each entry is:
# (accuracy label, accuracy value, confusion-matrix label, confusion matrix).
report_rows = [
    (" Orijinal Model Doğruluğu:", accuracy_original,
     "Confusion Matrix (Orijinal Model):\n", conf_matrix_original),
    ("\n Min-Max Normalizasyon Sonrası Model Doğruluğu:", accuracy_minmax,
     "Confusion Matrix (Min-Max Normalizasyon Sonrası Model):\n", conf_matrix_minmax),
    ("\n GridSearchCV Sonrası Model Doğruluğu:", accuracy_gridsearch,
     "Confusion Matrix (GridSearchCV Sonrası Model):\n", conf_matrix_gridsearch),
    ("\n Özellik Seçimi Sonrası Model Doğruluğu:", accuracy_final,
     "Confusion Matrix (Özellik Seçimi Sonrası Model):\n", conf_matrix_final),
]

for accuracy_label, accuracy_value, matrix_label, matrix_value in report_rows:
    print(accuracy_label, accuracy_value)
    print(matrix_label, matrix_value)

 Orijinal Model Doğruluğu: 0.7857142857142857
Confusion Matrix (Orijinal Model):
 [[87 18]
 [15 34]]

 Min-Max Normalizasyon Sonrası Model Doğruluğu: 0.7857142857142857
Confusion Matrix (Min-Max Normalizasyon Sonrası Model):
 [[87 18]
 [15 34]]

 GridSearchCV Sonrası Model Doğruluğu: 0.7792207792207793
Confusion Matrix (GridSearchCV Sonrası Model):
 [[86 19]
 [15 34]]

 Özellik Seçimi Sonrası Model Doğruluğu: 0.7467532467532467
Confusion Matrix (Özellik Seçimi Sonrası Model):
 [[87 18]
 [21 28]]
