In [None]:
import pandas as pd

# Read the diabetes dataset from a CSV file in the current working directory.
data = pd.read_csv(filepath_or_buffer='diabetes.csv')

In [None]:
# Preview the first rows of the dataset.
print(data.head())

# Count the missing (NaN) entries in every column.
missing_data = data.isna().sum()
print(missing_data)

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148           72.0             35        0  33.6   
1            1       85           66.0             29        0  26.6   
2            8      183           64.0              0        0  23.3   
3            1       89           66.0             23       94  28.1   
4            0      137           40.0             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  
Pregnancies                 0
Glucose                     0
BloodPressure               1
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [None]:
# Replace the missing BloodPressure values with the column mean.
# NOTE(review): the mean is taken over the full dataset, i.e. before the
# train/test split below, so test rows contribute to the imputed value.
blood_pressure_mean = data['BloodPressure'].mean()
data['BloodPressure'] = data['BloodPressure'].fillna(blood_pressure_mean)

# Re-count the missing values to confirm nothing is left to fill.
missing_data = data.isna().sum()
print(missing_data)

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [None]:
from sklearn.model_selection import train_test_split

# Separate the target column from the feature columns.
y = data['Outcome']  # 'Outcome' is the prediction target
X = data.drop(columns='Outcome')  # every other column is a feature

# Hold out 20% of the rows for testing and keep 80% for training.
# The fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

Training set size: 614
Test set size: 154


In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Build the neural-network model.
#
# The feature columns live on very different scales (e.g. Insulin in the
# hundreds vs. DiabetesPedigreeFunction below 3), and MLPs are sensitive to
# feature scaling, so the classifier is wrapped in a pipeline that
# standardises the inputs first. The pipeline exposes the same fit / predict /
# predict_proba interface, and because the scaler is fitted inside the
# pipeline it is re-fitted on the training folds only when the model is
# cross-validated, and it is saved together with the network.
model = make_pipeline(
    StandardScaler(),
    MLPClassifier(
        hidden_layer_sizes=(64, 32, 16),
        activation='relu',
        solver='adam',
        max_iter=3000,
        learning_rate_init=0.01,
        alpha=0.00001,  # lowered alpha (L2 regularisation strength)
        random_state=42,  # fixed seed so weight init and results are reproducible
    ),
)

# Train on the training split only; the fitted estimator is the cell output.
model.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# Evaluate the model with 5-fold cross-validation over the full X / y.
cv_scores = cross_val_score(estimator=model, X=X, y=y, cv=5)

print(f"Cross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean()}")


Cross-validation scores: [0.71428571 0.68831169 0.68831169 0.65359477 0.68627451]
Mean cross-validation score: 0.6861556743909685


In [None]:
from sklearn.metrics import roc_curve, auc

# Compute the ROC curve and the area under it on the test split.
class_probabilities = model.predict_proba(X_test)
y_prob = class_probabilities[:, 1]  # keep the probability of class 1 only

fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

print(f"AUC: {roc_auc}")

AUC: 0.7695133149678604


In [None]:
# Predict the labels of the test split.
y_pred = model.predict(X_test)

# Overall accuracy, reported as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (header and matrix printed on separate lines).
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Per-class precision / recall / f1 report.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy: 77.92%

Confusion Matrix:
[[86 13]
 [21 34]]

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.87      0.83        99
           1       0.72      0.62      0.67        55

    accuracy                           0.78       154
   macro avg       0.76      0.74      0.75       154
weighted avg       0.78      0.78      0.77       154



In [None]:
import joblib

# Persist the trained model to disk; the list of written file names that
# joblib returns is shown as the cell output.
joblib.dump(value=model, filename='diabetes_model.pkl')

['diabetes_model.pkl']

In [None]:
from google.colab import files

# Trigger a browser download of the saved model file (Google Colab only).
files.download(filename='diabetes_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>