https://medium.com/@cheng3374/ปรับ-parameter-ให้-model-มีประสิทธิภาพด้วย-gridsearch-32174da8da95

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the dataset from a CSV file located in the current working directory.
dataSet = pd.read_csv("Alzheimer.csv")
# Bare expression: shown as the notebook cell's output.
dataSet

# MARK: Data Processing

In [None]:
# Print a summary of the DataFrame (columns, non-null counts, dtypes).
dataSet.info()

In [None]:
# Count missing values per column.
dataSet.isna().sum()

### MARK: เลือก drop แถวที่มีค่า Null ทิ้ง เพราะมีจำนวนไม่มากจึงไม่กระทบต่อ data อย่างมีนัยสำคัญ และไม่สามารถคำนวณค่า BMI มาแทนที่ได้ เนื่องจากไม่มี weight และ height ของผู้ป่วย

In [None]:
# Drop every row that contains at least one missing value.
dataSet = dataSet.dropna()
# Re-check the per-column missing counts after dropping rows.
dataSet.isna().sum()

### MARK: ตรวจสอบว่าจำนวนข้อมูลของแต่ละ Class ใน Target สมดุลกันหรือไม่

In [None]:
# Number of rows whose "alzheimer" column equals 1 (True values in the mask sum to the row count).
print("Target == 1 ->", int((dataSet["alzheimer"] == 1).sum()))

In [None]:
# Number of rows whose "alzheimer" column equals 0.
# The printed label must name the class actually being counted (0, not 1).
print("Target == 0 ->", len(dataSet.loc[dataSet["alzheimer"] == 0, "alzheimer"]))

### MARK: แปลงข้อมูลเชิงหมวดหมู่ให้เป็นตัวเลขจำนวนเต็ม (0 ถึง n_classes-1) ด้วย LabelEncoder

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Preview the first rows before encoding.
dataSet.head()

In [None]:
# Single LabelEncoder instance, reused (re-fitted) for each column encoded below.
label_encoder = LabelEncoder()

In [None]:
# Replace each listed column with the integer codes produced by LabelEncoder.
# The one shared encoder is re-fitted on every column, so once the loop ends
# it only retains the classes of the last column processed.
for column in ('Education', 'family history', 'Occupation', 'Triglycerides', 'smoking_status'):
    dataSet[column] = label_encoder.fit_transform(dataSet[column])

In [None]:
# Bare expression: show the encoded DataFrame as the notebook cell's output.
dataSet

# MARK: แบ่งข้อมูล

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

### MARK: train_test_split

In [None]:
# Features are every column except the target; the target is the "alzheimer" column.
X = dataSet.drop(columns=['alzheimer'])
y = dataSet['alzheimer']

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Preview the first rows of the training features.
X_train.head()

In [None]:
# Print a summary of the training features.
X_train.info()

In [None]:
# Print a summary of the training target.
y_train.info()

### SMOTE (Synthetic Minority Over-sampling Technique): วิธีการสร้างตัวอย่างสังเคราะห์ของคลาสส่วนน้อย โดยการประมาณค่าระหว่าง (interpolate) คุณลักษณะของตัวอย่างในคลาสส่วนน้อยกับเพื่อนบ้านที่ใกล้ที่สุดในคลาสเดียวกัน

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler

In [None]:
# Oversample with SMOTE using a fixed seed.
smote = SMOTE(random_state=42)
# Only the training split is resampled; X_test / y_test are left untouched.
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

## MARK: ทำ scaler

In [None]:
# Scaler used to standardize the features below.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the SMOTE-resampled training features and transform them.
X_train_scaled = scaler.fit_transform(X_train_smote)
# Apply the already-fitted scaler to the test features (no re-fitting on test data).
X_test_scaled = scaler.transform(X_test)

# Model Neural Network

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Baseline MLP: hidden layers of 100 and 50 units, at most 1000 iterations, fixed seed.
nnModel = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)

In [None]:
# Train the baseline model on the scaled, SMOTE-resampled training data.
nnModel.fit(X_train_scaled, y_train_smote)

In [None]:
# Report the baseline model's score on the data it was trained on.
print("ตัวอย่างข้อมูลเรียนรู้ ->",nnModel.score(X_train_scaled,y_train_smote))

In [None]:
# Predict on the scaled test features and report accuracy against the true test labels.
y_pred = nnModel.predict(X_test_scaled)
print("ตัวอย่างข้อมูลชุดทดสอบ ->",accuracy_score(y_test, y_pred))

In [None]:
# Cross-tabulate true test labels (rows) against the baseline predictions (columns).
pd.crosstab(y_test, y_pred)

In [None]:
# Print the classification report for the baseline predictions on the test set.
print("Classification Report:\n", classification_report(y_test, y_pred))

## MARK: หา Best Parameter

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyper-parameter search space for MLPClassifier.
#
# MLPClassifier only uses `learning_rate` when solver='sgd', so crossing it
# with 'adam' would train every adam candidate three times with identical
# results. GridSearchCV accepts a list of grids, so the space is split per
# solver: 384 distinct candidates instead of 576.
_shared_grid = {
    'hidden_layer_sizes': [(100,), (50, 50), (100, 50), (50, 100)],
    'activation': ['logistic', 'relu'],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'max_iter': [500, 700, 1000],
}
param_grid = [
    # sgd: the learning-rate schedule matters, so search over it.
    {
        **_shared_grid,
        'solver': ['sgd'],
        'learning_rate': ['constant', 'invscaling', 'adaptive'],
    },
    # adam: learning_rate is ignored, so it is left at its default.
    {
        **_shared_grid,
        'solver': ['adam'],
    },
]
   

In [None]:
# Base estimator for the grid search; the fixed seed keeps candidate fits repeatable.
mlp = MLPClassifier(random_state=42)

In [None]:
# Exhaustive search over param_grid with 10-fold cross-validation, scored by accuracy.
# n_jobs=-1 runs the candidate fits on all available CPU cores; the fits
# themselves are unchanged, only run in parallel instead of sequentially.
# NOTE(review): X_train_scaled was SMOTE-resampled and scaled before this
# cross-validation, so each validation fold can contain synthetic samples and
# was seen by the scaler. Putting SMOTE + scaler + MLP in an imblearn Pipeline
# inside the search would avoid that. Confirm whether this matters here.
grid_search = GridSearchCV(mlp, param_grid, cv=10, scoring='accuracy', n_jobs=-1)
grid_result = grid_search.fit(X_train_scaled, y_train_smote)
# Bare expression: show the fitted search object as the notebook cell's output.
grid_result

In [None]:
# Parameter combination selected by the grid search.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

In [None]:
# Best score found by the search (accuracy, as set by scoring='accuracy'), measured
# by cross-validation on the training data, not on the held-out test set.
best_accuracy = grid_search.best_score_
print("Best Accuracy:", best_accuracy)

In [None]:
# Build a fresh classifier from the parameters selected by the grid search.
# best_params already carries 'max_iter' (it is one of the searched keys), so
# passing max_iter=... explicitly as well would raise a duplicate-keyword TypeError.
nn_model = MLPClassifier(**best_params, random_state=42)

In [None]:
# Train the tuned model on the scaled, SMOTE-resampled training data.
nn_model.fit(X_train_scaled, y_train_smote)

In [None]:
# Report the tuned model's score on the data it was trained on.
print("ตัวอย่างข้อมูลเรียนรู้ ->",nn_model.score(X_train_scaled,y_train_smote))

In [None]:
# Predict on the scaled test features with the tuned model (overwrites the baseline y_pred)
# and report accuracy against the true test labels.
y_pred = nn_model.predict(X_test_scaled)
print("ตัวอย่างข้อมูลชุดทดสอบ ->",accuracy_score(y_test, y_pred))

In [None]:
# Cross-tabulate true test labels (rows) against the tuned model's predictions (columns).
pd.crosstab(y_test, y_pred)

In [None]:
# Print the classification report for the tuned model's predictions on the test set.
print("Classification Report:\n", classification_report(y_test, y_pred))