In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
from sklearn import tree
import xgboost as xgb
import lightgbm as lgb
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import tensorflow as tf
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report
import warnings 
warnings.filterwarnings("ignore")

from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_classif
from sklearn.pipeline import Pipeline

In [3]:
df = pd.read_csv('/content/drive/My Drive/Research/Fetal Health/fetal_health.csv')
X = df.drop(columns=['fetal_health'])
y = df['fetal_health']

In [None]:
# XGBoost with Information Gain

X = df.drop(columns=['fetal_health'], axis=1)
y = df['fetal_health']-1

selector = SelectKBest(mutual_info_classif, k=17)
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

model = xgb.XGBClassifier(objective='multi:softmax', num_class=3)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         0.0       0.96      0.98      0.97       333
         1.0       0.87      0.83      0.85        64
         2.0       1.00      0.97      0.98        29

    accuracy                           0.95       426
   macro avg       0.94      0.92      0.93       426
weighted avg       0.95      0.95      0.95       426



In [14]:
# XGBoost with K-Best

X = df.drop(columns=['fetal_health'], axis=1)
y = df['fetal_health']-1

selector = SelectKBest(f_classif, k=17)
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

model = xgb.XGBClassifier(objective='multi:softmax', num_class=3)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         0.0       0.97      0.97      0.97       333
         1.0       0.87      0.83      0.85        64
         2.0       0.94      1.00      0.97        29

    accuracy                           0.95       426
   macro avg       0.92      0.93      0.93       426
weighted avg       0.95      0.95      0.95       426



In [10]:
# XGBoost with K-Best & Information Gain

X = df.drop(columns=['fetal_health'])
y = df['fetal_health']-1

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)

pipe = Pipeline([
    ('selectkbest1', SelectKBest(f_classif)),
    ('selectkbest2', SelectKBest(mutual_info_classif)),
    ('classifier', xgb.XGBClassifier(objective='multi:softmax', num_class=3))
])

pipe.fit(X_train, y_train)

y_pred = pipe.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         0.0       0.97      0.98      0.97       332
         1.0       0.87      0.82      0.84        55
         2.0       0.97      0.97      0.97        39

    accuracy                           0.96       426
   macro avg       0.94      0.92      0.93       426
weighted avg       0.96      0.96      0.96       426



In [None]:
# Random Forest

X = df.drop(columns=['fetal_health'])
y = df['fetal_health']

selector = SelectKBest(mutual_info_classif, k=17)
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=45)

rf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         1.0       0.92      0.99      0.96       332
         2.0       0.81      0.62      0.70        55
         3.0       1.00      0.67      0.80        39

    accuracy                           0.92       426
   macro avg       0.91      0.76      0.82       426
weighted avg       0.91      0.92      0.91       426



In [5]:
# Random Forest with Information Gain

X = df.drop(columns=['fetal_health'])
y = df['fetal_health']

selector = SelectKBest(mutual_info_classif, k=17)
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=45)

rf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         1.0       0.92      0.99      0.96       332
         2.0       0.81      0.62      0.70        55
         3.0       1.00      0.67      0.80        39

    accuracy                           0.92       426
   macro avg       0.91      0.76      0.82       426
weighted avg       0.91      0.92      0.91       426



In [6]:
# Random Forest with K-Best

X = df.drop(columns=['fetal_health'])
y = df['fetal_health']

selector = SelectKBest(f_classif, k=17)
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=45)

rf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         1.0       0.92      0.98      0.95       332
         2.0       0.76      0.62      0.68        55
         3.0       1.00      0.67      0.80        39

    accuracy                           0.91       426
   macro avg       0.89      0.76      0.81       426
weighted avg       0.91      0.91      0.90       426



In [8]:
# Random Forest with K-Best & Information Gain

X = df.drop(columns=['fetal_health'])
y = df['fetal_health']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)

pipe = Pipeline([
    ('selectkbest1', SelectKBest(f_classif)),
    ('selectkbest2', SelectKBest(mutual_info_classif)),
    ('classifier', RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42))
])

pipe.fit(X_train, y_train)

y_pred = pipe.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         1.0       0.93      0.98      0.96       332
         2.0       0.78      0.65      0.71        55
         3.0       1.00      0.77      0.87        39

    accuracy                           0.92       426
   macro avg       0.91      0.80      0.85       426
weighted avg       0.92      0.92      0.92       426



In [13]:
# LightGBM with K-Best

df_light = df.drop_duplicates()
X = df_light.drop(columns=['fetal_health'])
y = df_light['fetal_health']-1

X.columns = [col.replace(' ', '_') for col in X.columns]

selector = SelectKBest(f_classif, k=17)
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test)

params = {
    'objective': 'multiclass',
    'num_classes': 3,
    'metric': 'multi_error',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'force_col_wise': 'true'
}

num_rounds = 100
model = lgb.train(params, train_data, num_rounds, valid_sets=[train_data, test_data], early_stopping_rounds=10, verbose_eval=False)

y_pred = model.predict(X_test)

y_pred_labels = y_pred.argmax(axis=1)

report = classification_report(y_test, y_pred_labels)
print()
print(report)

[LightGBM] [Info] Total Bins 1060
[LightGBM] [Info] Number of data points in the train set: 1690, number of used features: 16
[LightGBM] [Info] Start training from score -0.258525
[LightGBM] [Info] Start training from score -1.960213
[LightGBM] [Info] Start training from score -2.442051

              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98       341
         1.0       0.94      0.85      0.89        54
         2.0       1.00      0.96      0.98        28

    accuracy                           0.97       423
   macro avg       0.97      0.94      0.95       423
weighted avg       0.97      0.97      0.97       423



In [None]:
# LightGBM with Information Gain

df_light = df.drop_duplicates()
X = df_light.drop(columns=['fetal_health'])
y = df_light['fetal_health']-1

X.columns = [col.replace(' ', '_') for col in X.columns]

selector = SelectKBest(mutual_info_classif, k=17)
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test)

params = {
    'objective': 'multiclass',
    'num_classes': 3,
    'metric': 'multi_error',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'force_col_wise': 'true'
}

num_rounds = 100
model = lgb.train(params, train_data, num_rounds, valid_sets=[train_data, test_data], early_stopping_rounds=10, verbose_eval=False)

y_pred = model.predict(X_test)

y_pred_labels = y_pred.argmax(axis=1)

report = classification_report(y_test, y_pred_labels)
print()
print(report)

[LightGBM] [Info] Total Bins 1188
[LightGBM] [Info] Number of data points in the train set: 1690, number of used features: 17
[LightGBM] [Info] Start training from score -0.258525
[LightGBM] [Info] Start training from score -1.960213
[LightGBM] [Info] Start training from score -2.442051

              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98       341
         1.0       0.94      0.83      0.88        54
         2.0       1.00      0.96      0.98        28

    accuracy                           0.97       423
   macro avg       0.97      0.93      0.95       423
weighted avg       0.97      0.97      0.97       423



In [12]:
# LightGBM with K-Best & Information Gain

df = pd.read_csv('/content/drive/My Drive/Research/Fetal Health/fetal_health.csv')
df_light = df.drop_duplicates()
X = df_light.drop(columns=['fetal_health'])
y = df_light['fetal_health']-1

X.columns = [col.replace(' ', '_') for col in X.columns]

kbest = SelectKBest(score_func=f_classif, k=10)
X_kbest = kbest.fit_transform(X, y)

infogain = SelectKBest(score_func=mutual_info_classif, k=10)
X_infogain = infogain.fit_transform(X, y)

X_selected = np.concatenate([X_kbest, X_infogain], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test)

params = {
    'objective': 'multiclass',
    'num_classes': 3,
    'metric': 'multi_error',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'force_col_wise': 'true'
}

num_rounds = 100
model = lgb.train(params, train_data, num_rounds, valid_sets=[train_data, test_data], early_stopping_rounds=10, verbose_eval=False)

y_pred = model.predict(X_test)

y_pred_labels = y_pred.argmax(axis=1)

report = classification_report(y_test, y_pred_labels)
print()
print(report)

[LightGBM] [Info] Total Bins 1376
[LightGBM] [Info] Number of data points in the train set: 1690, number of used features: 20
[LightGBM] [Info] Start training from score -0.258525
[LightGBM] [Info] Start training from score -1.960213
[LightGBM] [Info] Start training from score -2.442051

              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98       341
         1.0       0.92      0.83      0.87        54
         2.0       0.96      0.96      0.96        28

    accuracy                           0.96       423
   macro avg       0.95      0.93      0.94       423
weighted avg       0.96      0.96      0.96       423

