In [2]:
# Importing Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb
from imblearn.over_sampling import SMOTE

# Loading the Dataset
df = pd.read_csv("/content/ctu_df.csv")
df.head()

Unnamed: 0,ID,pH,BDecf,pCO2,BE,Apgar1,Apgar5,Gest. Weeks,Weight(g),Sex,...,Median_UC,Std_FHR,Std_UC,RMS_FHR,RMS_UC,Peak_to_RMS_FHR,Peak_to_RMS_UC,Peak_FHR,Peak_UC,target
0,1001,7.14,8.14,7.7,-10.5,6,8,37,2660.0,2,...,40.0,56.509679,32.048961,116.828582,53.161922,101.671418,46.838078,218.5,100.0,1
1,1002,7.0,7.92,12.0,-12.0,8,8,41,2900.0,2,...,28.0,41.769274,11.476301,100.694363,27.660343,52.305637,66.339657,153.0,94.0,1
2,1003,7.2,3.03,8.3,-5.6,7,9,40,3770.0,1,...,25.0,57.026851,22.048012,150.356237,34.921642,32.643763,77.078358,183.0,112.0,1
3,1004,7.3,5.19,5.5,-6.4,8,9,41,3370.0,1,...,14.0,51.581668,16.210997,105.951334,22.31554,123.048666,100.68446,229.0,123.0,1
4,1005,7.3,4.52,5.7,-5.8,9,10,41,3550.0,2,...,0.0,68.816249,19.032601,95.44129,24.423261,89.80871,92.076739,185.25,116.5,1


In [3]:
print("Columns:")
df.columns

Columns:


Index(['ID', 'pH', 'BDecf', 'pCO2', 'BE', 'Apgar1', 'Apgar5', 'Gest. Weeks',
       'Weight(g)', 'Sex', 'Age', 'Gravidity', 'Parity', 'Diabetes',
       'Hypertension', 'Preeclampsia', 'Liq.', 'Pyrexia', 'Meconium',
       'Presentation', 'Induced', 'I.stage', 'NoProgress', 'CK/KP', 'II.stage',
       'Deliv. type', 'dbID', 'Rec. type', 'Pos. II.st.', 'Sig2Birth',
       'Mean_FHR', 'Mean_UC', 'Median_FHR', 'Median_UC', 'Std_FHR', 'Std_UC',
       'RMS_FHR', 'RMS_UC', 'Peak_to_RMS_FHR', 'Peak_to_RMS_UC', 'Peak_FHR',
       'Peak_UC', 'target'],
      dtype='object')

In [4]:
print("Shape:")
df.shape

Shape:


(552, 43)

In [5]:
df.fillna(df.median(), inplace=True)  # Handle missing values

In [6]:
drop_cols = ['ID', 'dbID', 'Rec. type', 'Pos. II.st.', 'Sig2Birth', 'target']
X = df.drop(columns=drop_cols)
y = df['target'] # Targetted 2 classes (Normal = 1,Pathologic = 0)

In [7]:
X.shape

(552, 37)

In [8]:
# Apply SMOTE to balance the classes
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [9]:
# Splitting the Training(80%) and Test data(20%)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [10]:
# Feature scaling (XGBoost handles unscaled data well, but standardization can help)
from sklearn.preprocessing import StandardScaler, MinMaxScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train) # Using StandardScaler to scale to mean = 0 & Strandard Deviation = 1.
X_test = scaler.transform(X_test)

In [11]:
# Initialize and train XGBoost classifier
model = xgb.XGBClassifier(use_label_encoder=False, eval_metric="logloss")
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [12]:
# Generating predictions
y_pred = model.predict(X_test)

In [13]:
# Evaluating the model (Calculating the model Loss and Accuracy)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9212


In [14]:
# Classification Report & Evaluating Predictions.
from sklearn.metrics import classification_report

print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=["Pathologic", "Normal"]))

Classification Report:
              precision    recall  f1-score   support

  Pathologic       0.92      0.92      0.92       104
      Normal       0.92      0.92      0.92        99

    accuracy                           0.92       203
   macro avg       0.92      0.92      0.92       203
weighted avg       0.92      0.92      0.92       203

