In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE

import tensorflow as tf
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, pair_confusion_matrix

In [None]:
# Mount Google Drive at /content/drive so the dataset can be read from it.
# This cell relies on the google.colab module, so it only runs inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the preeclampsia dataset from the mounted Google Drive into a DataFrame.
df2 = pd.read_csv("/content/drive/MyDrive/preeclampsia.csv")

In [None]:
# Preview the first rows of the freshly loaded dataset.
df2.head()

In [None]:
# List the column names available in the dataset.
df2.columns

In [None]:
# Summarise the frame: column dtypes and non-null counts.
df2.info()

In [None]:
# Count missing values per column.
df2.isnull().sum()

In [None]:
# Define a basic risk classification based on key preeclampsia risk factors.
# Patients are labelled high-risk (1) or low-risk (0) using a simplified additive score:
# - High diastolic blood pressure (diabp > 90): +2 points
# - Abnormal PlGF:sFlt ratio (plgf:sflt < 80, the threshold assumed abnormal here): +3 points
# - History of hypertension (htn == 1): +2 points
# - BMI > 30: +1 point
# A total score of 5 or more is labelled high-risk.

def classify_risk(row, *, diabp_threshold=90, plgf_sflt_threshold=80,
                  bmi_threshold=30, high_risk_cutoff=5):
    """Label one patient record as high-risk (1) or low-risk (0).

    An additive score is built from four factors and compared to a cutoff:

    * ``row['diabp'] > diabp_threshold``          -> +2
    * ``row['plgf:sflt'] < plgf_sflt_threshold``  -> +3
    * ``row['htn'] == 1``                         -> +2
    * ``row['bmi'] > bmi_threshold``              -> +1

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys ``'diabp'``, ``'plgf:sflt'``, ``'htn'``
        and ``'bmi'`` (e.g. a DataFrame row passed by ``df.apply(..., axis=1)``).
    diabp_threshold, plgf_sflt_threshold, bmi_threshold : number, keyword-only
        Cut points for the three continuous factors. All comparisons are strict.
    high_risk_cutoff : number, keyword-only
        Minimum score that is labelled high-risk.

    Returns
    -------
    int
        ``1`` when the score reaches ``high_risk_cutoff``, otherwise ``0``.

    Raises
    ------
    KeyError
        If ``row`` lacks one of the four required keys.

    Notes
    -----
    A NaN value fails every comparison, so a missing measurement contributes
    no points rather than raising.
    """
    risk_score = 0
    if row['diabp'] > diabp_threshold:
        risk_score += 2
    if row['plgf:sflt'] < plgf_sflt_threshold:
        risk_score += 3
    if row['htn'] == 1:
        risk_score += 2
    if row['bmi'] > bmi_threshold:
        risk_score += 1

    return 1 if risk_score >= high_risk_cutoff else 0

# Score every row with classify_risk and store the result as the label column
risk_labels = df2.apply(classify_risk, axis=1)
df2['preeclampsia_risk'] = risk_labels

# Show the scoring inputs next to the derived label for a quick visual check
preview_columns = ['bmi', 'diabp', 'plgf:sflt', 'htn', 'preeclampsia_risk']
df2[preview_columns].head()


In [None]:
# Preview the frame again, now that the preeclampsia_risk column has been added.
df2.head()

In [None]:
# Class balance of the derived label: number of rows per risk class.
df2['preeclampsia_risk'].value_counts()

In [None]:
# Split the frame by risk class once; the subsets do not change between features.
high_risk_rows = df2[df2["preeclampsia_risk"] == 1]
low_risk_rows = df2[df2["preeclampsia_risk"] == 0]
shared_hist_kwargs = {"alpha": 0.7, "density": True}

# One overlaid, density-normalised histogram per column. The last column is skipped,
# which assumes preeclampsia_risk is the final column of df2.
for feature in df2.columns[:-1]:
    plt.hist(high_risk_rows[feature], color="blue", label="preeclampsia", **shared_hist_kwargs)
    plt.hist(low_risk_rows[feature], color="red", label="no preeclampsia", **shared_hist_kwargs)
    plt.title(feature)
    plt.ylabel("Probability")
    plt.xlabel(feature)
    plt.legend()
    plt.show()

In [None]:
# Separate the model inputs from the derived label.
# NOTE(review): the label is computed by classify_risk from diabp, plgf:sflt, htn and bmi,
# and those columns remain in X, so a model can recover the labelling rule directly from
# its inputs. Confirm this is intended before reading the scores as predictive skill.
target_column = 'preeclampsia_risk'
X = df2.drop(columns=[target_column])
y = df2[target_column]

In [None]:
# Feature columns that remain after dropping the label.
X.columns

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Class counts in the training split before resampling.
y_train.value_counts()

In [None]:
# Oversample the training split with SMOTE; the test split is left untouched.
# random_state pins the synthetic samples so a fresh Restart & Run All produces the
# same training set (and therefore comparable metrics) on every run.
# NOTE(review): resampling happens before the features are standardised, so SMOTE's
# neighbour search runs on unscaled features. Confirm that is acceptable.
smote = SMOTE(sampling_strategy='auto', random_state=42)

X_train, y_train = smote.fit_resample(X_train, y_train)


In [None]:
# Class counts in the training split after SMOTE oversampling.
y_train.value_counts()

In [None]:
# Learn the standardisation statistics from the training features only,
# then apply that same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Inspect the standardised training features.
X_train

In [35]:
# Small gradient-boosted model: two trees of depth two with a learning rate of 1
xgb_params = dict(
    n_estimators=2,
    max_depth=2,
    learning_rate=1,
    objective='binary:logistic',
    enable_categorical=True,
)
bst = XGBClassifier(**xgb_params)
# Train on the resampled, standardised training split
bst.fit(X_train, y_train)
# Predict labels for the held-out test split and show them
preds = bst.predict(X_test)
print(preds)

[0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 0 1 0
 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0
 1 1 1 0 1 0]


In [36]:
# Per-class precision, recall and F1 for the XGBoost predictions on the test split
from sklearn.metrics import classification_report

xgb_report = classification_report(y_true=y_test, y_pred=preds)
print(xgb_report)

              precision    recall  f1-score   support

           0       0.98      0.96      0.97        57
           1       0.92      0.96      0.94        23

    accuracy                           0.96        80
   macro avg       0.95      0.96      0.95        80
weighted avg       0.96      0.96      0.96        80



In [None]:
from sklearn.metrics import accuracy_score

# Share of test rows whose XGBoost prediction matches the label
accuracy = accuracy_score(y_true=y_test, y_pred=preds)
print(f"Accuracy: {accuracy:.2f}")


In [None]:
from sklearn import metrics
# accuracy_score is documented as (y_true, y_pred); pass the labels first so the call
# stays correct if it is later swapped for an asymmetric metric (precision, recall, ...).
print(" the accuracy of the XGBOOST is: ", metrics.accuracy_score(y_test, preds))

In [None]:
# Random forest with default hyper-parameters, fitted on the same training split.
# random_state fixes the forest's internal randomness so the fitted model and its
# scores are reproducible across runs.
r_clf=RandomForestClassifier(random_state=42)
r_clf.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out test split with the random forest.
y_pred=r_clf.predict(X_test)

In [None]:
from sklearn import metrics
# accuracy_score is documented as (y_true, y_pred); pass the labels first so the call
# stays correct if it is later swapped for an asymmetric metric.
print(" the accuracy of the random tree clasifier is: ", metrics.accuracy_score(y_test, y_pred))

In [None]:
from sklearn.ensemble import VotingClassifier

# Combine the XGBoost model and the random forest by majority (hard) vote.
# Each estimator label names the model it actually holds.
# NOTE(review): with only two voters, any disagreement is a tie. scikit-learn breaks
# ties in favour of the lower class label (0), so the ensemble predicts high risk only
# when both models agree. Confirm this is the intended behaviour for a screening model.
voting_clf = VotingClassifier(estimators=[('xgb', bst), ('rf', r_clf)], voting='hard')

# Train the ensemble model
voting_clf.fit(X_train, y_train)

# Make predictions on the held-out test split
y_preds = voting_clf.predict(X_test)

# Report the ensemble's accuracy; labels first, per accuracy_score(y_true, y_pred)
print(" the accuracy of the voting classifier is: ", metrics.accuracy_score(y_test, y_preds))

In [None]:
import joblib

# The ensemble was trained on StandardScaler-transformed features, so the fitted scaler
# is saved alongside it; without it, inference cannot reproduce the training-time
# preprocessing. The model dump stays last so the cell output is unchanged.
joblib.dump(scaler, "safe_mom_scaler_1.pkl")
joblib.dump(voting_clf, "safe_mom_model_1.pkl")