# Maternal Health Prediction Model


In [1]:
# Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler  # Importing StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix 
#laoding dataset
data= pd.read_csv("Maternal_health.csv")


# checking if datset is loaded accurately or not

In [2]:
data.head(5)

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk


In [3]:
data.tail(5)

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
1009,22,120,60,15.0,98.0,80,high risk
1010,55,120,90,18.0,98.0,60,high risk
1011,35,85,60,19.0,98.0,86,high risk
1012,43,120,90,18.0,98.0,70,high risk
1013,32,120,65,6.0,101.0,76,mid risk


# checking if any null data in each column

In [4]:
data.isnull().sum()

Age            0
SystolicBP     0
DiastolicBP    0
BS             0
BodyTemp       0
HeartRate      0
RiskLevel      0
dtype: int64

# checking data_type of each column

In [5]:
data.dtypes

Age              int64
SystolicBP       int64
DiastolicBP      int64
BS             float64
BodyTemp       float64
HeartRate        int64
RiskLevel       object
dtype: object

In [6]:
data['RiskLevel']


0       high risk
1       high risk
2       high risk
3       high risk
4        low risk
          ...    
1009    high risk
1010    high risk
1011    high risk
1012    high risk
1013     mid risk
Name: RiskLevel, Length: 1014, dtype: object

# using describe() to see more detail of the dataset

In [7]:
data.describe()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate
count,1014.0,1014.0,1014.0,1014.0,1014.0,1014.0
mean,29.871795,113.198225,76.460552,8.725986,98.665089,74.301775
std,13.474386,18.403913,13.885796,3.293532,1.371384,8.088702
min,10.0,70.0,49.0,6.0,98.0,7.0
25%,19.0,100.0,65.0,6.9,98.0,70.0
50%,26.0,120.0,80.0,7.5,98.0,76.0
75%,39.0,120.0,90.0,8.0,98.0,80.0
max,70.0,160.0,100.0,19.0,103.0,90.0


In [8]:
#names of all the columns
data.columns

Index(['Age', 'SystolicBP', 'DiastolicBP', 'BS', 'BodyTemp', 'HeartRate',
       'RiskLevel'],
      dtype='object')

"Data Preprocessing"
From onwards i am pre-processing my data

# converting the target column into 3 numeric category i.e low risk=0, mid risk=1 and high risk=2



In [9]:
data['RiskLevel']=data['RiskLevel'].map({'low risk':0, 'mid risk':1, 'high risk':2})

data['RiskLevel']

0       2
1       2
2       2
3       2
4       0
       ..
1009    2
1010    2
1011    2
1012    2
1013    1
Name: RiskLevel, Length: 1014, dtype: int64

# converting data in to feature and target

In [10]:
x=data.drop('RiskLevel', axis=1)
y=data['RiskLevel']

In [11]:
#printing the names of Faetures.
print("Name of the columns in the X dataset\n",x.columns)

Name of the columns in the X dataset
 Index(['Age', 'SystolicBP', 'DiastolicBP', 'BS', 'BodyTemp', 'HeartRate'], dtype='object')


In [12]:
#printing the names of Target column.
print("The Feature column\n",y.name)

The Feature column
 RiskLevel


 # splitting dataset into training data and testing data

In [13]:
x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.2, random_state=45)

In [14]:
print(f"Testing set shape: {x_test.shape}")

Testing set shape: (203, 6)


In [15]:
print(f"\nTraining set shape: {x_train.shape}")


Training set shape: (811, 6)


# Standardizing the features 

In [16]:
from sklearn.preprocessing import StandardScaler  # Importing StandardScaler
# Standardizing the features
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


# K-Nearest Neighbors Model Training

Before using the loop, the accuracy of my model was **65.95%**, which is relatively low. To improve model performance, I implemented a loop to test different values for the number of neighbors in the KNN algorithm, ranging from **1 to 10**.

In [17]:
# Trying different values for n_neighbors
for n in range(1, 11):  # Testing from 1 to 10 neighbors
    knn = KNeighborsClassifier(n_neighbors=n)  # Create an instance of the KNN classifier with n neighbors
    knn.fit(x_train, y_train)  # Train the model using the training data
    y_pred = knn.predict(x_test)  # Use the trained model to predict labels for the test set
    accuracy = accuracy_score(y_test, y_pred)  # Calculate the accuracy of the model's predictions
    print(f"Accuracy with {n} neighbors: {accuracy * 100:.2f}%")  # Print the accuracy


Accuracy with 1 neighbors: 82.76%
Accuracy with 2 neighbors: 77.83%
Accuracy with 3 neighbors: 72.41%
Accuracy with 4 neighbors: 68.47%
Accuracy with 5 neighbors: 65.02%
Accuracy with 6 neighbors: 68.97%
Accuracy with 7 neighbors: 68.47%
Accuracy with 8 neighbors: 67.98%
Accuracy with 9 neighbors: 66.50%
Accuracy with 10 neighbors: 68.97%


# Confusion Matrix

In [18]:
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Confusion Matrix:
 [[63 15  0]
 [34 36  5]
 [ 4  5 41]]


# Classification Report

In [19]:
class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)

Classification Report:
               precision    recall  f1-score   support

           0       0.62      0.81      0.70        78
           1       0.64      0.48      0.55        75
           2       0.89      0.82      0.85        50

    accuracy                           0.69       203
   macro avg       0.72      0.70      0.70       203
weighted avg       0.70      0.69      0.68       203

