In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Load your dataset
data = pd.read_csv(r'/content/sample_data/Maternal Health Risk Data Set.csv')

# Split the data into features (X) and target variable (y)
X = data.drop(columns=['RiskLevel'])
y = data['RiskLevel']

# Create a Random Forest Classifier
clf = RandomForestClassifier(random_state=42)

# Fit the model to the data
clf.fit(X, y)

# Get feature importances
feature_importances = clf.feature_importances_

# Create a DataFrame to display the feature importances
importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importances})

# Sort the DataFrame by importance in descending order
importance_df = importance_df.sort_values(by='Importance', ascending=False)

# Print the feature importances
print(importance_df)


       Feature  Importance
3           BS    0.365845
1   SystolicBP    0.183022
0          Age    0.160200
2  DiastolicBP    0.123006
5    HeartRate    0.105101
4     BodyTemp    0.062827


In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import classification_report

In [3]:
data.isnull().sum()

Age            0
SystolicBP     0
DiastolicBP    0
BS             0
BodyTemp       0
HeartRate      0
RiskLevel      0
dtype: int64

In [4]:
data_dup = data.duplicated().any()
data_dup

True

In [5]:
df = data.drop_duplicates()

In [6]:
data_dup = df.duplicated().any()
data_dup

False

In [7]:
RiskLevel = {'low risk': 1, 'mid risk': 2, 'high risk': 3}

# Create a copy of the DataFrame
df_copy = data.copy()

# Update the 'RiskLevel' column in the copied DataFrame
df_copy['RiskLevel'] = data['RiskLevel'].map(RiskLevel).astype(float)

# Now df_copy has the modified 'RiskLevel' column
df_copy

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,3.0
1,35,140,90,13.0,98.0,70,3.0
2,29,90,70,8.0,100.0,80,3.0
3,30,140,85,7.0,98.0,70,3.0
4,35,120,60,6.1,98.0,76,1.0
...,...,...,...,...,...,...,...
1009,22,120,60,15.0,98.0,80,3.0
1010,55,120,90,18.0,98.0,60,3.0
1011,35,85,60,19.0,98.0,86,3.0
1012,43,120,90,18.0,98.0,70,3.0


In [8]:
df_copy.describe()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
count,1014.0,1014.0,1014.0,1014.0,1014.0,1014.0,1014.0
mean,29.871795,113.198225,76.460552,8.725986,98.665089,74.301775,1.86785
std,13.474386,18.403913,13.885796,3.293532,1.371384,8.088702,0.807353
min,10.0,70.0,49.0,6.0,98.0,7.0,1.0
25%,19.0,100.0,65.0,6.9,98.0,70.0,1.0
50%,26.0,120.0,80.0,7.5,98.0,76.0,2.0
75%,39.0,120.0,90.0,8.0,98.0,80.0,3.0
max,70.0,160.0,100.0,19.0,103.0,90.0,3.0


In [9]:
df_copy['RiskLevel'].value_counts()

1.0    406
2.0    336
3.0    272
Name: RiskLevel, dtype: int64

In [10]:
duplicate = df_copy.duplicated().any()
duplicate

True

In [11]:
df = df_copy.drop_duplicates()

In [12]:
duplicate = df.duplicated().any()
duplicate

False

In [None]:
df.head()


Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,3.0
1,35,140,90,13.0,98.0,70,3.0
2,29,90,70,8.0,100.0,80,3.0
3,30,140,85,7.0,98.0,70,3.0
4,35,120,60,6.1,98.0,76,1.0


In [None]:
df.describe()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
count,452.0,452.0,452.0,452.0,452.0,452.0,452.0
mean,29.19469,110.553097,75.418142,8.346173,98.692478,73.949115,1.730088
std,13.767379,17.872282,13.754578,2.829209,1.410897,8.156973,0.833169
min,10.0,70.0,49.0,6.0,98.0,7.0,1.0
25%,19.0,90.0,65.0,6.9,98.0,70.0,1.0
50%,25.0,120.0,80.0,7.5,98.0,76.0,1.0
75%,35.0,120.0,86.0,7.9,98.0,80.0,2.0
max,70.0,160.0,100.0,19.0,103.0,90.0,3.0


In [13]:
import sys
sys.executable


'/usr/bin/python3'

In [14]:
X = df.drop('RiskLevel',axis=1)
y = df['RiskLevel']

In [None]:
X


Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate
0,25,130,80,15.0,98.0,86
1,35,140,90,13.0,98.0,70
2,29,90,70,8.0,100.0,80
3,30,140,85,7.0,98.0,70
4,35,120,60,6.1,98.0,76
...,...,...,...,...,...,...
673,12,100,50,6.4,98.0,70
674,15,100,60,6.0,98.0,80
703,15,100,49,7.6,98.0,77
704,12,100,50,6.0,98.0,70


In [None]:
y

0      3.0
1      3.0
2      3.0
3      3.0
4      1.0
      ... 
673    2.0
674    1.0
703    1.0
704    2.0
705    1.0
Name: RiskLevel, Length: 452, dtype: float64

In [15]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,
                                               random_state=42)
y_test

425    3.0
39     1.0
492    2.0
484    1.0
192    3.0
      ... 
29     1.0
171    3.0
377    1.0
26     1.0
7      3.0
Name: RiskLevel, Length: 136, dtype: float64

In [16]:
print(f'Training Shape x:',X_train.shape)
print(f'Testing Shape x:',X_test.shape)
print('*****___________*****___________*****')
print(f'Training Shape y:',X.shape)
print(f'Testing Shape y:',y.shape)

Training Shape x: (316, 6)
Testing Shape x: (136, 6)
*****___________*****___________*****
Training Shape y: (452, 6)
Testing Shape y: (452,)


In [17]:
ss = StandardScaler()

X_train = ss.fit_transform(X_train)

X_test= ss.transform(X_test)

In [26]:
from sklearn.model import SVC

In [None]:
from sklearn.model import SVC

#Create model
model = SVC(kernel='rbf', random_state=0, gamma=.10, C=1.0)
model.fit(X_train, y_train)
print("Train accuracy:",model.score(X_train,y_train))
print("Test accuracy:",model.score(X_test,y_test))

y_pred = model.predict(X_test)
print(y_pred)
cm = confusion_matrix(y_test, y_pred)
print(f'CM:',cm)
print(f'Accuracy:',accuracy_score(y_test, y_pred)* 100 ,'%')
print(classification_report(y_test, model.predict(X_test)))

In [22]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Assuming `model` is your trained model and `ss` is the StandardScaler instance used for training data

# Input data to be tested
input_data = np.array([25, 100, 63, 7.5, 101, 79]).reshape(1, -1)

# Scale the input data using the same scaler used for X_train
input_data_scaled = ss.transform(input_data)

# Make a prediction
predicted_risk_numeric = model.predict(input_data_scaled)

# Since the predicted_risk_numeric will be in the form of 1.0, 2.0, or 3.0, map it back to 'low risk', 'mid risk', 'high risk'
risk_mapping = {1: 'low risk', 2: 'mid risk', 3: 'high risk'}
predicted_risk_label = risk_mapping[predicted_risk_numeric[0]]

print(f"Predicted Risk Level: {predicted_risk_label}")


Predicted Risk Level: mid risk




In [23]:
import pickle


In [28]:
filename = 'maternal_health_risk_model.sav'
pickle.dump(model, open(filename, 'wb'))

In [29]:
loaded_model = pickle.load(open('maternal_health_risk_model.sav', 'rb'))
