Importing the Dependencies

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem. The argument is the directory
# where Drive should appear, not the path of the CSV file; Colab also rejects
# mount points that contain spaces.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

Data Collection and Analysis
Maternal Health Risk Dataset

In [None]:
# Read the maternal health risk CSV into a pandas DataFrame.
dataset_path = '/content/Maternal Health Risk Data Set.csv'
health_dataset = pd.read_csv(dataset_path)

In [None]:
# Development aid removed: run `pd.read_csv?` interactively to view its docstring.

In [None]:
# Preview the top five records of the dataset.
health_dataset.head(n=5)

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk


In [None]:
# number of rows and columns in this dataset, as a (rows, columns) tuple
health_dataset.shape

(1014, 7)

In [None]:
# summary statistics (count, mean, std, min, quartiles, max) for each numeric column
health_dataset.describe()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate
count,1014.0,1014.0,1014.0,1014.0,1014.0,1014.0
mean,29.871795,113.198225,76.460552,8.725986,98.665089,74.301775
std,13.474386,18.403913,13.885796,3.293532,1.371384,8.088702
min,10.0,70.0,49.0,6.0,98.0,7.0
25%,19.0,100.0,65.0,6.9,98.0,70.0
50%,26.0,120.0,80.0,7.5,98.0,76.0
75%,39.0,120.0,90.0,8.0,98.0,80.0
max,70.0,160.0,100.0,19.0,103.0,90.0


In [None]:
# number of records in each risk category, to check how balanced the classes are
health_dataset['RiskLevel'].value_counts()

low risk     406
mid risk     336
high risk    272
Name: RiskLevel, dtype: int64

In [None]:
# mean of every feature within each risk category, to see how the classes differ
health_dataset.groupby('RiskLevel').mean()

Unnamed: 0_level_0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate
RiskLevel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
high risk,36.216912,124.194853,85.073529,12.12261,98.899265,76.742647
low risk,26.869458,105.866995,72.534483,7.220271,98.368966,72.770936
mid risk,28.363095,113.154762,74.232143,7.795744,98.833333,74.175595


In [None]:
# Split the frame into the feature matrix (X) and the target labels (Y).
# `columns=` already names the axis, so no separate `axis` argument is needed.
X = health_dataset.drop(columns=['RiskLevel'])
Y = health_dataset['RiskLevel']

In [None]:
# inspect the feature matrix (every column except RiskLevel)
print(X)

      Age  SystolicBP  DiastolicBP    BS  BodyTemp  HeartRate
0      25         130           80  15.0      98.0         86
1      35         140           90  13.0      98.0         70
2      29          90           70   8.0     100.0         80
3      30         140           85   7.0      98.0         70
4      35         120           60   6.1      98.0         76
...   ...         ...          ...   ...       ...        ...
1009   22         120           60  15.0      98.0         80
1010   55         120           90  18.0      98.0         60
1011   35          85           60  19.0      98.0         86
1012   43         120           90  18.0      98.0         70
1013   32         120           65   6.0     101.0         76

[1014 rows x 6 columns]


In [None]:
# inspect the target labels (the RiskLevel column)
print(Y)

0       high risk
1       high risk
2       high risk
3       high risk
4        low risk
          ...    
1009    high risk
1010    high risk
1011    high risk
1012    high risk
1013     mid risk
Name: RiskLevel, Length: 1014, dtype: object


Data Standardization

In [None]:
# scaler that standardizes each feature to zero mean and unit variance
scaler = StandardScaler()

In [None]:
# learn each feature's mean and standard deviation from X
# NOTE(review): X here is the full dataset, before any train/test split — confirm this is intended
scaler.fit(X)

StandardScaler()

In [None]:
# apply the fitted scaling to X; the result is a NumPy array rather than a DataFrame
standardized_data = scaler.transform(X)

In [None]:
# inspect the standardized feature values
print(standardized_data)

[[-0.36173812  0.91339632  0.25502279  1.90589019 -0.4852155   1.44695615]
 [ 0.38077697  1.45702716  0.97553854  1.29833966 -0.4852155  -0.53208757]
 [-0.06473208 -1.26112705 -0.46549297 -0.22053665  0.97388449  0.70481475]
 ...
 [ 0.38077697 -1.53294248 -1.18600873  3.12099124 -0.4852155   1.44695615]
 [ 0.97478904  0.36976548  0.97553854  2.81721597 -0.4852155  -0.53208757]
 [ 0.15802244  0.36976548 -0.82575085 -0.82808717  1.70343448  0.21005383]]


In [None]:
# From here on the model works with the standardized features;
# the labels are taken unchanged from the original frame.
Y = health_dataset['RiskLevel']
X = standardized_data

In [None]:
# Show the standardized features followed by the labels, one after the other.
print(X, Y, sep='\n')

[[-0.36173812  0.91339632  0.25502279  1.90589019 -0.4852155   1.44695615]
 [ 0.38077697  1.45702716  0.97553854  1.29833966 -0.4852155  -0.53208757]
 [-0.06473208 -1.26112705 -0.46549297 -0.22053665  0.97388449  0.70481475]
 ...
 [ 0.38077697 -1.53294248 -1.18600873  3.12099124 -0.4852155   1.44695615]
 [ 0.97478904  0.36976548  0.97553854  2.81721597 -0.4852155  -0.53208757]
 [ 0.15802244  0.36976548 -0.82575085 -0.82808717  1.70343448  0.21005383]]
0       high risk
1       high risk
2       high risk
3       high risk
4        low risk
          ...    
1009    high risk
1010    high risk
1011    high risk
1012    high risk
1013     mid risk
Name: RiskLevel, Length: 1014, dtype: object


Train Test Split

In [None]:
# Split the *raw* features first, then fit the scaler on the training rows only.
# Fitting StandardScaler on the whole dataset before splitting lets test-set
# statistics leak into training and can make the test accuracy optimistic.
raw_features = health_dataset.drop(columns='RiskLevel')
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    raw_features, Y, test_size=0.2, stratify=Y, random_state=2
)

# Re-bind `scaler` so any later `scaler.transform` call applies the
# train-only statistics that the classifier was trained with.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# Report the shapes of the full, training and test feature matrices.
print(*(matrix.shape for matrix in (X, X_train, X_test)))

(1014, 6) (811, 6) (203, 6)


Training the Model

In [None]:
# support vector classifier with a linear kernel
classifier = svm.SVC(kernel='linear')

In [None]:
# train the support vector machine classifier on the training split
classifier.fit(X_train, Y_train)

SVC(kernel='linear')

Model Evaluation

Accuracy Score

In [None]:
# accuracy score on the training data
# accuracy_score expects (y_true, y_pred): ground-truth labels first, predictions second.
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# report the fraction of training samples that were classified correctly
print('Accuracy score of the training data : ', training_data_accuracy)

Accuracy score of the training data :  0.6596794081381011


In [None]:
# accuracy score on the test data
# accuracy_score expects (y_true, y_pred): ground-truth labels first, predictions second.
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# report the fraction of held-out test samples that were classified correctly
print('Accuracy score of the test data : ', test_data_accuracy)

Accuracy score of the test data :  0.6699507389162561


Making a Predictive System

In [None]:
# Feature values for one patient, in the same column order as the training data:
# Age, SystolicBP, DiastolicBP, BS, BodyTemp, HeartRate
input_data = (25,110,89,7.01,98,77)

# changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array to (1, n_features) as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# Label the values with the training feature names. The scaler was fitted on a
# DataFrame, so a bare array triggers scikit-learn's feature-name warning; this
# also fails loudly if the number of values does not match the feature columns.
feature_names = health_dataset.columns.drop('RiskLevel')
input_frame = pd.DataFrame(input_data_reshaped, columns=feature_names)

# standardize the input data with the statistics learned by the fitted scaler
std_data = scaler.transform(input_frame)
print(std_data)

# predict the risk level for the standardized instance
prediction = classifier.predict(std_data)
print(prediction)


[[-0.36173812 -0.17386537  0.90348697 -0.52127416 -0.4852155   0.33374406]]
['low risk']


