# Importing Libraries

In [2]:
import torch
from torch import nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Setting up device diagnostic code, to run our code on gpu directly

In [3]:
device = "cuda" if torch.cuda.is_available else "cpu"
device

'cuda'

Importing data into df

In [4]:
df = pd.read_csv("D:/Codes/Pycharm Projects/PYTORCH/PROJECTS/AI Healthcare/AI-IN-HEALTHCARE/Case_Study - 1 [ Predicting Disease Outcomes from Patient Data ]/Data/hospital data analysis.csv")
df

Unnamed: 0,Patient_ID,Age,Gender,Condition,Procedure,Cost,Length_of_Stay,Readmission,Outcome,Satisfaction
0,1,45,Female,Heart Disease,Angioplasty,15000,5,No,Recovered,4
1,2,60,Male,Diabetes,Insulin Therapy,2000,3,Yes,Stable,3
2,3,32,Female,Fractured Arm,X-Ray and Splint,500,1,No,Recovered,5
3,4,75,Male,Stroke,CT Scan and Medication,10000,7,Yes,Stable,2
4,5,50,Female,Cancer,Surgery and Chemotherapy,25000,10,No,Recovered,4
...,...,...,...,...,...,...,...,...,...,...
979,996,68,Male,Hypertension,Medication and Counseling,1000,70,No,Stable,4
980,997,45,Female,Appendicitis,Appendectomy,8000,72,No,Recovered,3
981,998,40,Male,Fractured Leg,Cast and Physical Therapy,3000,72,No,Recovered,4
982,999,78,Female,Heart Attack,Cardiac Catheterization,18000,74,Yes,Stable,2


All the the columns and their dtypes from the dataframe

In [5]:
df.dtypes

Patient_ID         int64
Age                int64
Gender            object
Condition         object
Procedure         object
Cost               int64
Length_of_Stay     int64
Readmission       object
Outcome           object
Satisfaction       int64
dtype: object

All the unique values form the " Conditions " column

In [6]:
unique_condition = df['Condition'].unique()
for condition in unique_condition:
    print(condition)

Heart Disease
Diabetes
Fractured Arm
Stroke
Cancer
Hypertension
Appendicitis
Fractured Leg
Heart Attack
Allergic Reaction
Respiratory Infection
Prostate Cancer
Childbirth
Kidney Stones
Osteoarthritis


Conditions and Outcome grouped together for better understanding

In [7]:
outcome_by_condition = df.groupby(['Condition','Outcome']).size().unstack(fill_value = 0)
outcome_by_condition

Outcome,Recovered,Stable
Condition,Unnamed: 1_level_1,Unnamed: 2_level_1
Allergic Reaction,66,0
Appendicitis,66,0
Cancer,66,0
Childbirth,65,0
Diabetes,0,65
Fractured Arm,66,0
Fractured Leg,67,0
Heart Attack,0,67
Heart Disease,65,0
Hypertension,0,66


Grouped by AGE for better understanding of AGE wise distribution of the Outcome of the patients

In [8]:
outcome_by_condition = df.groupby(['Age','Condition','Outcome']).size().unstack(fill_value = 0)
outcome_by_condition

Unnamed: 0_level_0,Outcome,Recovered,Stable
Age,Condition,Unnamed: 2_level_1,Unnamed: 3_level_1
25,Allergic Reaction,34,0
28,Allergic Reaction,32,0
30,Childbirth,1,0
30,Fractured Arm,32,0
32,Childbirth,32,0
32,Fractured Arm,1,0
35,Childbirth,32,0
35,Fractured Arm,33,0
40,Fractured Leg,34,0
45,Appendicitis,32,0


Copying the dataframe DF into a new dataframe DF1 to perform data cleaning and exploration.
Dropped column " Patient_ID " as it was of no use.

In [9]:
df1 = df
df1.drop('Patient_ID', axis = 'columns', inplace = True)
df1.shape

(984, 9)

In [10]:
df1.dtypes

Age                int64
Gender            object
Condition         object
Procedure         object
Cost               int64
Length_of_Stay     int64
Readmission       object
Outcome           object
Satisfaction       int64
dtype: object

Showing the values of each column those have a dtype of object.

In [11]:
def condition_per_outcome_object(df):
    for column in df:
        if df[column].dtypes == 'object':
            print(f"{column} : {df[column].unique()}")

condition_per_outcome_object(df1)

Gender : ['Female' 'Male']
Condition : ['Heart Disease' 'Diabetes' 'Fractured Arm' 'Stroke' 'Cancer'
 'Hypertension' 'Appendicitis' 'Fractured Leg' 'Heart Attack'
 'Allergic Reaction' 'Respiratory Infection' 'Prostate Cancer'
 'Childbirth' 'Kidney Stones' 'Osteoarthritis']
Procedure : ['Angioplasty' 'Insulin Therapy' 'X-Ray and Splint'
 'CT Scan and Medication' 'Surgery and Chemotherapy'
 'Medication and Counseling' 'Appendectomy' 'Cast and Physical Therapy'
 'Cardiac Catheterization' 'Epinephrine Injection' 'Antibiotics and Rest'
 'Radiation Therapy' 'Delivery and Postnatal Care' 'Lithotripsy'
 'Physical Therapy and Pain Management']
Readmission : ['No' 'Yes']
Outcome : ['Recovered' 'Stable']


Converted the columns with [ "Yes", "No" ] to [0, 1]

In [16]:
# df1['Readmission'] = df1['Readmission'].str.strip().str.lower()
# df1['Readmission'].replace({"yes" : 1, "no" : 0}, inplace = True)

def condition_per_outcome(df):
    for column in df:
        print(f"{column} : {df[column].unique()}")
condition_per_outcome(df1)

Age : [45 60 32 75 50 68 55 40 70 25 48 65 30 52 58 62 35 78 53 72 28 67]
Gender : [1 0]
Condition : ['Heart Disease' 'Diabetes' 'Fractured Arm' 'Stroke' 'Cancer'
 'Hypertension' 'Appendicitis' 'Fractured Leg' 'Heart Attack'
 'Allergic Reaction' 'Respiratory Infection' 'Prostate Cancer'
 'Childbirth' 'Kidney Stones' 'Osteoarthritis']
Procedure : ['Angioplasty' 'Insulin Therapy' 'X-Ray and Splint'
 'CT Scan and Medication' 'Surgery and Chemotherapy'
 'Medication and Counseling' 'Appendectomy' 'Cast and Physical Therapy'
 'Cardiac Catheterization' 'Epinephrine Injection' 'Antibiotics and Rest'
 'Radiation Therapy' 'Delivery and Postnatal Care' 'Lithotripsy'
 'Physical Therapy and Pain Management']
Cost : [15000  2000   500 10000 25000  1000  8000  3000 18000   100   800 20000
 12000  6000  4000]
Length_of_Stay : [ 5  3  1  7 10  2  4  6  8  9 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 5

In [17]:
df1['Readmission'].unique()

array([0, 1])

In [18]:
# df1['Gender'].replace({"Female" : 1, "Male" : 0}, inplace = True)
# df1['Outcome'].replace({"Recovered" : 1, "Stable" : 0}, inplace = True)
condition_per_outcome(df1)

Age : [45 60 32 75 50 68 55 40 70 25 48 65 30 52 58 62 35 78 53 72 28 67]
Gender : [1 0]
Condition : ['Heart Disease' 'Diabetes' 'Fractured Arm' 'Stroke' 'Cancer'
 'Hypertension' 'Appendicitis' 'Fractured Leg' 'Heart Attack'
 'Allergic Reaction' 'Respiratory Infection' 'Prostate Cancer'
 'Childbirth' 'Kidney Stones' 'Osteoarthritis']
Procedure : ['Angioplasty' 'Insulin Therapy' 'X-Ray and Splint'
 'CT Scan and Medication' 'Surgery and Chemotherapy'
 'Medication and Counseling' 'Appendectomy' 'Cast and Physical Therapy'
 'Cardiac Catheterization' 'Epinephrine Injection' 'Antibiotics and Rest'
 'Radiation Therapy' 'Delivery and Postnatal Care' 'Lithotripsy'
 'Physical Therapy and Pain Management']
Cost : [15000  2000   500 10000 25000  1000  8000  3000 18000   100   800 20000
 12000  6000  4000]
Length_of_Stay : [ 5  3  1  7 10  2  4  6  8  9 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 5

In [20]:
df1.dtypes

Age                int64
Gender             int64
Condition         object
Procedure         object
Cost               int64
Length_of_Stay     int64
Readmission        int64
Outcome            int64
Satisfaction       int64
dtype: object

In [23]:
df2 = pd.get_dummies(data = df1, columns = ['Condition', 'Procedure'])
df2

Unnamed: 0,Age,Gender,Cost,Length_of_Stay,Readmission,Outcome,Satisfaction,Condition_Allergic Reaction,Condition_Appendicitis,Condition_Cancer,...,Procedure_Cast and Physical Therapy,Procedure_Delivery and Postnatal Care,Procedure_Epinephrine Injection,Procedure_Insulin Therapy,Procedure_Lithotripsy,Procedure_Medication and Counseling,Procedure_Physical Therapy and Pain Management,Procedure_Radiation Therapy,Procedure_Surgery and Chemotherapy,Procedure_X-Ray and Splint
0,45,1,15000,5,0,1,4,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,60,0,2000,3,1,0,3,False,False,False,...,False,False,False,True,False,False,False,False,False,False
2,32,1,500,1,0,1,5,False,False,False,...,False,False,False,False,False,False,False,False,False,True
3,75,0,10000,7,1,0,2,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,50,1,25000,10,0,1,4,False,False,True,...,False,False,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,68,0,1000,70,0,0,4,False,False,False,...,False,False,False,False,False,True,False,False,False,False
980,45,1,8000,72,0,1,3,False,True,False,...,False,False,False,False,False,False,False,False,False,False
981,40,0,3000,72,0,1,4,False,False,False,...,True,False,False,False,False,False,False,False,False,False
982,78,1,18000,74,1,0,2,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [24]:
condition_per_outcome(df2)

Age : [45 60 32 75 50 68 55 40 70 25 48 65 30 52 58 62 35 78 53 72 28 67]
Gender : [1 0]
Cost : [15000  2000   500 10000 25000  1000  8000  3000 18000   100   800 20000
 12000  6000  4000]
Length_of_Stay : [ 5  3  1  7 10  2  4  6  8  9 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 67 66 68 69 70 71 72
 73 74 75 76]
Readmission : [0 1]
Outcome : [1 0]
Satisfaction : [4 3 5 2]
Condition_Allergic Reaction : [False  True]
Condition_Appendicitis : [False  True]
Condition_Cancer : [False  True]
Condition_Childbirth : [False  True]
Condition_Diabetes : [False  True]
Condition_Fractured Arm : [False  True]
Condition_Fractured Leg : [False  True]
Condition_Heart Attack : [False  True]
Condition_Heart Disease : [ True False]
Condition_Hypertension : [False  True]
Condition_Kidney Stones : [False  True]
Condition_Osteoarthritis : [False  True]
Condition_Prostate Cancer : [Fals

In [25]:
df2.dtypes

Age                                               int64
Gender                                            int64
Cost                                              int64
Length_of_Stay                                    int64
Readmission                                       int64
Outcome                                           int64
Satisfaction                                      int64
Condition_Allergic Reaction                        bool
Condition_Appendicitis                             bool
Condition_Cancer                                   bool
Condition_Childbirth                               bool
Condition_Diabetes                                 bool
Condition_Fractured Arm                            bool
Condition_Fractured Leg                            bool
Condition_Heart Attack                             bool
Condition_Heart Disease                            bool
Condition_Hypertension                             bool
Condition_Kidney Stones                         

In [27]:
cols_to_scale = ['Age','Cost','Length_of_Stay','Satisfaction']

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

df2[cols_to_scale] = scaler.fit_transform(df2[cols_to_scale])

In [30]:
df2

Unnamed: 0,Age,Gender,Cost,Length_of_Stay,Readmission,Outcome,Satisfaction,Condition_Allergic Reaction,Condition_Appendicitis,Condition_Cancer,...,Procedure_Cast and Physical Therapy,Procedure_Delivery and Postnatal Care,Procedure_Epinephrine Injection,Procedure_Insulin Therapy,Procedure_Lithotripsy,Procedure_Medication and Counseling,Procedure_Physical Therapy and Pain Management,Procedure_Radiation Therapy,Procedure_Surgery and Chemotherapy,Procedure_X-Ray and Splint
0,0.377358,1,0.598394,0.053333,0,1,0.666667,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,0.660377,0,0.076305,0.026667,1,0,0.333333,False,False,False,...,False,False,False,True,False,False,False,False,False,False
2,0.132075,1,0.016064,0.000000,0,1,1.000000,False,False,False,...,False,False,False,False,False,False,False,False,False,True
3,0.943396,0,0.397590,0.080000,1,0,0.000000,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,0.471698,1,1.000000,0.120000,0,1,0.666667,False,False,True,...,False,False,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,0.811321,0,0.036145,0.920000,0,0,0.666667,False,False,False,...,False,False,False,False,False,True,False,False,False,False
980,0.377358,1,0.317269,0.946667,0,1,0.333333,False,True,False,...,False,False,False,False,False,False,False,False,False,False
981,0.283019,0,0.116466,0.946667,0,1,0.666667,False,False,False,...,True,False,False,False,False,False,False,False,False,False
982,1.000000,1,0.718876,0.973333,1,0,0.000000,False,False,False,...,False,False,False,False,False,False,False,False,False,False
