# Working With Healthcare Data in Python Using Numpy and Pandas Libraries

#### Using One-Dimensional Numpy

In [21]:
#Importing the Packages
import numpy as np

In [25]:
#Creating a Numpy array with patients ages

patient_age = np.array([25, 30, 40, 22, 35, 50, 29, 60, 41, 28])
avrg_age = patient_age.mean()

In [27]:
avrg_age

36.0

The average age of the patients is 36

In [31]:
# Creating a Numpy array with the cholesterol levels of patients

chol_level = np.array([210, 180, 230, 250, 300, 220, 200, 190])
chol_level.max()

300

The highest cholesterol level from the list is 300

In [34]:
# Creating a Numpy array of blood pressure readings

blood_pre = np.array([120, 130, 140, 135, 125, 110, 145, 120, 130, 140, 150, 135])
blood_pre.std()

11.055415967851333

The standard deviation of the blood pressure readings is 11.0554

In [37]:
# Creating a Numpy array of body temperatures (in Celsius) for patients
temperatures = np.array([37.1, 36.5, 37.3, 36.8, 37.0, 38.0, 36.7, 37.2, 36.9, 37.5])
temperatures.mean()

37.1

The mean temperature for the patients is 37.1

In [40]:
# Creating a Numpy array of patient's weight measurements

weight = np.array([60, 72, 65, 58, 82, 75, 70, 68, 66, 80])
flipped_w = np.flip(weight)   #This reverses the Numpy array
flipped_w

array([80, 66, 68, 70, 75, 82, 58, 65, 72, 60])

#### Using Two-Dimensional Numpy

In [44]:
# Creating a 3x3 Numpy array with the following patient data (Age, Weight, Blood Pressure)
p_data = np.array([[25, 70, 120],[30, 80, 130],[40, 85, 140]])
p_data

array([[ 25,  70, 120],
       [ 30,  80, 130],
       [ 40,  85, 140]])

In [50]:
# Accessing the blood pressure of the second patient (Using Index)
p_data[1][2]

130

The blood pressure of the second patient is 130

In [57]:
# Creating a 2D Numpy array of 4x4 representing the results of 4 blood tests with 4 parameters
blood_test = np.array([[5.6, 7.1, 6.2, 5.8],
[5.3, 6.9, 7.0, 6.5],
[5.7, 6.5, 6.8, 6.0],
[5.4, 7.0, 6.5, 5.9]]
)

In [61]:
# Summing the values across each row (representing total test results for each patient)
np.sum(blood_test,axis=1)

array([24.7, 25.7, 25. , 24.8])

In [17]:
# Creating a 2D Numpy array with blood sugar levels for 5 patients over 3 visits

blood_sugar=np.array([[80, 85, 90],
[95, 92, 88],
[85, 83, 82],
[100, 105, 110],
[75, 78, 80]])


In [19]:
blood_sugar[2].mean()

83.33333333333333

The average blood sugar level for the 3rd patient across all visits is 83.33

In [68]:
# Creating a matrix representing patient BMI values for 3 patients
bmi=np.array([[22.1, 23.5],
[27.8, 28.0],
[21.2, 20.9]])*np.array([60, 70, 80]).reshape(-1,1)  #Element-wise multiplication of BMI and weight 

In [72]:
# The new bmi product
bmi

array([[1326., 1410.],
       [1946., 1960.],
       [1696., 1672.]])

In [74]:
# Creating a 2x5 Numpy array to represent the temperature data for 2 patients taken over 5 days
p_temp=np.array([[37.0, 36.5, 37.1, 36.9, 37.0],
[36.8, 37.3, 37.0, 36.7, 37.2]])

In [31]:
np.max(p_temp,axis=1)

array([37.1, 37.3])

The maximum temperature for each patient is 37.1 and 37.3

#### Using Pandas Series

In [None]:
# SERIES

In [78]:
import pandas as pd

In [80]:
# Creating a Pandas series representing the blood pressure readings of 5 patients

blood_pressure=pd.Series([120, 130, 140, 135, 125])
blood_pressure.max()

140

In [9]:
blood_pressure.min()

120

The blood pressure values has a maximum of 140 and a minimum of 120

In [83]:
# Creating a Pandas Series with cholesterol levels for 6 patients

chol=pd.Series([210, 180, 220, 250, 280, 230])
chol>240

0    False
1    False
2    False
3     True
4     True
5    False
dtype: bool

In [85]:
chol[chol>240]

3    250
4    280
dtype: int64

There are only two patients with cholesterol levels above 240; patients at index 3 and 4

In [88]:
# Creating  a Pandas Series for 10 patients' ages

p_age=pd.Series([35, 40, 25, 30, 45, 50, 55, 60, 65, 70])
p_age.median()

47.5

The median age of the patients is 47.5

In [91]:
# Creating a Pandas Series with weight measurements in kilograms for 5 patients

p_weight=pd.Series([70, 80, 75, 65, 85])
new_p_weight=p_weight + 5             # Adding 5kg to each patient's weight
new_p_weight

0    75
1    85
2    80
3    70
4    90
dtype: int64

In [93]:
# Creating a Pandas Series with 10 temperature readings

temp=pd.Series([37.2, 36.8, 37.5, 38.0, 36.6, 37.1, 36.9, 37.3, 37.4, 37.0])
temp.mean()

37.17999999999999

The average temperature of all 10 patients is 37.1799

#### Using Pandas DataFrame

In [109]:
# Creating a DataFrame with patient data (Name, Age, Blood Pressure, Cholesterol)
data_dic = {'Name': ['John', 'Jane', 'Tom', 'Alice'],
        'Age': [28, 34, 45, 52],
        'Blood Pressure': [120, 130, 140, 150],
        'Cholesterol': [200, 220, 240, 260]}
data_dic

{'Name': ['John', 'Jane', 'Tom', 'Alice'],
 'Age': [28, 34, 45, 52],
 'Blood Pressure': [120, 130, 140, 150],
 'Cholesterol': [200, 220, 240, 260]}

In [149]:
patients_data1=pd.DataFrame(data_dic)
patients_data1

Unnamed: 0,Name,Age,Blood Pressure,Cholesterol
0,John,28,120,200
1,Jane,34,130,220
2,Tom,45,140,240
3,Alice,52,150,260


In [151]:
patients_data1[patients_data1['Cholesterol']>220]

Unnamed: 0,Name,Age,Blood Pressure,Cholesterol
2,Tom,45,140,240
3,Alice,52,150,260


All Patients with cholesterol greater than 220 

In [153]:
patients_data1

Unnamed: 0,Name,Age,Blood Pressure,Cholesterol
0,John,28,120,200
1,Jane,34,130,220
2,Tom,45,140,240
3,Alice,52,150,260


In [118]:
# Creating a DataFrame to store patient information (Name, Age, Height, Weight, Blood Pressure) 
data_bmi = {'Name': ['John', 'Jane', 'Tom', 'Alice'],
        'Age': [28, 34, 45, 52],
        'Blood Pressure': [120, 130, 140, 150],
        'Cholesterol': [200, 220, 240, 260],
         'Height': [7, 8, 5, 6],
          'Weight': [30, 50, 75,65]}

In [120]:
df_bmi = pd.DataFrame(data_bmi)

In [122]:
# Adding a new column for BMI where BMI = Weight / (Height^2)

df_bmi['BMI'] = df_bmi['Weight'] / (df_bmi['Height'] ** 2)

In [124]:
print(df_bmi)

    Name  Age  Blood Pressure  Cholesterol  Height  Weight       BMI
0   John   28             120          200       7      30  0.612245
1   Jane   34             130          220       8      50  0.781250
2    Tom   45             140          240       5      75  3.000000
3  Alice   52             150          260       6      65  1.805556


In [128]:
# Creating a DataFrame with patient blood test results (Test ID, Patient Name, Result)
patient_blood_test = ({'Test ID': [1, 2, 3, 4],
                       'Patient Name': ['John', 'Jane', 'Tom', 'Alice'],
                       'Result': [5.6, 6.1, 5.8, 6.3]})


In [130]:
patient_df=pd.DataFrame(patient_blood_test)
patient_df

Unnamed: 0,Test ID,Patient Name,Result
0,1,John,5.6
1,2,Jane,6.1
2,3,Tom,5.8
3,4,Alice,6.3


In [134]:
# Filtering patients based on test result

patient_df[patient_df['Result']>6.0]

Unnamed: 0,Test ID,Patient Name,Result
1,2,Jane,6.1
3,4,Alice,6.3


Patients with test result greater than 6.0

In [139]:
patient_df['Result']>6.0

0    False
1     True
2    False
3     True
Name: Result, dtype: bool

In [155]:
data_dic2=  {'Name': ['John', 'Jane', 'Tom', 'Alice'],
        'Age': [28, 34, 45, 52],
        'Blood Pressure': [120, 130, 140, 150],
        'Cholesterol': [200, 220, 240, 260],
         'Height': [7, 8, 5, 6],
          'Weight': [30, 50, 75,65]}

In [157]:
patients_data2=pd.DataFrame(data_dic2)
patients_data

Unnamed: 0,Name,Age,Blood Pressure,Cholesterol
0,John,28,120,200
1,Jane,34,130,220
2,Tom,45,140,240
3,Alice,52,150,260


In [159]:
#Calculating the average of patient data column uding .mean()

patients_data2[['Age','Height','Weight']].mean()

Age       39.75
Height     6.50
Weight    55.00
dtype: float64

In [161]:
# Creating a DataFrame with treatment details (Patient Name, Treatment Type, Treatment Date)
treatment_details = {'Patient Name': ['John', 'Jane', 'Tom', 'Alice'],
                     'Treatment Type': ['Surgery', 'Consultation', 'Surgery', 'Consultation'],
                     'Treatment Date': ['2022-01-01', '2022-02-01', '2022-03-01', '2022-04-01']}
treatment_details

{'Patient Name': ['John', 'Jane', 'Tom', 'Alice'],
 'Treatment Type': ['Surgery', 'Consultation', 'Surgery', 'Consultation'],
 'Treatment Date': ['2022-01-01', '2022-02-01', '2022-03-01', '2022-04-01']}

In [163]:
# Passing the treatment details to DataFrame
treatment_df = pd.DataFrame(treatment_details)
treatment_df

Unnamed: 0,Patient Name,Treatment Type,Treatment Date
0,John,Surgery,2022-01-01
1,Jane,Consultation,2022-02-01
2,Tom,Surgery,2022-03-01
3,Alice,Consultation,2022-04-01


In [20]:
# Finding records with "Surgery" treatment 

treatment_df[treatment_df['Treatment Type']=='Surgery']

Unnamed: 0,Patient Name,Treatment Type,Treatment Date
0,John,Surgery,2022-01-01
2,Tom,Surgery,2022-03-01


In [29]:
treatment_df.tail(-1)

Unnamed: 0,Patient Name,Treatment Type,Treatment Date
1,Jane,Consultation,2022-02-01
2,Tom,Surgery,2022-03-01
3,Alice,Consultation,2022-04-01
