# Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

### Data File description 
#### sex: 1 for female and 2 for male.
###### age: of the patient.
##### classification: covid test findings. Values 1-3 mean that the patient was diagnosed with covid in different degrees. 4 or higher means that the patient is not a carrier of covid or that the test is inconclusive.
###### patient type: type of care the patient received in the unit. 1 for returned home and 2 for hospitalization.
##### pneumonia: whether the patient already has inflammation of the air sacs or not.
###### pregnancy: whether the patient is pregnant or not.
##### diabetes: whether the patient has diabetes or not.
###### copd: Indicates whether the patient has Chronic obstructive pulmonary disease or not.
##### asthma: whether the patient has asthma or not.
###### inmsupr: whether the patient is immunosuppressed or not.
##### hypertension: whether the patient has hypertension or not.
###### cardiovascular: whether the patient has heart or blood vessels related disease.
##### renal chronic: whether the patient has chronic renal disease or not.
###### other disease: whether the patient has other disease or not.
##### obesity: whether the patient is obese or not.
###### tobacco: whether the patient is a tobacco user.
##### usmer: indicates whether the patient was treated in a medical unit of the first, second or third level.
###### medical unit: type of institution of the National Health System that provided the care.
##### intubed: whether the patient was connected to the ventilator.
###### icu: Indicates whether the patient had been admitted to an Intensive Care Unit.
##### date died: the date of death if the patient died, and 9999-99-99 otherwise.

# Data Manipulation

In [None]:
# Read the raw Covid dataset from the CSV file next to this notebook.
csv_path = 'Covid Data.csv'
df = pd.read_csv(csv_path)

# Show the first rows as a quick sanity check of what was loaded.
df.head()

The CSV file is missing an important feature: it does not tell us directly whether a patient died. It does contain DATE_DIED, however, so let's create a column that indicates which patients died and which did not.

In [None]:
# Derive the RESULT target from DATE_DIED: the placeholder date '9999-99-99'
# means the patient did not die (RESULT = 0); any other value means the
# patient died (RESULT = 1).
feature_columns = [
    'USMER', 'MEDICAL_UNIT', 'SEX', 'PATIENT_TYPE', 'INTUBED', 'PNEUMONIA',
    'AGE', 'PREGNANT', 'DIABETES', 'COPD', 'ASTHMA', 'INMSUPR',
    'HIPERTENSION', 'OTHER_DISEASE', 'CARDIOVASCULAR', 'OBESITY',
    'RENAL_CHRONIC', 'TOBACCO', 'CLASIFFICATION_FINAL', 'ICU',
]
# .copy() makes df_1 an independent frame, so adding and dropping columns
# below does not raise SettingWithCopyWarning on a slice of df.
df_1 = df[feature_columns + ['DATE_DIED']].copy()
df_1['RESULT'] = np.where(df_1['DATE_DIED'] == '9999-99-99', 0, 1)
# DATE_DIED is now fully encoded in RESULT, so it is no longer needed.
df_1 = df_1.drop(columns=['DATE_DIED'])

# take a peek at the change
df_1.head()

In [None]:
# Get an understanding of how each parameter behaves across 5-year age groups
# by averaging every column within each group.

bins = np.arange(0, 130, 5)
# pd.cut builds right-closed intervals by default, so without
# include_lowest=True the first bin would be (0, 5] and patients with
# AGE == 0 would fall outside every bin and be dropped from the groups.
age_groups = pd.cut(df_1['AGE'], bins, include_lowest=True)
# observed=False keeps age bins that contain no patients in the result.
groups = df_1.groupby(age_groups, observed=False).mean()
print(groups.head())
# Mean of SEX per age group (1 = female, 2 = male, so higher means more males).
groups['SEX'].plot.line()

In [None]:
# Do the same with RESULT: since RESULT is 0/1, its mean per age group is the
# share of patients in that group who died (a death rate, not a count).
bins = np.arange(0, 130, 5)
# include_lowest=True keeps patients with AGE == 0 in the first bin; the
# default right-closed interval (0, 5] would silently exclude them.
age_groups = pd.cut(df_1['AGE'], bins, include_lowest=True)
# observed=False keeps age bins that contain no patients in the result.
groups = df_1.groupby(age_groups, observed=False).mean()
print(groups.head())
groups['RESULT'].plot.line()

By taking the mean of each parameter within every age group we can extract a lot of information, such as:

- Apparently, in the 0-25 age groups males were infected more often; in the 25-50 groups females were infected more often; in the 50-90 groups it switches back to males; and so on. The interesting point is the significant increase in infected males in the 110-115 group.

- Between the ages of 0 and 65 most patients were treated at home, while patients aged 65 and older were mostly treated in hospital; again, there is a significant increase in hospital treatment in the 110-115 group.

We need to know which features are the important ones. This sounds like a problem that multiple regression can solve, by giving us the correlation between each feature and the result.

# Extracting important features

In [None]:
# Standardize every feature so the fitted coefficients are on a comparable
# scale, then fit a multiple linear regression of RESULT on all features to
# see which ones have the largest positive or negative effect.
scale = StandardScaler()

predictor_columns = [
    'USMER', 'MEDICAL_UNIT', 'SEX', 'PATIENT_TYPE', 'INTUBED', 'PNEUMONIA',
    'AGE', 'PREGNANT', 'DIABETES', 'COPD', 'ASTHMA', 'INMSUPR',
    'HIPERTENSION', 'OTHER_DISEASE', 'CARDIOVASCULAR', 'OBESITY',
    'RENAL_CHRONIC', 'TOBACCO', 'CLASIFFICATION_FINAL', 'ICU',
]
# Build the scaled matrix as a new DataFrame instead of assigning into a
# slice of df_1, which raises SettingWithCopyWarning.
X = pd.DataFrame(
    scale.fit_transform(df_1[predictor_columns]),
    columns=predictor_columns,
    index=df_1.index,
)
y = df_1[['RESULT']]

# sm.OLS does not add an intercept on its own. The features are centered by
# the scaler, so adding the constant leaves their coefficients unchanged but
# lets the intercept absorb the mean of RESULT, which makes the reported
# R-squared and standard errors meaningful.
est = sm.OLS(y, sm.add_constant(X)).fit()
est.summary()

The `coef` value of each feature indicates how strongly, and in which direction, it is correlated with the result.

Strongest positive correlations are: ICU, AGE, PATIENT_TYPE, SEX.
Strongest negative correlations are: INTUBED, CLASIFFICATION_FINAL, MEDICAL_UNIT.

Let's also include TOBACCO, simply because tobacco use is common.

The positive correlations tell us that patients with higher ICU values, older patients, patients who received hospital treatment (PATIENT_TYPE), and male patients have a higher chance of dying from this disease.

The negative correlations tell us that higher values of ventilator use (INTUBED), of the covid test classification (CLASIFFICATION_FINAL), and of the type of National Health System institution that provided the care (MEDICAL_UNIT) go with a lower chance of dying from this disease.


# Use important features

In [None]:
# Narrow the data down to the features picked for the second model, keeping
# the RESULT target as the last column.
selected_columns = [
    'ICU', 'PATIENT_TYPE', 'AGE', 'SEX', 'TOBACCO',
    'INTUBED', 'CLASIFFICATION_FINAL', 'MEDICAL_UNIT',
    'RESULT',
]
df_2 = df_1[selected_columns]
df_2.head()

In [None]:
# Refit the regression on the reduced feature set. strScaler and estimation
# are reused by the prediction cells, which pass rows in exactly this column
# order.
strScaler = StandardScaler()
selected_features = [
    'ICU', 'PATIENT_TYPE', 'AGE', 'SEX', 'TOBACCO',
    'INTUBED', 'CLASIFFICATION_FINAL', 'MEDICAL_UNIT',
]
y1 = df_2[['RESULT']]
# Build the standardized matrix as a fresh DataFrame instead of assigning
# into a slice of df_2, which raises SettingWithCopyWarning.
X1 = pd.DataFrame(
    strScaler.fit_transform(df_2[selected_features]),
    columns=selected_features,
    index=df_2.index,
)

# NOTE(review): sm.OLS does not add an intercept by itself, so this model is
# fitted through the origin. It is left that way here because the prediction
# cells call estimation.predict with 8-column rows; adding a constant would
# require them to pass the constant column as well.
estimation = sm.OLS(y1, X1).fit()
estimation.summary()

# Predict

In [None]:
# Estimate the death chance for one hand-written patient.
# Values are given in the order the scaler and model were fitted with:
# ['ICU', 'PATIENT_TYPE', 'AGE', 'SEX', 'TOBACCO', 'INTUBED', 'CLASIFFICATION_FINAL', 'MEDICAL_UNIT']
sample_patient = [[80, 2, 34, 2, 1, 20, 7, 1]]
# Apply the same standardization that was fitted on the training data.
scaled = strScaler.transform(sample_patient)
print(scaled)
predicted = estimation.predict(scaled)
print(predicted)
# The model output is a fraction; report it as a percentage with 2 decimals.
death_percent = round(predicted[0] * 100, 2)
print(f"Death probability: {death_percent} %")

# Recommendation

In [None]:
# Patient record used by the recommendation cells below. Each value is named
# after the model feature it feeds.
name = "Omid"
ICU, PATIENT_TYPE = 10, 1
AGE, SEX = 22, 2
TOBACCO, INTUBED = 1, 1
CLASIFFICATION_FINAL, MEDICAL_UNIT = 2, 1

In [None]:
# Find the best amount for ICU: try each candidate value with the patient's
# other attributes unchanged and keep the one with the lowest predicted death
# chance. (The result keeps the name ICU_max because later cells read it.)
# Start from +infinity so the first candidate is always accepted; starting
# from 0 accepted only negative predictions and could leave ICU_max undefined.
prob = float("inf")
for i in range(0, 101, 10):
    patient = [[i, PATIENT_TYPE, AGE, SEX, TOBACCO, INTUBED, CLASIFFICATION_FINAL, MEDICAL_UNIT]]
    scaled = strScaler.transform(patient)
    # Single-row prediction, expressed as a percentage with 2 decimals.
    predicted = round(estimation.predict(scaled)[0] * 100, 2)
    if predicted < prob:
        prob = predicted
        ICU_max = i
print(ICU_max)

In [None]:
# Find the best amount for PATIENT_TYPE (1 = returned home, 2 = hospitalized),
# using the ICU value chosen earlier, and keep the one with the lowest
# predicted death chance.
# Start from +infinity so the first candidate is always accepted; starting
# from 0 accepted only negative predictions and could leave PATIENT_TYPE_max
# undefined.
prob = float("inf")
for i in range(1, 3):
    patient = [[ICU_max, i, AGE, SEX, TOBACCO, INTUBED, CLASIFFICATION_FINAL, MEDICAL_UNIT]]
    scaled = strScaler.transform(patient)
    # Single-row prediction, expressed as a percentage with 2 decimals.
    predicted = round(estimation.predict(scaled)[0] * 100, 2)
    if predicted < prob:
        prob = predicted
        PATIENT_TYPE_max = i
print(PATIENT_TYPE_max)

In [None]:
# Find the best amount for TOBACCO, using the ICU and PATIENT_TYPE values
# chosen earlier, and keep the one with the lowest predicted death chance.
# Start from +infinity so the first candidate is always accepted; starting
# from 0 accepted only negative predictions and could leave TOBACCO_max
# undefined.
prob = float("inf")
for i in range(0, 101, 10):
    patient = [[ICU_max, PATIENT_TYPE_max, AGE, SEX, i, INTUBED, CLASIFFICATION_FINAL, MEDICAL_UNIT]]
    scaled = strScaler.transform(patient)
    # Single-row prediction, expressed as a percentage with 2 decimals.
    predicted = round(estimation.predict(scaled)[0] * 100, 2)
    if predicted < prob:
        prob = predicted
        TOBACCO_max = i
print(TOBACCO_max)

In [None]:
# Find the best amount for INTUBED, using the values chosen in the earlier
# cells, and keep the one with the lowest predicted death chance.
# Start from +infinity so the first candidate is always accepted; starting
# from 0 accepted only negative predictions and could leave INTUBED_max
# undefined.
prob = float("inf")
for i in range(0, 101, 10):
    patient = [[ICU_max, PATIENT_TYPE_max, AGE, SEX, TOBACCO_max, i, CLASIFFICATION_FINAL, MEDICAL_UNIT]]
    scaled = strScaler.transform(patient)
    # Single-row prediction, expressed as a percentage with 2 decimals.
    predicted = round(estimation.predict(scaled)[0] * 100, 2)
    if predicted < prob:
        prob = predicted
        INTUBED_max = i
print(INTUBED_max)

In [None]:
# Find the best amount for CLASIFFICATION_FINAL (candidates 1 to 4), using the
# values chosen in the earlier cells, and keep the one with the lowest
# predicted death chance.
# Start from +infinity so the first candidate is always accepted; starting
# from 0 accepted only negative predictions and could leave
# CLASIFFICATION_FINAL_max undefined.
prob = float("inf")
for i in range(1, 5):
    patient = [[ICU_max, PATIENT_TYPE_max, AGE, SEX, TOBACCO_max, INTUBED_max, i, MEDICAL_UNIT]]
    scaled = strScaler.transform(patient)
    # Single-row prediction, expressed as a percentage with 2 decimals.
    predicted = round(estimation.predict(scaled)[0] * 100, 2)
    if predicted < prob:
        prob = predicted
        CLASIFFICATION_FINAL_max = i
print(CLASIFFICATION_FINAL_max)

In [None]:
# Find the best amount for MEDICAL_UNIT, using the values chosen in the
# earlier cells, and keep the one with the lowest predicted death chance.
# Only the odd unit types 1, 3, ..., 13 are tried.
# Start from +infinity so the first candidate is always accepted; starting
# from 0 accepted only negative predictions and could leave MEDICAL_UNIT_max
# undefined.
prob = float("inf")
for i in range(1, 14, 2):
    patient = [[ICU_max, PATIENT_TYPE_max, AGE, SEX, TOBACCO_max, INTUBED_max, CLASIFFICATION_FINAL_max, i]]
    scaled = strScaler.transform(patient)
    # Single-row prediction, expressed as a percentage with 2 decimals.
    predicted = round(estimation.predict(scaled)[0] * 100, 2)
    if predicted < prob:
        prob = predicted
        MEDICAL_UNIT_max = i
print(MEDICAL_UNIT_max)

## Analysis

In [None]:
# Show the recommended feature vector in model column order; AGE and SEX are
# the patient's own values, the rest were chosen by the search cells.
recommended_values = (
    ICU_max, PATIENT_TYPE_max, AGE, SEX,
    TOBACCO_max, INTUBED_max, CLASIFFICATION_FINAL_max, MEDICAL_UNIT_max,
)
print(*recommended_values)

In [None]:
# Report header: patient name, age, and sex (SEX code 1 is female, anything
# else is reported as male).
print(f"Analysis on patient {name}")
Sex = 'Female' if SEX == 1 else 'Male'
print(f"\tAge: {AGE}\t Sex: {Sex} \n")

# ICU advice: below 35 is "minimum", above 70 is "maximum", otherwise moderate.
print("ICU: ")
if ICU_max > 70:
    icu_advice = "Recommend more than 70 or maximum units of ICU for this Patient. \n"
elif ICU_max < 35:
    icu_advice = "Recommend less than 35 or minimum units of ICU for this patient. \n"
else:
    icu_advice = "Recommend a moderate units of ICU about 50 units. \n"
print(icu_advice)

# Treatment location: PATIENT_TYPE 1 is home care, anything else is hospital.
print("Home or Hospital Treatment: ")
if PATIENT_TYPE_max != 1:
    treatment_advice = "Recommend necessary Hospital treatment for this patient. \n"
else:
    treatment_advice = "Recommend Home treatment for this patient. \n"
print(treatment_advice)

# Tobacco advice, based on the recommended amount (TOBACCO_max) and on the
# patient's current usage (TOBACCO).
print("Tobacco usage: ")
# A low recommended amount means "avoid tobacco" whatever the current usage.
# The previous condition tested TOBACCO < 10 or TOBACCO > 10 and therefore
# skipped patients whose TOBACCO is exactly 10.
if TOBACCO_max < 35:
    print("Patient shouldn't go near Tobacco. \n")
elif TOBACCO > 70 and TOBACCO_max > 70:
    print("Patient can use Tobacco as before infection but it must be supervised. \n")
elif TOBACCO_max > 70:
    print("Recommend a moderate, prescribed and supervised amount of Tobacco. \n")
else:
    # Recommended amounts from 35 to 70 previously printed nothing at all.
    print("No specific Tobacco recommendation could be derived for this patient. \n")

# Ventilator advice: below 35 is "minimum", above 70 is "maximum", otherwise
# moderate.
print("Ventilator Machine usage: ")
if INTUBED_max > 70:
    ventilation_advice = "Recommend more than 70 or maximum units of Ventilation for this Patient. \n"
elif INTUBED_max < 35:
    ventilation_advice = "Recommend less than 35 or minimum units of Ventilation for this patient. \n"
else:
    ventilation_advice = "Recommend a moderate units of Ventilation about 50 units. \n"
print(ventilation_advice)

# Medical unit advice: the first tier whose upper bound covers the
# recommended unit wins; values above 10 get the generic fallback.
print("Type of health system required for this patient: ")
unit_tiers = (
    (2, "Maximum Type of Medical unit is required for this patient. \n"),
    (4, "Type 3-4 is recommended for this patient. \n"),
    (6, "Type 5-6 is recommended for this patient. \n"),
    (8, "Type 7-8 is recommended for this patient. \n"),
    (10, "Type 9-10 is recommended for this patient. \n"),
)
unit_advice = "Any available units is enough for this patient. \n"
for upper_bound, tier_message in unit_tiers:
    if MEDICAL_UNIT_max <= upper_bound:
        unit_advice = tier_message
        break
print(unit_advice)

# Covid classification advice: codes 1 to 3 have their own message; any other
# code is reported as not a carrier / inconclusive.
print("Covid degree Recommendations: ")
covid_advice_by_code = {
    1: "This patient have the highest chance for survival.",
    2: "This patient have a high chance for survival but necessary treatments are needed. \n",
    3: "This patient needs treatments and to be supervised. \n",
}
inconclusive_advice = "This patient test was either not a carrier or inconclusive. \nRecommend a period of supervision to confirm Covid infection. \n"
print(covid_advice_by_code.get(CLASIFFICATION_FINAL_max, inconclusive_advice))

# ------------------------------------------- Final prediction --------------------------------------------
# Compare the predicted death chance (in percent, 2 decimals) for the patient
# as entered against the same patient with the recommended values applied.

# Before: the patient's original attributes.
patient = [[ICU, PATIENT_TYPE, AGE, SEX, TOBACCO, INTUBED, CLASIFFICATION_FINAL, MEDICAL_UNIT]]
scaled = strScaler.transform(patient)
baseline_fraction = estimation.predict(scaled)[0]
first_predicted = round(baseline_fraction * 100, 2)

# After: AGE and SEX stay fixed, every other attribute uses its recommended value.
patient = [[ICU_max, PATIENT_TYPE_max, AGE, SEX, TOBACCO_max, INTUBED_max, CLASIFFICATION_FINAL_max, MEDICAL_UNIT_max]]
scaled = strScaler.transform(patient)
recommended_fraction = estimation.predict(scaled)[0]
final_predicted = round(recommended_fraction * 100, 2)

print(f"\tDeath chance before recommendation: {first_predicted}\t Death chance after recommendatios: {final_predicted}")