In [1]:
import numpy as np 
import pandas as pd 

In [2]:
# Loading data

In [3]:
def init_features(data, header="infer"):
    """
    Load a feature table from CSV and assign the 150 feature column names.

    The same column layout is used for the observation features and the
    treatment features:

    Symptoms (10 bits): Covid-Recovered, Covid-Positive, No-Taste/Smell,
        Fever, Headache, Pneumonia, Stomach, Myocarditis, Blood-Clots, Death
    Age (integer)
    Gender (binary)
    Income (floating)
    Genome (128 bits)
    Comorbidities (6 bits): Asthma, Obesity, Smoking, Diabetes, Heart disease, Hypertension
    Vaccination status (3 bits): 0 for unvaccinated, 1 for receiving a specific vaccine for each bit

    Parameters
    ----------
    data : str, path object or file-like object
        Source handed unchanged to ``pd.read_csv``.
    header : int, list of int, "infer" or None, default "infer"
        Forwarded to ``pd.read_csv``. With the default, pandas treats the
        first line of the file as a header row; those labels are then
        replaced by the names above. Pass ``header=None`` for a file that has
        no header row so that its first record is kept as data.
        NOTE(review): whether the project CSV files contain a header row
        cannot be told from this notebook -- confirm, and pass
        ``header=None`` at the call sites if they do not.

    Returns
    -------
    pandas.DataFrame
        The loaded table with its columns renamed positionally.

    Raises
    ------
    ValueError
        Raised by pandas when the file does not have exactly 150 columns.
    """
    symptom_names = ["Covid-Recovered", "Covid-Positive", "No-Taste/Smell",
                     "Fever", "Headache", "Pneumonia", "Stomach",
                     "Myocarditis", "Blood-Clots", "Death"]
    demographic_names = ["Age", "Gender", "Income"]
    genome_names = ["Genome" + str(i) for i in range(1, 129)]
    comorbidity_names = ["Asthma", "Obesity", "Smoking", "Diabetes",
                         "Heart disease", "Hypertension"]
    vaccination_names = ["Vaccination status" + str(i) for i in range(1, 4)]

    features_data = pd.read_csv(data, header=header)
    # Names are assigned by position, so the order of the groups below must
    # match the column order of the file.
    features_data.columns = (symptom_names + demographic_names + genome_names
                             + comorbidity_names + vaccination_names)
    return features_data

In [4]:
def init_actions(path="treatment_actions.csv", header="infer"):
    """
    Load the treatment actions and name the two action columns.

    Parameters
    ----------
    path : str, path object or file-like object, default "treatment_actions.csv"
        Source handed unchanged to ``pd.read_csv``.
    header : int, list of int, "infer" or None, default "infer"
        Forwarded to ``pd.read_csv``. With the default, the first line of the
        file is treated as a header row and its labels are replaced; pass
        ``header=None`` for a file without a header row.

    Returns
    -------
    pandas.DataFrame
        Table with columns ``Treatment1`` and ``Treatment2``.

    Raises
    ------
    ValueError
        Raised by pandas when the file does not have exactly two columns.
    """
    actions = pd.read_csv(path, header=header)
    # Positional rename: first column -> Treatment1, second -> Treatment2.
    actions.columns = ["Treatment1", "Treatment2"]
    return actions

In [5]:
def init_outcomes(path="treatment_outcomes.csv", header="infer"):
    """
    Load the treatment outcome data and name its ten columns.

    Post-Treatment Symptoms (10 bits): Past-Covid (Ignore), Covid+ (Ignore),
    No-Taste/Smell, Fever, Headache, Pneumonia, Stomach, Myocarditis,
    Blood-Clots, Death

    Parameters
    ----------
    path : str, path object or file-like object, default "treatment_outcomes.csv"
        Source handed unchanged to ``pd.read_csv``.
    header : int, list of int, "infer" or None, default "infer"
        Forwarded to ``pd.read_csv``. With the default, the first line of the
        file is treated as a header row and its labels are replaced; pass
        ``header=None`` for a file without a header row.

    Returns
    -------
    pandas.DataFrame
        Table with the ten outcome columns listed above, in that order.

    Raises
    ------
    ValueError
        Raised by pandas when the file does not have exactly ten columns.
    """
    outcome_names = ["Past-Covid", "Covid+", "No-Taste/Smell", "Fever", "Headache",
                     "Pneumonia", "Stomach", "Myocarditis", "Blood-Clots", "Death"]
    outcomes = pd.read_csv(path, header=header)
    # Positional rename; the column order of the file must match outcome_names.
    outcomes.columns = outcome_names
    return outcomes

In [6]:
# Fix dataset

In [7]:
# --- Load the raw tables ---------------------------------------------------
observation_features = init_features("observation_features.csv")
data_obs = observation_features  # second name bound to the same frame
treatment_features = init_features("treatment_features.csv")
data_treat = treatment_features  # second name bound to the same frame
actions = init_actions()
outcomes = init_outcomes()

# The task said to ignore the first two outcome columns.
outcomes = outcomes.iloc[:, 2:]

# Suffix the outcome names so they do not collide with the pre-treatment
# symptom columns when the tables are joined.
outcome_names_new = [name + "_after" for name in outcomes.columns]
outcomes.columns = outcome_names_new

# --- Combine features, actions and outcomes for ex. 2 ----------------------
treatment = data_treat.join(actions).join(outcomes)

# Omit all gene columns. They are selected by name rather than by position so
# the step does not depend on the exact column offsets.
genome_columns = [col for col in treatment.columns if col.startswith("Genome")]
treat_no_genes_unscaled = treatment.drop(columns=genome_columns)

# --- Standardize the numeric features (z-score, sample std) ----------------
num_features = ["Age", "Income"]
num_df = treat_no_genes_unscaled[num_features]
scaled_num_df = (num_df - num_df.mean()) / num_df.std()

# Work on an explicit copy and replace whole columns by name. Writing floats
# in place with .iloc into an integer column is a dtype-incompatible setitem
# (deprecated in pandas >= 2.1), and aliasing the unscaled frame would mutate
# it silently.
treat_no_genes_scaled = treat_no_genes_unscaled.copy()
treat_no_genes_scaled[num_features] = scaled_num_df

# Remove column "Covid-Positive" (because everyone has covid).
# NOTE(review): treat_no_genes is built from the scaled frame, so its Age and
# Income are standardized as well. This matches what the cell produced before
# the rewrite -- confirm that downstream cells expect scaled values.
treat_no_genes = treat_no_genes_scaled.drop(columns="Covid-Positive")

In [8]:
# Looking at different treatments

In [9]:
# One boolean mask per treatment flag value; the four groups below are the
# pairwise combinations of these masks.
got_treatment1 = treat_no_genes["Treatment1"] == 1
no_treatment1 = treat_no_genes["Treatment1"] == 0
got_treatment2 = treat_no_genes["Treatment2"] == 1
no_treatment2 = treat_no_genes["Treatment2"] == 0

# Only treatment 1 (211 people)
treat_1 = treat_no_genes[got_treatment1 & no_treatment2]
# Only treatment 2 (211 people)
treat_2 = treat_no_genes[got_treatment2 & no_treatment1]
# Both treatments (240 people)
treat_both = treat_no_genes[got_treatment1 & got_treatment2]
# No treatment at all (215 people)
treat_none = treat_no_genes[no_treatment1 & no_treatment2]

In [10]:
# Number of people with different symptoms after

In [11]:
# Report, for every post-treatment symptom column, how many rows have it set.
total_people = treat_no_genes.shape[0]
print(f"Number of people with different symtoms, total people is {total_people}")
print("--------------------------------------------------------------")
for symptom in outcomes.columns:
    affected = treat_no_genes.loc[treat_no_genes[symptom] == 1]
    print(f"People with symptom {symptom}: ", len(affected))

Number of people with different symtoms, total people is 877
--------------------------------------------------------------
People with symptom No-Taste/Smell_after:  23
People with symptom Fever_after:  18
People with symptom Headache_after:  1
People with symptom Pneumonia_after:  19
People with symptom Stomach_after:  3
People with symptom Myocarditis_after:  6
People with symptom Blood-Clots_after:  17
People with symptom Death_after:  8


In [12]:
# People with symptom before treatment compared to people with symptom after treatment

In [13]:
# Pair each pre-treatment symptom column with its post-treatment counterpart
# and print how many rows have the symptom set in each.
total_people = treat_no_genes.shape[0]
print(f"Number of people with different symtoms before and after treatment, total people is {total_people}")
print("-" * 90)
symptoms_before = treat_no_genes.columns[1:9]  # columns 1..8 of treat_no_genes
symptoms_after = outcomes.columns
for before_col, after_col in zip(symptoms_before, symptoms_after):
    count_before = len(treat_no_genes.loc[treat_no_genes[before_col] == 1])
    count_after = len(treat_no_genes.loc[treat_no_genes[after_col] == 1])
    print(f"People with symptom {before_col} before treatment: ", count_before)
    print(f"People with symptom {after_col} after treatment: ", count_after)
    print("-" * 60)

Number of people with different symtoms before and after treatment, total people is 877
------------------------------------------------------------------------------------------
People with symptom No-Taste/Smell before treatment:  49
People with symptom No-Taste/Smell_after after treatment:  23
------------------------------------------------------------
People with symptom Fever before treatment:  24
People with symptom Fever_after after treatment:  18
------------------------------------------------------------
People with symptom Headache before treatment:  7
People with symptom Headache_after after treatment:  1
------------------------------------------------------------
People with symptom Pneumonia before treatment:  34
People with symptom Pneumonia_after after treatment:  19
------------------------------------------------------------
People with symptom Stomach before treatment:  5
People with symptom Stomach_after after treatment:  3
----------------------------------------