In [183]:
%matplotlib inline

In [185]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import subprocess

In [187]:
%run ../src/data_processing.py

In [189]:
# Run the processing script in a separate interpreter process.
# check=True makes a non-zero exit status raise CalledProcessError, so a failed
# run stops the notebook here instead of being silently ignored.
# NOTE(review): the same script is also executed via %run in the previous cell —
# confirm whether this second invocation is still needed.
subprocess.run(["python", "../src/data_processing.py"], check=True)

CompletedProcess(args=['python', '../src/data_processing.py'], returncode=0)

In [191]:
# Read the feature table from the CSV under ../data into a DataFrame.
feature_csv_path = "../data/feature_dataset.csv"
asthma_feature_dataset = pd.read_csv(feature_csv_path)

In [193]:
# Display the freshly loaded frame as this cell's output.
asthma_feature_dataset

Unnamed: 0,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,PhysicalActivity,DietQuality,SleepQuality,PollutionExposure,...,ShortnessOfBreath,ChestTightness,Coughing,NighttimeSymptoms,ExerciseInduced,Diagnosis,FVC_Wheezing,FEV1_Coughing,Dust_NighttimeSymptoms,RespiratoryDistress
0,63,0,1,0,17.5150,0,0.894448,5.488696,8.701003,7.388481,...,0,1,0,0,1,0,0.000000,-0.000000,-0.000000,1
1,26,1,2,2,22.5180,0,5.897329,6.341014,5.153966,1.969838,...,0,0,1,1,1,0,-1.564256,-0.407132,0.560684,3
2,57,0,2,1,17.5150,0,6.739367,9.196237,6.840647,1.460593,...,1,1,0,1,1,0,0.983019,-0.000000,0.162295,3
3,40,1,2,1,37.4905,0,1.404503,5.826532,4.253036,0.581905,...,0,1,1,1,0,0,-1.105641,0.561114,-0.355611,4
4,61,0,0,3,17.5150,0,4.604493,3.127048,9.625799,0.980875,...,1,1,0,0,1,0,-0.516586,0.000000,0.000000,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2387,43,1,0,2,27.5085,0,3.019854,6.119637,8.300960,2.483829,...,1,0,0,0,1,1,0.000000,0.000000,-0.000000,0
2388,18,1,0,1,22.5180,0,5.805180,4.386992,7.731192,7.733983,...,0,0,1,1,0,1,0.000000,-1.642920,0.519779,2
2389,54,0,3,2,37.4905,0,4.735169,8.214064,7.483521,2.794847,...,0,1,1,0,1,1,-0.302584,-1.001130,0.000000,3
2390,46,1,0,2,22.5180,0,9.672637,7.362861,6.717272,9.448862,...,1,1,0,1,1,0,-0.000000,0.000000,0.024327,2


In [195]:
# Rescale selected columns with the scale_features helper.

# Column names handed to scale_features.
features_to_scale = [
    "LungFunctionFVC",
    "LungFunctionFEV1",
    "DustExposure",
]
# The returned frame replaces asthma_feature_dataset for the cells below.
asthma_feature_dataset = scale_features(asthma_feature_dataset, features_to_scale)

In [197]:
# Build interaction terms with the create_interaction_terms helper.

# Each tuple names the two columns to be combined.
# NOTE(review): the output recorded at the end of this notebook shows
# LungFunctionFVC_Wheezing_interaction / LungFunctionFEV1_Coughing_interaction
# matching the existing FVC_Wheezing / FEV1_Coughing columns on every displayed
# row — these terms may already be present in the CSV; confirm before keeping both.
interaction_pairs = [
    ("LungFunctionFVC", "Wheezing"),
    ("LungFunctionFEV1", "Coughing"),
]
# The returned frame replaces asthma_feature_dataset for the cells below.
asthma_feature_dataset = create_interaction_terms(asthma_feature_dataset, interaction_pairs)

In [199]:
# Remove highly correlated features with the drop_correlated_features helper.

# Value passed as the helper's `threshold` argument.
correlation_threshold = 0.8
asthma_feature_dataset = drop_correlated_features(
    asthma_feature_dataset, threshold=correlation_threshold
)

In [201]:
# Display the frame after the scaling, interaction-term and correlation-drop cells.
asthma_feature_dataset

Unnamed: 0,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,PhysicalActivity,DietQuality,SleepQuality,PollutionExposure,...,ShortnessOfBreath,ChestTightness,Coughing,NighttimeSymptoms,ExerciseInduced,Diagnosis,FVC_Wheezing,FEV1_Coughing,Dust_NighttimeSymptoms,RespiratoryDistress
0,63,0,1,0,17.5150,0,0.894448,5.488696,8.701003,7.388481,...,0,1,0,0,1,0,0.000000,-0.000000,-0.000000,1
1,26,1,2,2,22.5180,0,5.897329,6.341014,5.153966,1.969838,...,0,0,1,1,1,0,-1.564256,-0.407132,0.560684,3
2,57,0,2,1,17.5150,0,6.739367,9.196237,6.840647,1.460593,...,1,1,0,1,1,0,0.983019,-0.000000,0.162295,3
3,40,1,2,1,37.4905,0,1.404503,5.826532,4.253036,0.581905,...,0,1,1,1,0,0,-1.105641,0.561114,-0.355611,4
4,61,0,0,3,17.5150,0,4.604493,3.127048,9.625799,0.980875,...,1,1,0,0,1,0,-0.516586,0.000000,0.000000,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2387,43,1,0,2,27.5085,0,3.019854,6.119637,8.300960,2.483829,...,1,0,0,0,1,1,0.000000,0.000000,-0.000000,0
2388,18,1,0,1,22.5180,0,5.805180,4.386992,7.731192,7.733983,...,0,0,1,1,0,1,0.000000,-1.642920,0.519779,2
2389,54,0,3,2,37.4905,0,4.735169,8.214064,7.483521,2.794847,...,0,1,1,0,1,1,-0.302584,-1.001130,0.000000,3
2390,46,1,0,2,22.5180,0,9.672637,7.362861,6.717272,9.448862,...,1,1,0,1,1,0,-0.000000,0.000000,0.024327,2


In [203]:
# Apply preprocessing to the data

# The result is bound to df_preprocessed; asthma_feature_dataset is not reassigned here.
# NOTE(review): the output recorded for the next cell shows *_interaction columns on
# asthma_feature_dataset that are not visible in the display before this cell —
# preprocess_data may be modifying its argument in place; confirm.
df_preprocessed = preprocess_data(asthma_feature_dataset)

In [205]:
# Display asthma_feature_dataset, the frame that was passed to preprocess_data above.
# NOTE(review): df_preprocessed, the value returned by preprocess_data, is not
# displayed in this part of the notebook — confirm this cell was not meant to show it.
asthma_feature_dataset

Unnamed: 0,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,PhysicalActivity,DietQuality,SleepQuality,PollutionExposure,...,Coughing,NighttimeSymptoms,ExerciseInduced,Diagnosis,FVC_Wheezing,FEV1_Coughing,Dust_NighttimeSymptoms,RespiratoryDistress,LungFunctionFVC_Wheezing_interaction,LungFunctionFEV1_Coughing_interaction
0,63,0,1,0,17.5150,0,0.894448,5.488696,8.701003,7.388481,...,0,0,1,0,0.000000,-0.000000,-0.000000,1,0.000000,-0.000000
1,26,1,2,2,22.5180,0,5.897329,6.341014,5.153966,1.969838,...,1,1,1,0,-1.564256,-0.407132,0.560684,3,-1.564256,-0.407132
2,57,0,2,1,17.5150,0,6.739367,9.196237,6.840647,1.460593,...,0,1,1,0,0.983019,-0.000000,0.162295,3,0.983019,-0.000000
3,40,1,2,1,37.4905,0,1.404503,5.826532,4.253036,0.581905,...,1,1,0,0,-1.105641,0.561114,-0.355611,4,-1.105641,0.561114
4,61,0,0,3,17.5150,0,4.604493,3.127048,9.625799,0.980875,...,0,0,1,0,-0.516586,0.000000,0.000000,2,-0.516586,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2387,43,1,0,2,27.5085,0,3.019854,6.119637,8.300960,2.483829,...,0,0,1,1,0.000000,0.000000,-0.000000,0,0.000000,0.000000
2388,18,1,0,1,22.5180,0,5.805180,4.386992,7.731192,7.733983,...,1,1,0,1,0.000000,-1.642920,0.519779,2,0.000000,-1.642920
2389,54,0,3,2,37.4905,0,4.735169,8.214064,7.483521,2.794847,...,1,0,1,1,-0.302584,-1.001130,0.000000,3,-0.302584,-1.001130
2390,46,1,0,2,22.5180,0,9.672637,7.362861,6.717272,9.448862,...,0,1,1,0,-0.000000,0.000000,0.024327,2,-0.000000,0.000000
