## Importing Packages and Source Code

In [None]:
import json
import os
import sys
## biom used to view Qiime Artifacts
import biom
%matplotlib inline 

In [None]:
## Import Source Code
from src.data import make_dataset
from src.features import build_features
from src.models import make_models
from src.visualizations import make_visualizations

In [None]:
## Creating paths to store temp and out data.
## exist_ok=True makes each call a no-op when the directory is already there,
## which removes the check-then-create race and keeps the cell safe to re-run.
os.makedirs("data/temp", exist_ok=True)
os.makedirs("data/out", exist_ok=True)

In [None]:
## Graph formatting: set the default figure size for plots in this notebook
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.figsize': [10, 5]})

## Reading Data

In [None]:
## Obtaining file paths
## The encoding is pinned to UTF-8 (the JSON interchange encoding) so the
## config is read the same way regardless of the platform's locale default.
with open("config/data-params.json", encoding="utf-8") as fh:
    file_paths = json.load(fh)

In [None]:
## Read the feature table and the metadata from the locations stored under
## the "feature_table_path" and "metadata_path" keys of file_paths
## (file_paths is loaded from config/data-params.json).
feature_table = make_dataset.read_feature_table(file_paths["feature_table_path"])
metadata = make_dataset.read_metadata(file_paths["metadata_path"])

In [None]:
## Build the view of the feature table returned by
## make_dataset.feature_table_biom_view and print its head() as a preview.
## NOTE(review): presumably a biom.Table, given the biom import — confirm.
biom_table = make_dataset.feature_table_biom_view(feature_table)
print(biom_table.head())

In [None]:
## Show metadata.head() as the cell output for a quick look at the metadata
metadata.head()

# Create Features

In [None]:
## Obtaining feature-building params (forwarded as keyword arguments to
## build_features.organize_metadata; 'disease_cols' is also used by the plots).
## The encoding is pinned to UTF-8 (the JSON interchange encoding) so the
## config is read the same way regardless of the platform's locale default.
with open("config/feature-params.json", encoding="utf-8") as fh:
    feature_params = json.load(fh)

In [None]:
## Organize the metadata using the ids reported by biom_table.ids(); every
## entry of feature_params is forwarded as a keyword argument.
## The result is indexed as [0] and [1] by later cells.
organized_metadata = build_features.organize_metadata(metadata, biom_table.ids(), **feature_params)

In [None]:
## Display the first element of organized_metadata (the one used by the
## plotting cells of this notebook)
organized_metadata[0]

In [None]:
## Display the second element of organized_metadata
organized_metadata[1]

# Exploratory Data Analysis (EDA)

In [None]:
## Plot the 'ckd_v2' column of organized_metadata[0].
## NOTE(review): presumably a bar chart of a binary column, going by the
## helper's name — confirm against make_visualizations.
make_visualizations.create_bar_col_binary(organized_metadata[0], 'ckd_v2')

In [None]:
## Plot built from the columns listed in feature_params['disease_cols'].
## NOTE(review): presumably per-disease counts, going by the helper's name —
## confirm against make_visualizations.
make_visualizations.disease_counts_graph(organized_metadata[0], feature_params['disease_cols'])

In [None]:
## Plot built from the columns listed in feature_params['disease_cols'].
## NOTE(review): presumably disease co-occurrence, going by the helper's
## name — confirm against make_visualizations.
make_visualizations.co_occurence_graph(organized_metadata[0], feature_params['disease_cols'])

In [None]:
## Sum the disease columns across each row (axis=1), then tabulate how often
## each row total occurs; the tabulation is the cell output.
disease_frame = organized_metadata[0][feature_params['disease_cols']]
per_row_total = disease_frame.sum(axis=1)
per_row_total.value_counts()

In [None]:
## Plot built from the columns listed in feature_params['disease_cols'].
## NOTE(review): presumably graphs of the per-row disease totals tabulated in
## the previous cell, going by the helper's name — confirm.
make_visualizations.total_disease_count_graphs(organized_metadata[0],feature_params['disease_cols'])

# Model Building

In [None]:
## Obtaining model params
## The encoding is pinned to UTF-8 (the JSON interchange encoding) so the
## config is read the same way regardless of the platform's locale default.
## NOTE(review): model_params is not referenced by the cells visible here —
## confirm it is used further down or pass it to the model builder.
with open("config/model-params.json", encoding="utf-8") as fh:
    model_params = json.load(fh)

In [None]:
## TODO: need to return the updated metadata path; it is hard-coded below.
## NOTE(review): presumably this file is written by an earlier step of the
## notebook — confirm which one produces it.
qiime_metadata_tf = make_dataset.read_qiime_metadata("data/temp/final_metadata_tf.tsv")
## Display the loaded metadata object as the cell output
qiime_metadata_tf

In [None]:
## Run make_models.sample_classifier_single_disease on the feature table with
## the 'ckd_v2' column of the QIIME metadata.
## NOTE(review): presumably 'ckd_v2' is the prediction target — confirm.
ckd_v2_model = make_models.sample_classifier_single_disease(feature_table, qiime_metadata_tf.get_column('ckd_v2'))
## Display the returned object as the cell output
ckd_v2_model