## Importing Packages and Source Code

In [None]:
import json
import os
import sys
## biom is used to view QIIME artifacts
import biom
%matplotlib inline 

In [None]:
## Import Source Code
from src.data import make_dataset
from src.features import build_features, metrics_analysis
from src.models import make_models, evaluate_models
from src.visualizations import make_visualizations, dimensionality_analysis

In [None]:
## Creating paths to store temp and out data
# exist_ok=True keeps the cell safe to re-run and removes the gap between
# checking for the directory and creating it.
os.makedirs("data/temp", exist_ok=True)
os.makedirs("data/out", exist_ok=True)

In [None]:
## Used to format graph
import matplotlib.pyplot as plt

## Reading Data

In [None]:
## Obtaining file paths
# JSON is UTF-8 by specification; naming the encoding keeps the read from
# depending on the platform's default locale encoding.
with open("config/data-params.json", encoding="utf-8") as fh:
    file_paths = json.load(fh)

In [None]:
# Reading Data
# Both input locations come from the data config loaded into file_paths.
feature_table = make_dataset.read_feature_table(file_paths["feature_table_path"])
metadata = make_dataset.read_metadata(file_paths["metadata_path"])

In [None]:
# Build a biom view of the feature table (presumably a biom.Table -- confirm
# against make_dataset.feature_table_biom_view) and print its head() preview.
biom_table = make_dataset.feature_table_biom_view(feature_table)
print(biom_table.head())

In [None]:
# Preview the metadata via head().
metadata.head()

# Create Features

In [None]:
## Obtaining feature parameters
# These settings are expanded into keyword arguments for organize_metadata and
# also supply the 'disease_cols' list used by the EDA and evaluation cells.
# JSON is UTF-8 by specification; naming the encoding keeps the read from
# depending on the platform's default locale encoding.
with open("config/feature-params.json", encoding="utf-8") as fh:
    feature_params = json.load(fh)

In [None]:
# Organizing metadata
# biom_table.ids() supplies the IDs of the biom table, and every entry of the
# feature config is passed as a keyword argument. Later cells index the result
# as organized_metadata[0] and organized_metadata[1].
organized_metadata = build_features.organize_metadata(metadata, biom_table.ids(), **feature_params)

In [None]:
# First element of organized_metadata; the EDA cells read the disease columns
# from it.
organized_metadata[0]

In [None]:
# Second element of organized_metadata; it feeds the 'ckd_v2' plot and the
# pre-CVD class balancing further down.
organized_metadata[1]

# Exploratory Data Analysis (EDA)

In [None]:
# Plot the 'ckd_v2' (chronic kidney disease) column of the second organized
# table; presumably a bar chart of its two classes -- confirm against
# make_visualizations.create_bar_col_binary.
make_visualizations.create_bar_col_binary(organized_metadata[1], 'ckd_v2')

In [None]:
# Maps each raw disease column name to a human-readable label. The mapping is
# used to rename columns, and its values are passed as the disease column
# list, when plotting the disease counts.
diseases_cols = {
    'abdominal_obesity_ncep_v2': 'Obesity',
    'ckd_v2': 'Chronic Kidney Disease',
    'diabetes2_v2': 'Diabetes',
    'precvd_v2': 'Pre-CVD',
    'elevated_bp_selfmeds_v2': 'Elevated Blood Pressure',
    'dyslipidemia_v2': 'Dyslipidemia'
}

In [None]:
# Display the first organized table again before plotting its disease counts.
organized_metadata[0]

In [None]:
# Plot the disease counts using readable labels: the raw disease columns are
# renamed through diseases_cols, and the same labels are passed as the columns
# to plot.
make_visualizations.disease_counts_graph(
    organized_metadata[0].rename(columns=diseases_cols),
    disease_cols=diseases_cols.values(),
)

In [None]:
# Sum the configured disease columns across each row, tally how often each
# row total occurs, and order the tally from the largest count to the smallest.
(
    organized_metadata[0][feature_params['disease_cols']]
    .sum(axis=1)
    .value_counts()
    .sort_values(ascending=False)
)

In [None]:
# Plot the per-row disease totals for the configured disease columns of the
# first organized table (presumably the graphical form of the tally computed
# in the previous cell -- confirm against total_disease_count_graph).
make_visualizations.total_disease_count_graph(organized_metadata[0],feature_params['disease_cols'])

# Model Building

In [None]:
## Obtaining model params
# JSON is UTF-8 by specification; naming the encoding keeps the read from
# depending on the platform's default locale encoding.
with open("config/model-params.json", encoding="utf-8") as fh:
    model_params = json.load(fh)

In [None]:
# Select only the 'precvd_v2' column of the second organized table (kept as a
# one-column table by the double brackets) and display it.
# NOTE(review): despite the name, no resampling happens in this cell, and the
# variable is not read by any of the later cells visible here -- confirm
# whether it is still needed.
precvd_undersample = organized_metadata[1][['precvd_v2']]
precvd_undersample

In [None]:
# Balancing precvd classes
# Works from the full second organized table; the result is later handed to
# the binary relevance model.
balanced_precvd_df = build_features.balance_precvd(organized_metadata[1])

In [None]:
# Load the QIIME metadata from the temp directory and display it.
# NOTE(review): no cell visible here writes data/temp/final_metadata_tf.tsv;
# presumably an earlier step (organize_metadata?) produces it -- confirm, and
# consider moving the path into a config file like the other inputs.
qiime_metadata_tf = make_dataset.read_qiime_metadata("data/temp/final_metadata_tf.tsv")
qiime_metadata_tf

In [None]:
# Filter the feature table using the QIIME metadata, then display the result
# viewed as a biom.Table.
# NOTE(review): the meaning of the literal 4 is not visible from this notebook
# -- confirm against make_dataset.filter_feature_table and consider naming it
# or moving it into a config file.
filtered_table = make_dataset.filter_feature_table(feature_table, 4, qiime_metadata_tf)
filtered_table.view(biom.Table)

In [None]:
# Build the binary relevance model from the filtered feature table, the QIIME
# metadata, the class-balanced pre-CVD table, and the disease targets named in
# the model config.
binary_relevance_model = make_models.binary_relevance_model(
    filtered_table,
    qiime_metadata_tf,
    balanced_precvd_df,
    model_params['disease_targets'],
)

In [None]:
# Compute the accuracy scores of the binary relevance model for the configured
# disease columns and display them.
# NOTE(review): the model was built with model_params['disease_targets'] but
# is scored with feature_params['disease_cols'] -- confirm both lists name the
# same diseases in the same order.
disease_accuracy_scores = evaluate_models.binary_relevance_accuracy_scores(binary_relevance_model, feature_params['disease_cols'])
disease_accuracy_scores

In [None]:
# Plot the accuracy scores computed in the previous cell.
make_visualizations.binary_relevance_accuracy_scores_graph(disease_accuracy_scores)