## Importing Packages and Source Code

In [None]:
import json
import os
import sys
## biom is used to view QIIME 2 artifacts as biom tables
import biom
%matplotlib inline 

In [None]:
## Import Source Code
from src.data import make_dataset
from src.features import build_features, metrics_analysis
from src.models import make_models, evaluate_models
from src.visualizations import make_visualizations

In [None]:
## Creating paths to store temp and out data
## exist_ok=True keeps the cell idempotent on re-runs and avoids the
## check-then-create race of a separate os.path.exists() test.
for data_dir in ("data/temp", "data/out"):
    os.makedirs(data_dir, exist_ok=True)

In [None]:
## Used to format graph
import matplotlib.pyplot as plt

## Reading Data

In [None]:
## Obtaining file paths
## JSON is read as UTF-8 explicitly so the result does not depend on the
## platform's locale encoding.
with open("config/data-params.json", encoding="utf-8") as fh:
    file_paths = json.load(fh)

In [None]:
## Load the two inputs from the locations listed in config/data-params.json.
feature_table_path = file_paths["feature_table_path"]
feature_table = make_dataset.read_feature_table(feature_table_path)

metadata_path = file_paths["metadata_path"]
metadata = make_dataset.read_metadata(metadata_path)

In [None]:
## View the feature table through the project's biom helper and print its
## head as a quick sanity check.
biom_table = make_dataset.feature_table_biom_view(feature_table)
table_head = biom_table.head()
print(table_head)

In [None]:
## Preview the head of the metadata loaded by make_dataset.read_metadata.
metadata.head()

# Create Features

In [None]:
## Obtaining feature params
## JSON is read as UTF-8 explicitly so the result does not depend on the
## platform's locale encoding.
with open("config/feature-params.json", encoding="utf-8") as fh:
    feature_params = json.load(fh)

In [None]:
## Organize the metadata against the IDs of the biom table; every entry of
## config/feature-params.json is forwarded as a keyword argument.
table_ids = biom_table.ids()
organized_metadata = build_features.organize_metadata(metadata, table_ids, **feature_params)

In [None]:
## First element of the organize_metadata result; this is the table the EDA
## cells below plot from.
organized_metadata[0]

In [None]:
## Second element of the organize_metadata result.
organized_metadata[1]

# EDA

In [None]:
## Bar chart for the 'ckd_v2' column of the organized metadata
## (presumably a binary yes/no column, going by the helper's name).
make_visualizations.create_bar_col_binary(organized_metadata[0], 'ckd_v2')

In [None]:
## Re-display the first organize_metadata element for reference.
organized_metadata[0]

In [None]:
## Metadata column name -> short display label used when plotting.
diseases_cols = dict(
    abdominal_obesity_ncep_v2='Obesity',
    diabetes2_v2='Diabetes',
    dyslipidemia_v2='Dyslipidemia',
    hypertension2_v2='Hypertension',
    ckd_v2='CKD',
    precvd_v2='PreCVD',
    elevated_bp_selfmeds_v2='Elevated_bp',
)

In [None]:
## Plot per-disease counts using the short display labels: the columns are
## renamed first, then the labels are passed as the columns to plot.
labelled_metadata = organized_metadata[0].rename(columns=diseases_cols)
make_visualizations.disease_counts_graph(
    labelled_metadata,
    disease_cols=diseases_cols.values(),
)

In [None]:
## Co-occurrence plot over the disease columns configured in feature-params.json.
make_visualizations.co_occurence_graph(organized_metadata[0], feature_params['disease_cols'])

In [None]:
## Sum the configured disease columns across each row, then tabulate how
## often each row total occurs.
disease_flags = organized_metadata[0][feature_params['disease_cols']]
row_totals = disease_flags.sum(axis=1)
row_totals.value_counts()

In [None]:
## Plot of total disease counts over the configured disease columns
## (presumably the graphical counterpart of the value_counts cell above).
make_visualizations.total_disease_count_graph(organized_metadata[0],feature_params['disease_cols'])

# Model Building

In [None]:
## Obtaining model params
## JSON is read as UTF-8 explicitly so the result does not depend on the
## platform's locale encoding.
with open("config/model-params.json", encoding="utf-8") as fh:
    model_params = json.load(fh)

In [None]:
# TODO (from the original note): an earlier step should return the path of the
# updated metadata file so it does not have to be hard-coded here.
# NOTE(review): the file is presumably written by the feature-building step;
# confirm which function produces data/temp/final_metadata_tf.tsv.
qiime_metadata_tf = make_dataset.read_qiime_metadata("data/temp/final_metadata_tf.tsv")
# Display the loaded metadata object.
qiime_metadata_tf

In [None]:
## Filter the feature table against the QIIME metadata.
## NOTE(review): the meaning of the literal 4 (presumably a minimum
## count/frequency threshold) is defined inside filter_feature_table; confirm.
filtered_table = make_dataset.filter_feature_table(feature_table, 4, qiime_metadata_tf)
## Display the filtered result as a biom.Table.
filtered_table.view(biom.Table)

In [None]:
## Build the binary relevance model from the filtered table, the QIIME
## metadata and the configured disease columns.
binary_relevance_model = make_models.binary_relevance_model(filtered_table, qiime_metadata_tf, feature_params['disease_cols'])

In [None]:
## Accuracy scores of the binary relevance model for the configured disease columns.
disease_accuracy_scores = evaluate_models.binary_relevance_accuracy_scores(binary_relevance_model, feature_params['disease_cols'])

In [None]:
## Plot the accuracy scores computed in the previous cell.
make_visualizations.binary_relevance_accuracy_scores_graph(disease_accuracy_scores)

# PERMANOVA Test

In [None]:
from qiime2 import Artifact
from qiime2.plugins.diversity.pipelines import core_metrics_phylogenetic
from qiime2.plugins.feature_table.methods import rarefy

In [None]:
## Load the tree artifact from disk; it is passed as `phylogeny` to
## extract_core_metrics below.
tree_artifact = Artifact.load('data/raw/tree_file.qza')

In [None]:
## Compute the core diversity metrics for the filtered table.
## NOTE(review): 7930 is passed positionally and is presumably the rarefaction
## sampling depth; confirm against extract_core_metrics.
metrics = metrics_analysis.extract_core_metrics(filtered_table, 7930, metadata = qiime_metadata_tf, phylogeny = tree_artifact)

In [None]:
## Run the PERMANOVA helper with both UniFrac distance matrices, the QIIME
## metadata and the configured disease columns.
unweighted_dm = metrics.unweighted_unifrac_distance_matrix
weighted_dm = metrics.weighted_unifrac_distance_matrix
metrics_analysis.permanova_test_all_diseases(
    unweighted_dm,
    weighted_dm,
    qiime_metadata_tf,
    feature_params['disease_cols'],
)

# ML With Ordination Data

In [None]:
## Convert the QIIME metadata to a DataFrame so a column can be edited.
df = qiime_metadata_tf.to_dataframe()

In [None]:
import pandas as pd

In [None]:
## Cast 'hispanic_origin' to str.
## NOTE(review): presumably so it is treated as a categorical column when the
## frame is wrapped back into qiime2.Metadata below; confirm.
df['hispanic_origin'] = df['hispanic_origin'].astype(str)

In [None]:
import qiime2

In [None]:
## Wrap the edited DataFrame back into a qiime2.Metadata object.
qiime_metadata_beta_group = qiime2.Metadata(df)

In [None]:
import skbio

In [None]:
## Inspect the sample coordinates of the weighted UniFrac PCoA.
## The diversity results are bound to `metrics` (from extract_core_metrics);
## `core_metrics` was never defined in this notebook and raised NameError.
metrics.weighted_unifrac_pcoa_results.view(skbio.OrdinationResults).samples

In [None]:
## Sample coordinates of the Bray-Curtis PCoA, used as the feature matrix below.
## The diversity results are bound to `metrics` (from extract_core_metrics);
## `core_metrics` was never defined in this notebook and raised NameError.
## Note this rebinds `df`, which previously held the metadata DataFrame.
df = metrics.bray_curtis_pcoa_results.view(skbio.OrdinationResults).samples

In [None]:
## Select the metadata rows in the order of the ordination index so labels
## line up with the features in `df`. This rebinds `metadata`.
beta_group_frame = qiime_metadata_beta_group.to_dataframe()
metadata = beta_group_frame.loc[df.index]
metadata

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

In [None]:
## Seed the estimator so the reported score is reproducible across runs,
## matching the seed already used for train_test_split (random_state=100).
clf = GradientBoostingClassifier(random_state=100)

In [None]:
## Hold out 30% of the rows for testing; the target is the 'ckd_v2' column
## and the split is seeded for repeatability.
features = df
labels = metadata['ckd_v2']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=100,
)

In [None]:
## Fit the classifier on the training split.
clf.fit(X_train, y_train)

In [None]:
## Score the fitted classifier on the held-out 30% test split.
clf.score(X_test, y_test)