import libraries

In [1]:
# Load Python packages
import import_ipynb
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt
from rpy2 import robjects
from rpy2.robjects.packages import importr
from utils import load_datasets, plot_roc, make_meta, to_otus

# Load R & packages
# The rpy2 IPython extension provides the %R / %%R magics used in this notebook.
%load_ext rpy2.ipython
# Attach the R libraries quietly; the source-tracking cell below calls
# CalcJSDivergence and FEAST (presumably from textmineR and FEAST
# respectively -- confirm).
%R suppressMessages(library(textmineR))
%R suppressMessages(library(FEAST))

# basic configuration for pyplot & numpy
# NOTE(review): the 'science' / 'no-latex' styles are not built into matplotlib;
# presumably they come from an external style package -- confirm it is installed.
plt.style.use(['science','no-latex'])
# Seed the legacy global NumPy RNG so the random Source/Sink split drawn by
# make_meta (np.random.rand) is reproducible on a fresh Restart & Run All.
np.random.seed(1)

importing Jupyter notebook from utils.ipynb


load datasets

In [2]:
# Load the raw tables through the load_datasets helper imported from utils.
# The result is indexed by table name further down (e.g. datasets['3F'])
# and is passed as a whole to to_otus.
datasets = load_datasets()

In [3]:
def make_meta(otus_with_status, source_fraction=0.9):
    """Build the Source/Sink metadata table for a set of samples.

    Each sample is independently labelled 'Source' with probability
    ``source_fraction`` and 'Sink' otherwise, using the global NumPy RNG
    (``np.random.rand``), so seed it beforehand for reproducible splits.

    NOTE: this definition shadows the ``make_meta`` imported from ``utils``
    at the top of the notebook.

    Parameters
    ----------
    otus_with_status : pandas.DataFrame
        Must provide a 'SampleID' column (used as the index of the result)
        and a 'Status' column (copied into 'Env').
    source_fraction : float, default 0.9
        Probability that a sample is labelled as a source.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'SampleID' with columns:
        'Env'        -- the sample's 'Status' value,
        'SourceSink' -- 'Source' or 'Sink',
        'id'         -- 1 for every source; 1..n_sinks (in row order) for sinks.
    """
    # Size the random mask from the input instead of a hard-coded 1352 so the
    # function works for any number of samples (a length mismatch used to make
    # the boolean .loc assignment fail).
    n_samples = len(otus_with_status)
    sinks_ix = ~(np.random.rand(n_samples) < source_fraction)

    # Sources share id 1; sinks are numbered consecutively. Building the array
    # up front also handles the "no sinks drawn" case without special-casing.
    ids = np.ones(n_samples, dtype=int)
    ids[sinks_ix] = np.arange(1, int(sinks_ix.sum()) + 1)

    meta = pd.DataFrame(
        data={
            'Env': otus_with_status['Status'].tolist(),
            'SourceSink': np.where(sinks_ix, 'Sink', 'Source'),
            'id': ids,
        },
        index=otus_with_status['SampleID'],
    )
    return meta

# Generate meta table using information from '3F' table, save to ../Tmp/...
# Create the scratch directory first: to_csv (and the R cell below, which also
# writes into ../Tmp/) fails on a fresh checkout if the directory is missing.
os.makedirs('../Tmp', exist_ok=True)
# A copy is passed so make_meta never touches the table held in `datasets`.
meta = make_meta(datasets['3F'].copy())
meta.to_csv('../Tmp/meta.csv')

# Transform raw otus table to FEAST & JSD-acceptable otus
# Missing values are filled with 0 and every table is cast to integers.
# The variable names are kept as-is because the %%R cell below imports
# them by name via its -i arguments.
otus = to_otus(datasets)
a_101f = otus['整数-101F'].fillna(0).astype(int)
b_3f = otus['3F'].fillna(0).astype(int)
c_top5f = otus['top5F'].fillna(0).astype(int)
d_top3f = otus['top3F'].fillna(0).astype(int)

Run source tracking (FEAST) and compute JSD distances

In [None]:
%%time
%%R -i meta -i a_101f -i b_3f -i c_top5f -i d_top3f

# Sample IDs by role, taken from the row names of the metadata table.
# They are used below to keep only sink rows and source columns of the JSD matrix.
ids.sources = rownames(meta[meta$SourceSink == 'Source', ])
ids.sinks = rownames(meta[meta$SourceSink == 'Sink', ])

# Prepare for iteration over datasets
# `names` holds the output-file prefix of each table, in the same order as the list.
multi_otus <- list(F5_top=c_top5f, F3_top=d_top3f, F101=a_101f, F3=b_3f)
names <- c("F5_top", "F3_top", "F101", "F3")

# Iteration (seq_along is safe even if the list were empty, unlike 1:length())
for(i in seq_along(multi_otus)){
    # Drop rows whose counts sum to zero before handing the table to FEAST
    otus <- as.matrix(multi_otus[[i]])
    otus <- otus[rowSums(otus) > 0, ]

    # Calculating source proportions and save to ../Tmp/...
    FEAST(C = otus, metadata = meta, different_sources_flag = 0,
      COVERAGE = 1000, dir_path = "../Tmp/", outfile=names[i])

    # Calculating JSD matrix and save to ../Tmp/...
    # NOTE(review): the divergence is computed on the unfiltered table, not on
    # the zero-row-filtered `otus` above -- confirm this is intentional.
    dis.ma <- CalcJSDivergence(x=as.matrix(multi_otus[[i]]))[ids.sinks, ids.sources]
    # write.csv ignores `col.names` (and warns when it is supplied), so it is
    # not passed; with row.names=TRUE the header row is written regardless.
    write.csv(dis.ma, paste0('../Tmp/', names[i], 'jsd.distance.csv'),
              row.names=TRUE)
}

[1] "Calculating mixinig proportions for sink 10"
[1] "Calculating mixinig proportions for sink 20"
[1] "Calculating mixinig proportions for sink 30"
[1] "Calculating mixinig proportions for sink 40"
[1] "Calculating mixinig proportions for sink 50"
[1] "Calculating mixinig proportions for sink 60"
[1] "Calculating mixinig proportions for sink 70"
[1] "Calculating mixinig proportions for sink 80"
[1] "Calculating mixinig proportions for sink 90"
[1] "Calculating mixinig proportions for sink 100"
[1] "Calculating mixinig proportions for sink 110"
[1] "Calculating mixinig proportions for sink 120"
[1] "Calculating mixinig proportions for sink 129"
[1] "Calculating mixinig proportions for sink 10"
[1] "Calculating mixinig proportions for sink 20"
[1] "Calculating mixinig proportions for sink 30"
[1] "Calculating mixinig proportions for sink 40"
[1] "Calculating mixinig proportions for sink 50"


From cffi callback <function _processevents at 0x7f70d1bb0160>:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/rpy2/rinterface_lib/callbacks.py", line 268, in _processevents
    try:
KeyboardInterrupt


[1] "Calculating mixinig proportions for sink 60"


evaluation

In [None]:
# plot_roc()