In [1]:
import io
import os
import sys

import pandas as pd
import redcap
import requests

from credentials import token_pid353
from credentials import redcap_api_url

sys.path.insert(0, '../toolbox/')
from general_tools import compare_similar_dataframes

In [2]:
def redcap_API_export(url,token):
    """
    Export all records of a REDCap project through the native API.

    Sends a POST request asking for a flat CSV export of every record and
    parses the response body into a DataFrame.

    Parameters
    ----------
    url : str
        Address of the API endpoint the request is posted to.
    token : str
        API token sent in the request payload.

    Returns
    -------
    pandas.DataFrame
        The exported records; the first CSV column is used as the index.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """

    fields = {
        'token': token,
        'content': 'record',
        'format': 'csv',
        'type': 'flat'}

    r = requests.post(url, data=fields)

    # Fail loudly on an HTTP error (bad token, wrong URL, server fault) instead of
    # trying to parse the error body as CSV and returning a meaningless frame.
    r.raise_for_status()

    df = pd.read_csv(io.StringIO(r.content.decode("utf-8")), index_col=0)

    return df

def wide_to_long(df_pid171):
    """
    Reshape a wide REDCap PID171 export into long format.

    The wide frame has one row per sample ID, with the n1 and n2 PCR targets
    spread over four columns: n1/n2_sars_cov2_avg_conc and
    n1/n2_sars_cov2_below_lod. The long frame repeats every sample ID once per
    target (column ``pcr_target``, values "n1"/"n2") and holds a single
    ``sars_cov2_avg_conc`` and a single ``sars_cov2_below_lod`` column.

    Parameters
    ----------
    df_pid171 : pandas.DataFrame
        Wide frame. After ``reset_index()`` it must expose a ``sample_id``
        column (i.e. ``sample_id`` is the index name or already a column).
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Long frame: all non-melted columns, followed by ``pcr_target``,
        ``sars_cov2_avg_conc`` and ``sars_cov2_below_lod``.
    """
    # reset_index() already returns a new frame, so the caller's data stays untouched
    df_wide = df_pid171.reset_index()

    #identify columns to melt, and all the rest
    conc_clms = ['n1_sars_cov2_avg_conc', 'n2_sars_cov2_avg_conc']
    lod_clms = ['n1_sars_cov2_below_lod', 'n2_sars_cov2_below_lod']
    melt_clms = conc_clms + lod_clms
    not_melt_clms = list(df_wide.columns[~df_wide.columns.isin(melt_clms)])

    #perform melt for avg_conc and keep all other columns
    df_melt_conc = pd.melt(df_wide, id_vars = not_melt_clms, value_vars = conc_clms, var_name = "pcr_target", value_name = 'sars_cov2_avg_conc')
    #perform melt for below_lod and only keep the key and the value column (below_lod)
    df_melt_lod = pd.melt(df_wide, id_vars = ["sample_id"], value_vars = lod_clms, var_name = "pcr_target", value_name = 'sars_cov2_below_lod')

    #reduce the pcr_target labels to their first 2 letters (n1 or n2);
    #each frame derives the label from its OWN column rather than relying on
    #both melts happening to share the same row order
    df_melt_conc["pcr_target"] = df_melt_conc["pcr_target"].str[0:2]
    df_melt_lod["pcr_target"] = df_melt_lod["pcr_target"].str[0:2]

    #merge the dataframes together on sample and target
    df_long = pd.merge(df_melt_conc, df_melt_lod, how = "inner", on = ["sample_id", "pcr_target"])

    return df_long

### Compare the DataFrames from the native API export and the PyCap export


In [7]:
# Read the API token from the environment so the secret is never stored in the notebook.
# A missing REDCAP_TOKEN_PID171 variable raises KeyError before any request is made.
token_pid171 = os.environ["REDCAP_TOKEN_PID171"]
redcap_api_url = 'https://redcap.doh.wa.gov/api/'

#Native API
df_pid171 = redcap_API_export(redcap_api_url, token_pid171)

#PyCap
project = redcap.Project(url = redcap_api_url, token = token_pid171)
df_pid171_v2 = project.export_records(format_type = "df")

In [9]:
# Raises AssertionError if the native-API export and the PyCap export differ.
pd.testing.assert_frame_equal(left=df_pid171, right=df_pid171_v2)

In [8]:
# Compare the native-API export with the PyCap export using the project helper.
unequal = compare_similar_dataframes(df_pid171, df_pid171_v2)

The shapes of Dataframe1 and Dataframe2 are identical: (857, 97)

The columns of Dataframe1 and Dataframe2 are identical, congrats!

The two dataframes are an exact match, congrats!


### Version 1 of the wide-to-long transform:

In [3]:
# Reuse the URL and token configured in the comparison cell instead of repeating the literals here.
df_pid171 = redcap_API_export(redcap_api_url, token_pid171)

df_pid171 = wide_to_long(df_pid171)

### Version 2 of the wide-to-long transform:

In [None]:
# PyCap export; reuses the URL and token configured in the comparison cell
# rather than embedding the secret a second time.
project = redcap.Project(url = redcap_api_url, token = token_pid171)
df_pid171_v2 = project.export_records(format_type = "df")

In [4]:
# Native-API export for version 2; reuses the URL and token configured in the comparison cell.
df_pid171_v2 = redcap_API_export(redcap_api_url, token_pid171)

In [5]:
# Turn the index back into a regular column so it can be used as melt/merge key.
df_pid171_v2 = df_pid171_v2.reset_index().copy()

# The four wide columns that get collapsed into two long columns.
melt_clms = ['n1_sars_cov2_avg_conc', 'n2_sars_cov2_avg_conc', 'n1_sars_cov2_below_lod', 'n2_sars_cov2_below_lod']

# Melt concentration and below-LOD values separately, keyed on the sample ID only.
df_conc_melt = pd.melt(
    df_pid171_v2,
    id_vars = "sample_id",
    value_vars = ['n1_sars_cov2_avg_conc', 'n2_sars_cov2_avg_conc'],
    var_name = "pcr_target",
    value_name = 'sars_cov2_avg_conc',
)
df_lod_melt = pd.melt(
    df_pid171_v2,
    id_vars = "sample_id",
    value_vars = ['n1_sars_cov2_below_lod', 'n2_sars_cov2_below_lod'],
    var_name = "pcr_target",
    value_name = 'sars_cov2_below_lod',
)

# Keep only the target prefix (first two characters) of the former column names.
df_conc_melt["pcr_target"] = df_conc_melt["pcr_target"].str[0:2]
df_lod_melt["pcr_target"] = df_lod_melt["pcr_target"].str[0:2]

# Join the two long frames on sample and target.
df_melt = df_conc_melt.merge(df_lod_melt, on = ["sample_id", "pcr_target"], how = "inner")

# Every column that did not take part in the melt.
df_everything_else = df_pid171_v2.loc[:, ~df_pid171_v2.columns.isin(melt_clms)]

# Attach the remaining columns back onto each long row.
df_complete = df_melt.merge(df_everything_else, on = "sample_id", how = "left")


In [6]:
# Column reordering is left disabled.
# NOTE(review): `df_columns` is not defined in the visible cells, so these lines would fail if re-enabled as-is.
#df_complete = df_complete.reindex(sorted(df_columns), axis = 1)
#df_pid171 = df_pid171.reindex(sorted(df_columns), axis = 1)

# Compare the version-2 result (df_complete) with the version-1 result (df_pid171).
# NOTE(review): `missmatch` presumably holds the differing columns — confirm against compare_similar_dataframes.
missmatch = compare_similar_dataframes(df_complete, df_pid171)

The shapes of Dataframe1 and Dataframe2 are identical: (1714, 97)

The columns of Dataframe1 and Dataframe2 are identical, congrats!

The two dataframes are an exact match, congrats!


In [None]:
# Column names that occur in both result frames.
in_common = set(df_complete.columns).intersection(df_pid171.columns)

# Shared columns that are not listed in `missmatch`.
in_common.difference(missmatch)

In [None]:
# Number of entries returned by compare_similar_dataframes (presumably mismatching columns — TODO confirm).
len(missmatch)

In [None]:
# Element-wise check of whether the columns selected by `missmatch` have the same dtype in both frames.
df_complete[missmatch].dtypes == df_pid171[missmatch].dtypes

In [None]:
# Raises AssertionError if the "cbod" column differs between the two transform results.
pd.testing.assert_series_equal(left=df_complete["cbod"], right=df_pid171["cbod"])

### Discovery: column `cbod` is modified from the original during the wide-to-long transform

# Appendix
