In [None]:
#Step 1: Import modules and connect to the database
import pandas as pd
import numpy as np
import pyodbc
import warnings
warnings.filterwarnings('ignore')

from IPython.display import HTML
# Connection settings for the neumodxRawDataDB database.
config = {
    'server': 'ABR-RIPLEYA-D1',
    'port': 1433,  # not interpolated into the connection string below
    'database': 'neumodxRawDataDB',
}

# Template for everything after the DRIVER clause; filled in from `config`.
conn_str = 'SERVER={server};Database={database};TRUSTED_CONNECTION=yes'

# Open a trusted connection through the "SQL Server" ODBC driver.
conn = pyodbc.connect(r'DRIVER={SQL Server};' + conn_str.format(**config))

In [None]:
##Step 2:  Load Consolidated Data from Gen 1.5 DB 


# Text searched for inside the [Environment] column when querying the database.
environment = 'HPV_RPT-8430_Original_Data_3'

def getSQLData_summary_coc(environment):
    """
    Load de-duplicated rows from [dbo].[summary_coc] for one environment.

    Parameters
    ----------
    environment: str
        Text searched for inside the [Environment] column
        (matched as ``LIKE '%<environment>%'``).

    Returns
    -------
    pandas.DataFrame
        Rows that have an 'N500 Serial Number', with one row kept per
        ('Sample ID', 'Start Date/Time') pair.

    Notes
    -----
    Reads through the module-level ``conn`` connection.
    """
    # Bind the search text as a parameter instead of splicing it into the SQL, so a
    # quote inside `environment` cannot break (or inject into) the statement.
    query = ("Select * from [dbo].[summary_coc]" +
             " where  [Environment] like ?")
    pattern = '%' + environment + '%'
    print("Query:", query, "| parameter:", pattern)
    df = pd.read_sql(query, conn, params=[pattern])
    print("Length of Raw Data Frame: " + str(len(df)))

    # Keep only rows that carry an instrument serial number.
    df = df.loc[df['N500 Serial Number'].notnull()]
    print(df['N500 Serial Number'].unique())

    # Non-in-place de-duplication: avoids mutating a slice of the raw frame.
    df = df.drop_duplicates(subset=['Sample ID', 'Start Date/Time'])
    print("Length of Filtered Data Frame: " + str(len(df)))
    return df




# Pull the summary_coc rows for the chosen environment.
OriginalData = getSQLData_summary_coc(environment=environment)


In [None]:
##Step 3:  Change Connection to NIMS (Gen 2) Database
from IPython.display import HTML
# Connection settings for the NIMS_New database.
config = {
    'server': 'ABR-RIPLEYA-L1',
    'port': 1433,  # not interpolated into the connection string below
    'database': 'NIMS_New',
}

# Template for everything after the DRIVER clause; filled in from `config`.
conn_str = 'SERVER={server};Database={database};TRUSTED_CONNECTION=yes'

# Rebind `conn` to a trusted connection through the "SQL Server" ODBC driver.
conn = pyodbc.connect(r'DRIVER={SQL Server};' + conn_str.format(**config))

In [None]:
##Step 4: Get Final Data (1.9.2.6 Data processed with ADF 4.2.1) 

def get_NIMS_ChainOfCustody(trialid):
    """
    Fetch one row per ChainOfCustodySets record with its sample identifiers.

    Parameters
    ----------
    trialid: str
        TrialId used in the SampleTrials join condition.

    Returns
    -------
    pandas.DataFrame
        Columns 'SampleUId', 'Sample ID' and 'Start Date/Time'. Because SampleTrials
        is LEFT JOINed, 'SampleUId' is null for records that have no SampleTrials
        row with the given TrialId.

    Notes
    -----
    Reads through the module-level ``conn`` connection.
    """
    # The trial id is bound as a parameter rather than concatenated into the SQL,
    # so quoting characters in it cannot alter the statement.
    query = (
        "SELECT SampleTrials.SampleUId, Samples.[Sample ID], ChainOfCustodySets.[Start Date/Time] "
        "FROM ChainOfCustodySets "
        "Left JOIN SampleTrials ON SampleTrials.SampleUId=ChainOfCustodySets.SampleUId "
        "AND SampleTrials.TrialId = ? "
        "Left JOIN Samples ON ChainOfCustodySets.SampleUId = Samples.UId ;"
    )
    df = pd.read_sql(query, conn, params=[trialid])
    return df

# Load the NIMS chain-of-custody rows, joined against this trial id.
NIMSData = get_NIMS_ChainOfCustody(trialid='BC4699B0-DCB0-4ED2-ABCB-17628D7DEC22')

In [None]:
##Step 5: Set Index to Shared Properties between Original & NIMS Data
# Key both frames on the same pair of columns so they can be joined on the index.
shared_keys = ['Sample ID', "Start Date/Time"]
for frame in (NIMSData, OriginalData):
    frame.set_index(shared_keys, inplace=True)

In [None]:
##Step 6: Combine Original Data and NIMS Data so that ChainOfCustodyInfo has a SampleUId column
# Index-on-index left join: every NIMS row is kept, with matching OriginalData columns attached.
ChainOfCustodyInfo = NIMSData.join(OriginalData, how='left')

In [None]:
# Barcode columns from which a lot and a serial column are derived.
consumables_list = [
    'Capture Plate Barcode',
    'Buffer Barcode',
    'Test Strip NeuMoDx Barcode',
    'Pcr Cartridge Barcode',
    'Release Reagent Barcode',
    'Wash Reagent Barcode',
]

# Character positions sliced out of each barcode string for the Lot and Serial columns.
lot_chars = slice(18, 24)
serial_chars = slice(27, 32)

for barcode_col in consumables_list:
    barcode_text = ChainOfCustodyInfo[barcode_col].str
    ChainOfCustodyInfo[barcode_col.replace('Barcode', 'Lot')] = barcode_text[lot_chars]
    ChainOfCustodyInfo[barcode_col.replace('Barcode', 'Serial')] = barcode_text[serial_chars]

In [None]:
# List the column names that contain "Software".
[name for name in ChainOfCustodyInfo.columns if "Software" in name]

In [None]:
# Columns kept from the combined table: identifiers first, then the derived
# Lot columns, then the derived Serial columns.
identity_columns = ['SampleUId', 'Test Guid', 'Software Version', 'Assay Version',
                    'N500 Serial Number', 'PCR Module Serial', 'Pcr Cartridge Lane',
                    'Pcr Cartridge Barcode']
lot_columns = [name.replace('Barcode', 'Lot') for name in consumables_list]
serial_columns = [name.replace('Barcode', 'Serial') for name in consumables_list]

ChainOfCustodyInfo = ChainOfCustodyInfo[identity_columns + lot_columns + serial_columns]

In [None]:
# Preview the first rows only; rendering the whole frame bloats the saved notebook.
ChainOfCustodyInfo.head()

In [None]:
##Step 7: Get Channel Data for Original Data

# Connection settings for the neumodxRawDataDB database.
config = {
    'server': 'ABR-RIPLEYA-D1',
    'port': 1433,  # not interpolated into the connection string below
    'database': 'neumodxRawDataDB',
}

# Template for everything after the DRIVER clause; filled in from `config`.
conn_str = 'SERVER={server};Database={database};TRUSTED_CONNECTION=yes'

# Rebind `conn` to a trusted connection through the "SQL Server" ODBC driver.
conn = pyodbc.connect(r'DRIVER={SQL Server};' + conn_str.format(**config))

def getSQLData_channel_summary(environment, testGuids):
    """
    Load rows from [dbo].[channel_summary] for one environment and a set of tests.

    Parameters
    ----------
    environment: str
        Text searched for inside the [Environment] column
        (matched as ``LIKE '%<environment>%'``).
    testGuids: list
        'Test Guid' values to keep, in the order the rows should be returned.
        Values that have no rows in the query result are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per ('Test Guid', 'Channel') pair, indexed by those two columns.

    Notes
    -----
    Reads through the module-level ``conn`` connection.
    """
    # Explicit space before "where", and the search text is bound as a parameter
    # so quotes in `environment` cannot break (or inject into) the statement.
    query = ("Select * from [dbo].[channel_summary]" +
             " where  [Environment] like ?")
    pattern = '%' + environment + '%'
    print("Query:", query, "| parameter:", pattern)
    df = pd.read_sql(query, conn, params=[pattern])
    print("Length of Raw Data Frame: " + str(len(df)))

    df = df.set_index('Test Guid')

    # .loc with a list raises KeyError if any label is absent, so restrict the
    # request to guids that actually came back (caller's order is preserved).
    available = set(df.index)
    wanted = [guid for guid in testGuids if guid in available]
    df = df.loc[wanted, :].reset_index()

    df = df.drop_duplicates(['Test Guid', 'Channel'])
    print("Length of Filtered Data Frame: " + str(len(df)))
    return df.set_index(['Test Guid', 'Channel'])

# Non-null Test Guids present in the chain-of-custody table, in order of first appearance.
known_test_guids = ChainOfCustodyInfo['Test Guid'].dropna().unique().tolist()
ChannelDataOriginal = getSQLData_channel_summary(environment, known_test_guids)

# Fall back to 'Target Result' wherever 'Localized Result' is missing.
localized = ChannelDataOriginal['Localized Result']
ChannelDataOriginal['Localized Result'] = np.where(localized.isnull(),
                                                   ChannelDataOriginal['Target Result'],
                                                   localized)

##Step 8: Subset ChannelDataOriginal to include only fields that are relevant to describing the amplification state.
amplification_fields = ['Localized Result', 'Ct', 'End Point Fluorescence', 'Max Peak Height',
                        'EPR', 'Baseline First Cycle', 'Baseline Last Cycle', 'Baseline Slope']
ChannelDataOriginal = ChannelDataOriginal[amplification_fields]


In [None]:
##Step 9: Merge together ChainOfCustodyInfo with ChannelDataOriginal for one common dataframe to combine with the Line Data.
# Keep a single chain-of-custody row per non-null Test Guid.
ChainOfCustodyInfo.dropna(subset=['Test Guid'], inplace=True)
ChainOfCustodyInfo.drop_duplicates(subset='Test Guid', inplace=True)

# Attach the chain-of-custody columns to each channel row via the shared 'Test Guid' key.
coc_by_test_guid = ChainOfCustodyInfo.set_index('Test Guid')
OriginalDataAll = ChannelDataOriginal.join(coc_by_test_guid)

In [None]:
##Step 10: Rename Channels in OriginalDataAll for merge with Line Data, annotate other columns to distinguish them as "Original" Columns
# Value substitutions applied across the flattened frame (short channel name -> long name).
channel_names = {
    'Green': 'Green_470_510',
    'Yellow': 'Yellow_530_555',
    'Red': 'Red_625_660',
    'Orange': 'Orange_585_610',
}
OriginalDataAll = (OriginalDataAll
                   .reset_index()
                   .replace(channel_names)
                   .set_index(['SampleUId', 'Channel']))

# Prefix every remaining column with 'Original ' in a single pass.
OriginalDataAll.rename(columns=lambda name: 'Original ' + name, inplace=True)

In [None]:
##Step 11: Read LineData from CSV, Merge with OriginalDataAll, and Export to CSV.
# Read the line data, align its column names with the original data, and keep the
# file's row number as an 'index' column.
line_data_path = 'P:/Users/Aaron/HPV Data Analysis/RPT-8430_CompiledData_Final.csv'
lineData = (pd.read_csv(line_data_path)
              .rename(columns={'SampleID': 'Sample ID', 'StartDateTime': 'Start Date/Time'})
              .reset_index())

# Upper-case the ids before they become part of the join key.
lineData['SampleUId'] = lineData['SampleUId'].str.upper()
lineData = lineData.set_index(['SampleUId', 'Channel'])

FinalData = lineData.join(OriginalDataAll, rsuffix='_Original')

# Export only the rows that found a match in the original data.
has_original = FinalData['Original N500 Serial Number'].notnull()
FinalData[has_original].to_csv('RPT_8430_FinalData_with_OriginalData.csv')

In [None]:
# Move the index levels back into ordinary columns.
FinalData = FinalData.reset_index()


In [None]:
# Re-key the frame so rows can be selected by (Channel, ProcessingStep).
FinalData = FinalData.set_index(['Channel', 'ProcessingStep'])

In [None]:
def checkCartridgeNeighbors(data, cartridgeField='CartridgeId', cartridgeLaneField='PcrCartridgeLane', resultField='LocalizedResult'):
    """
    Attach to every row the `resultField` value found in the adjacent cartridge lanes.

    A neighbor is a row with the same cartridge and the same 'Channel' whose lane
    number is one lower (left) or one higher (right). When several rows qualify,
    the first one in `data` is used; when none does, the value is NaN.

    Parameters
    ----------
    data: pandas.DataFrame
        Pandas Dataframe used as input. 'Channel' must be available either as a
        column or as a named index level.
    cartridgeField: str
        Field used to identify the NeuMoDx Cartridge used for processing in data.
    cartridgeLaneField: str
        Field used to identify the Cartridge Lane used for sample processing in data.
    resultField: str
        Field value to return from Neighboring Lanes.

    Returns
    -------
    pandas.DataFrame
        `data` itself, with 'LeftNeighbor<resultField>' and
        'RightNeighbor<resultField>' columns added (or overwritten). The index of
        `data` is left untouched.
    """
    # Flattened copy so index levels (e.g. 'Channel') can be read as ordinary
    # columns without resetting/restoring the caller's index.
    flat = data.reset_index()
    rows = list(zip(flat[cartridgeField].tolist(),
                    flat['Channel'].tolist(),
                    flat[cartridgeLaneField].tolist(),
                    flat[resultField].tolist()))

    # First result seen for each (cartridge, channel, lane). Rows with a missing
    # key part are left out: a missing value never compares equal to anything,
    # so such rows can never be somebody's neighbor.
    first_result = {}
    for cartridge, channel, lane, result in rows:
        if pd.isnull(cartridge) or pd.isnull(channel) or pd.isnull(lane):
            continue
        first_result.setdefault((cartridge, channel, lane), result)

    # One dictionary lookup per side per row replaces a full-frame scan per row.
    left_results = []
    right_results = []
    for cartridge, channel, lane, _ in rows:
        left_results.append(first_result.get((cartridge, channel, lane - 1), np.nan))
        right_results.append(first_result.get((cartridge, channel, lane + 1), np.nan))

    # Assign whole columns at once so pandas infers one dtype per column instead
    # of writing strings cell-by-cell into a float (NaN-initialised) column.
    data['LeftNeighbor' + resultField] = left_results
    data['RightNeighbor' + resultField] = right_results
    return data




        
        
    
    


In [None]:
# Select the ('Yellow_530_555', 'Raw') rows, order them by cartridge and lane,
# then attach each row's left/right lane-neighbor results.
yellow_raw_rows = FinalData.loc[('Yellow_530_555', 'Raw'), :]
cartridge_order = ['Original Pcr Cartridge Barcode', 'Original Pcr Cartridge Lane']
YellowRawData = checkCartridgeNeighbors(
    yellow_raw_rows.sort_values(cartridge_order),
    cartridgeField='Original Pcr Cartridge Barcode',
    cartridgeLaneField='Original Pcr Cartridge Lane',
)


In [None]:
# Re-sort by cartridge and lane and recompute the neighbor-result columns.
cartridge_order = ['Original Pcr Cartridge Barcode', 'Original Pcr Cartridge Lane']
YellowRawData = checkCartridgeNeighbors(
    YellowRawData.sort_values(cartridge_order),
    cartridgeField='Original Pcr Cartridge Barcode',
    cartridgeLaneField='Original Pcr Cartridge Lane',
)


In [None]:
# Keep only rows that carry an 'Original N500 Serial Number'.
has_original_serial = YellowRawData['Original N500 Serial Number'].notnull()
YellowRawData = YellowRawData[has_original_serial]

In [None]:
# Count non-null 'Sample ID' values per original software/assay version pair.
original_version_keys = ['Original Software Version', 'Original Assay Version']
YellowRawData.groupby(original_version_keys)[['Sample ID']].count().sort_index()

In [None]:
# Count non-null 'ReplicateNumber' values per AssayVersion.
YellowRawData.groupby(['AssayVersion'])[['ReplicateNumber']].count().sort_index()

In [None]:
# Show the software and assay version columns side by side.
YellowRawData.loc[:, ['NeuMoDxSoftwareVersion', 'AssayVersion']]