In [None]:
import os
import sys
import warnings
import pandas as pd
sys.path.append('modules')
from transform_dst_mod import transform_dst
from transform_specimen_mod import transform_specimen

# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings such as
# SettingWithCopyWarning; consider narrowing it once the notebook is stable.
warnings.filterwarnings("ignore")

# Switch off pandas' display truncation (column count, row count, cell width)
# so the previews further down show complete frames.
for _display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

In [None]:
# Load the three Mali extracts stored under data/interm.
dst = pd.read_csv('Mali/data/interm/Mali_jan_3_dst.csv')
culture = pd.read_csv('Mali/data/interm/Mali_jan_4_culture.csv')
specimen = pd.read_csv('Mali/data/interm/Mali_jan_1_specimen.csv')

# Raw CaseBrowser production export; used below as the source of registration dates.
CaseBrowser_prod_data = pd.read_csv('Mali/data/raw/Mali_Prod_data_01_16_24.csv')

# identifier -> registrationdate lookup.
# The non-inplace rename returns an independent frame, instead of editing a
# column selection of the raw export in place (the pattern pandas flags with
# SettingWithCopyWarning).
registrationdate = CaseBrowser_prod_data[
    ['patient_local_identifier', 'registrationdate']
].rename(columns={'patient_local_identifier': 'identifier'})

In [None]:
# Build a lookup from the specimen extract so that it can be merged into the
# DST dataframe on the 'specimen' column.
# Columns are renamed to match the DST frame: 'specimen', 'specimen local', 'date'.
# The non-inplace rename returns an independent frame rather than editing a
# column selection of `specimen` in place.
# NOTE(review): the merge key 'specimen' is filled from patient_local_identifier;
# if that value repeats across specimen rows, the later merge multiplies DST
# rows. Confirm that this is intended.
containeridentifier = specimen[
    ['patient_local_identifier', 'spec_local_identifier', 'collected_date']
].rename(columns={'patient_local_identifier': 'specimen',
                  'spec_local_identifier': 'specimen local',
                  'collected_date': 'date'})

In [None]:
def transform_culture(culture_df):
    """Return a reshaped, recoded copy of the culture extract.

    The input frame is left untouched; all renaming and recoding happens on a
    new frame, so the cell calling this function can be re-run safely.

    Steps:
      1. Rename the source columns to ``identifier``, ``containeridentifier``,
         ``issued`` and ``value``.
      2. Rename whichever column sits at position 3 to ``culturetype``.
      3. Recode ``value`` and ``culturetype``; values that are not listed in
         the mappings pass through unchanged.
      4. Parse ``issued`` as a datetime and format it as
         ``YYYY-MM-DD HH:MM:SS``.
      5. Keep exactly the five output columns, in a fixed order.

    Parameters
    ----------
    culture_df : pandas.DataFrame
        Culture extract. It needs at least four columns (the positional rename
        raises IndexError otherwise). Any of the five output columns that is
        still missing at the end is created as all-NaN by the final
        ``reindex``.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``identifier``, ``containeridentifier``,
        ``issued``, ``value``, ``culturetype``.
    """
    value_codes = {
        '1 to 19': 'singleColony',
        '20 to 100': '1+',
        '20 to100': '1+',  # variant spelling without the space, same code
        '100 to 200': '2+',
        'More than 200': '3+',
        'Positive': 'positive',
        'Negative': 'negative',
        'Unknown result': 'unknownData',
        'Study in progress': 'unfinishedResult',
        'Not done': 'notDone',
        'Nonspecific microflora': 'contamination',
        'MOTT': 'mott',
    }
    culturetype_codes = {
        'Liquid': 'liquid',
        'Solid': 'solid',
        'Not specified': 'notSpecified',
    }

    # rename() without inplace returns a new frame, so the caller's DataFrame
    # is never modified.
    # NOTE(review): 'patient_local_Identifier' is spelled with a capital "I";
    # rename() silently ignores keys that do not exist, so confirm this matches
    # the CSV header exactly.
    result = culture_df.rename(columns={
        'patient_local_Identifier': 'identifier',
        'spec_local_identifier': 'containeridentifier',
        'collected_date': 'issued',
        'result': 'value',
    })

    # The culture-type header is renamed by position rather than by name.
    # NOTE(review): renaming it by name reportedly did not work -- possibly
    # stray whitespace or different casing in the CSV header; confirm and
    # switch to a name-based rename if possible.
    result = result.rename(columns={result.columns[3]: 'culturetype'})

    result['value'] = result['value'].replace(value_codes)
    result['culturetype'] = result['culturetype'].replace(culturetype_codes)

    # Normalise the timestamp to a fixed-format string.
    result['issued'] = pd.to_datetime(result['issued']).dt.strftime('%Y-%m-%d %H:%M:%S')

    return result.reindex(
        columns=['identifier', 'containeridentifier', 'issued', 'value', 'culturetype']
    )

In [None]:
def strip_dash_2(df, col1, col2):
    """Return a copy of ``df`` with every '-' removed from two string columns.

    The input frame is not modified. The dash is matched as a literal
    character (``regex=False``), so the result does not depend on the pandas
    version's default for ``str.replace``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both columns.
    col1, col2 : str
        Names of the columns to clean. Both must hold string values, because
        the ``.str`` accessor raises on non-string columns.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the dashes stripped from ``col1`` and ``col2``;
        all other columns are unchanged.
    """
    cleaned = df.copy()
    for column in (col1, col2):
        cleaned[column] = cleaned[column].str.replace('-', '', regex=False)
    return cleaned

In [None]:
# Preview the first 20 rows of the culture extract as loaded.
culture.head(20)

In [None]:
# Rename and recode the culture extract into the output layout (see transform_culture).
processed_culture = transform_culture(culture)

In [None]:
# Remove dashes from both identifier columns of the processed culture frame.
processed_culture = strip_dash_2(processed_culture, 'identifier', 'containeridentifier')

In [None]:
# Check the first 20 rows of the culture frame after recoding and dash removal.
processed_culture.head(20)

In [None]:
# This CSV export was sent to Nik so that he could forward it to Mali.
# processed_culture.to_csv('Mali_jan_culture.csv', index=False)

In [None]:
# Preview the lookup built from the specimen extract (columns: specimen, specimen local, date).
containeridentifier.head(10)

In [None]:
# Preview the first 20 rows of the DST extract before it is cleaned.
dst.head(20)

In [None]:
# Remove dashes from the 'specimen' and 'specimen local' columns of the DST frame.
dst = strip_dash_2(dst, 'specimen', 'specimen local')

In [None]:
# Discard DST's own 'specimen local' column; the merge with containeridentifier
# in a later cell supplies a column of the same name.
dst = dst.drop(columns=['specimen local'])

In [None]:
# Attach 'specimen local' and 'date' from the specimen lookup to the DST rows.
# pd.merge defaults to an inner join, so DST rows whose 'specimen' value has no
# match in the lookup are dropped.
# NOTE(review): dst['specimen'] has had its dashes stripped, but
# containeridentifier['specimen'] has not (in the cells shown here) -- confirm
# the keys still line up. Also confirm the lookup has one row per key;
# otherwise DST rows are duplicated.
dst = pd.merge(dst, containeridentifier, on='specimen')

In [None]:
# Inspect the first 20 rows of the DST frame after the merge.
dst.head(20)

In [None]:
# Column dtypes and non-null counts of the merged DST frame.
dst.info()

In [None]:
# Relocate 'specimen local' to column index 2.
# NOTE(review): insert() positions are 0-based, so index 2 is the third column,
# not the second -- confirm which position the downstream transform expects.
col_to_move = 'specimen local'
new_position = 2

# pop() removes the column and returns its data; insert() puts it back at the
# requested index, modifying dst in place.
column_to_move_data = dst.pop(col_to_move)
dst.insert(loc=new_position, column=col_to_move, value=column_to_move_data)

In [None]:
# Run the project's DST transformation (imported from modules/transform_dst_mod)
# on the prepared DST frame and the registration-date lookup.
# NOTE(review): transform_dst is defined outside this notebook; its output
# schema is not visible here.
processed_dst = transform_dst(dst, registrationdate)

In [None]:
# Column dtypes and non-null counts of the transformed DST frame.
processed_dst.info()

In [None]:
# Add each identifier's registration date (inner join on 'identifier'), then
# normalise the date to a 'YYYY-MM-DD HH:MM:SS' string.
# NOTE(review): if registrationdate holds several rows per identifier, this
# merge duplicates DST rows -- confirm the lookup is unique per identifier.
dst_df = processed_dst.merge(registrationdate, on='identifier')
dst_df['registrationdate'] = (
    pd.to_datetime(dst_df['registrationdate']).dt.strftime('%Y-%m-%d %H:%M:%S')
)

In [None]:
# Confirm the columns and dtypes after adding registrationdate.
dst_df.info()

In [None]:
# Final column layout for the DST output. reindex() drops every column that is
# not listed and creates an all-NaN column for any listed name that is missing.
new_order_dst = [
    'identifier',
    'registrationdate',
    'containeridentifier',
    'issued',
    'dsttest',
    'drugs',
]
dst_df = dst_df.reindex(new_order_dst, axis='columns')

In [None]:
# processed_dst.to_csv('Mali_Oct_1_DST.csv', index=False)