# Restructure NETICA text output

Jannicke currently exports text files from NETICA in the format illustrated here:

    K:\Avdeling\317 Klima- og miljømodellering\JMO\FETNET-WOE\NeticaReports\20180822\txt

These files need restructuring and combining to create a single Excel file with the structure illustrated here:

    K:\Avdeling\317 Klima- og miljømodellering\JMO\FETNET-WOE\NeticaReports\20180822\other\FET_post_all_20180822.xlsx

The function below performs the necessary processing.

In [1]:
def restructure_netica_output(txt_fold, title_name_xlsx, out_xlsx):
    """ Restructures and combines text file output from NETICA for use
        in Jannicke's subsequent analyses.

        Every .txt file in txt_fold is split into sections at blank lines
        and each section is parsed as tab-separated text. The section's
        header line is used as the Node "Title", the row labels as the
        State "Titles" and the single data column as the probabilities.
        The scenario number is taken from the third underscore-delimited
        token of the file name, which must therefore be an integer.

        NOTE: The final blank-line-delimited section of each file is always
        discarded (the files are presumably terminated by a blank line -
        confirm against the NETICA export if the last node goes missing).
        Files are read using the platform's default text encoding.

    Args:
        txt_fold:        Raw str. Path to folder containing "vertical" format
                         .txt files exported from NETICA. All .txt files in
                         this folder will be processed, so make sure it only
                         contains NETICA output
        title_name_xlsx: Raw str. Path to Excel file containing lookup table
                         matching Node and State "Titles" to "Names". Read
                         from the sheet named 'title_name_mapping', which
                         must have columns NodeTitle, StateTitle, NodeName
                         and StateName
        out_xlsx:        Raw str. Path to output Excel file to be created.
                         This file restructures and aggregates data from all
                         the input text files

    Returns:
        Dataframe with columns Scenario, NodeTitle, NodeName, StateTitle,
        StateName and Probability. The desired Excel file is also saved.

    Raises:
        ValueError: If txt_fold contains no .txt files, or if none of the
                    files contains any data sections.
    """
    import pandas as pd
    import glob
    import os
    from io import StringIO

    # Get a list of text files to process
    search_path = os.path.join(txt_fold, '*.txt')
    file_list = glob.glob(search_path)

    # Fail early with a clear message instead of pandas' generic
    # "No objects to concatenate" error further down
    if not file_list:
        raise ValueError('No .txt files found in folder: {}'.format(txt_fold))

    # Container for processed dataframes
    df_list = []

    # Loop over files
    for fpath in file_list:
        # Get scenario no. from file name
        scen = int(os.path.split(fpath)[1].split('_')[2])

        # Read whole file, splitting at blank lines. The context manager
        # makes sure the file handle is closed again
        with open(fpath, 'r') as fobj:
            data_grps = fobj.read().split('\n\n')

        # Loop over parts (the last part is deliberately dropped)
        for part in data_grps[:-1]:
            # Whitespace-only parts contain nothing to parse
            if not part.strip():
                continue

            # Read to df. The header line has one field fewer than the data
            # rows, so pandas uses the first data field as the index
            df = pd.read_csv(StringIO(part), sep='\t')

            # Extract components and re-order cols
            df['NodeTitle'] = df.columns[0]
            df['StateTitle'] = df.index
            df.reset_index(inplace=True, drop=True)
            df['Scenario'] = scen
            df.columns = ['Probability', 'NodeTitle', 'StateTitle', 'Scenario']
            df = df[['Scenario', 'NodeTitle', 'StateTitle', 'Probability']]

            # Add to output
            df_list.append(df)

    if not df_list:
        raise ValueError('No data sections found in the .txt files in '
                         'folder: {}'.format(txt_fold))

    # Combine to single df with a fresh 0..n-1 index
    df = pd.concat(df_list, axis=0, ignore_index=True)

    # Join in Node and State "Names". A left join keeps rows that have no
    # match in the lookup table (their Names are left empty)
    lu_df = pd.read_excel(title_name_xlsx, sheet_name='title_name_mapping')
    df = pd.merge(df, lu_df, how='left', on=['NodeTitle', 'StateTitle'])
    df = df[['Scenario', 'NodeTitle', 'NodeName', 'StateTitle',
             'StateName', 'Probability']]

    # Write output
    df.to_excel(out_xlsx, index=False)

    return df

# Process data

In [2]:
# User input
# Folder containing the .txt files exported from NETICA
txt_fold = r'C:\Data\James_Work\Staff\Jannicke_M\FETNET-WOE\netica_txt_output'

# Excel file containing the Title-to-Name lookup table
title_name_xlsx = r'C:\Data\James_Work\Staff\Jannicke_M\FETNET-WOE\fetnet_title_names_mapping.xlsx'

# Excel file to create
out_xlsx = r'C:\Data\James_Work\Staff\Jannicke_M\FETNET-WOE\netica_restructured.xlsx'

# Restructure all the text files and write the combined Excel file
df = restructure_netica_output(txt_fold=txt_fold,
                               title_name_xlsx=title_name_xlsx,
                               out_xlsx=out_xlsx)

# Preview the first rows of the result
df.head(10)

Unnamed: 0,Scenario,NodeTitle,NodeName,StateTitle,StateName,Probability
0,1,Hydrophobicity (Kow),Hydrophobicity,low,low,1.0
1,1,Hydrophobicity (Kow),Hydrophobicity,>5.5,high,0.0
2,1,Membrane crossing,MembraneCrossing,low,low,0.0
3,1,Membrane crossing,MembraneCrossing,medium,medium,0.25
4,1,Membrane crossing,MembraneCrossing,high,high,0.75
5,1,Mol. weight,MolWeight,low,low,1.0
6,1,Mol. weight,MolWeight,> 600,high,0.0
7,1,Ratio tox. Daphnia / algae,RatioToxDapAlg,<0.5,low,0.33333
8,1,Ratio tox. Daphnia / algae,RatioToxDapAlg,0.5 - 2,medium,0.33333
9,1,Ratio tox. Daphnia / algae,RatioToxDapAlg,>2,high,0.33333
