# Generate text files listing the names of all parameters (biological species / physicochemical parameters)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

## Load the data (either phychem or biol)

In [None]:
# Work out where the data lives: two directory levels above the working directory
current_dir = os.getcwd()
parent_dir = os.path.abspath(
    os.path.join(current_dir, os.pardir, os.pardir)
)

# Load the phychem data (semicolon-separated CSV)
param_file = os.path.join(
    parent_dir,
    "phychem_data",
    "phychem_param_80_19",
    "all_concatenated1980_2019.csv",
)
param_df = pd.read_csv(param_file, sep=";")

# Text file with all parameters / species

phychem:
name_category = 'libelle_groupe_parametre'
name_parameter = 'libelle_parametre'
code_parameter = 'code_parametre'
code_category = 'code_groupe_parametre'
name_parent = name_category
code_parent = code_category

biol:
name_category = 'libelle_support'
name_parameter = 'libelle_appel_taxon'
code_parameter = 'code_appel_taxon'
name_parent = 'libelles_taxons_parents'
code_parent = 'codes_taxons_parents'

In [None]:
# Column names of the loaded file -- adapt them to the file in use
name_category, code_category = 'libelle_groupe_parametre', 'code_groupe_parametre'
name_parameter, code_parameter = 'libelle_parametre', 'code_parametre'

# The "parent" columns are set to the category columns themselves
name_parent, code_parent = name_category, code_category

In [None]:
def get_category_dict(df, name_category, name_parameter, type_data):
    """Group the distinct parameter names of ``df`` by category.

    Args:
        df: DataFrame containing the columns ``name_category`` and
            ``name_parameter``.
        name_category: Name of the column holding the category label
            (e.g. Ephemeroptera / Paramètres Azotés).
        name_parameter: Name of the column holding the parameter label.
        type_data: Kind of dataset. For ``"phychem"`` each category label is
            reduced to the text between its first pair of single quotes;
            for any other value the label is used unchanged.

    Returns:
        dict mapping each category to the alphabetically sorted list of its
        distinct parameter names.
    """
    strip_quotes = type_data == "phychem"
    parameters_by_category = {}

    # Walk the two columns in lockstep instead of materialising every row
    for category, parameter in zip(df[name_category], df[name_parameter]):
        # Special case for phychem: keep only the quoted part of the label.
        # Labels without a quote are kept as they are rather than raising.
        if strip_quotes and isinstance(category, str):
            pieces = category.split("'")
            if len(pieces) > 1:
                category = pieces[1]

        # A set keeps each parameter name only once per category
        parameters_by_category.setdefault(category, set()).add(parameter)

    # Sort parameters alphabetically within each category
    return {
        category: sorted(parameters)
        for category, parameters in parameters_by_category.items()
    }

In [None]:
# Build the category -> parameters mapping for the phychem data
category_dict = get_category_dict(param_df, name_category, name_parameter, "phychem")

In [None]:
def all2txt(dict, name_category, name_txt_file, intro_txt_file):
    """Write every category and its parameters to a text file.

    The file contains, in order: the introduction text, a blank line, the
    alphabetically sorted category names (one per line), a blank line, and
    then one section per category listing its parameters indented by four
    spaces, each section followed by a blank line.

    Args:
        dict: Mapping of category name to an iterable of parameter names.
            (The parameter name shadows the builtin; it is kept so existing
            callers keep working.)
        name_category: Not used by this function; kept for interface
            compatibility.
        name_txt_file: Path of the text file to create or overwrite.
        intro_txt_file: Introduction text written at the top of the file.
    """
    categories = sorted(dict)  # categories in alphabetical order

    lines = [intro_txt_file, "\n"]
    lines.extend(f"{category}\n" for category in categories)
    lines.append("\n")
    for category in categories:
        lines.append(f"{category}:\n")
        lines.extend(f"    {parameter}\n" for parameter in dict[category])
        lines.append("\n")

    # Explicit UTF-8 so accented labels are written the same way on every
    # platform instead of depending on the locale's default encoding
    with open(name_txt_file, 'w', encoding="utf-8") as file:
        file.writelines(lines)

    print("File created with success!")

In [None]:
# Output settings -- adjust them to the kind of data being exported
intro_txt_file = (
    "Those are the different physicochemical parameters present in the Seine. "
    "The categories are the following:\n"
)
name_txt_file = 'param_nitro_80_99.txt'

In [None]:
# Export every category with its parameters to the text file
all2txt(category_dict, name_category, name_txt_file, intro_txt_file)

# Text file with only species of interest

For biol:
interest_species = ['Ephemeroptera', 'Plecoptera', 'Trichoptera']
interest_codes = [348, 1, 181]

In [None]:
# Categories of interest, each written next to its code
interest_species, interest_codes = map(list, zip(
    ('Paramètres Azotés', 47),
    ('Paramètres Phosphorés', 48),
))

In [None]:
def _codes_in_cell(raw):
    """Return the set of integer codes held by one cell of the code column."""
    import re

    if isinstance(raw, str):
        # NOTE(review): assumes codes appear as non-negative integers inside
        # the string (e.g. "1,348") -- confirm against the real file format.
        return {int(token) for token in re.findall(r"\d+", raw)}
    if isinstance(raw, (list, tuple, set, frozenset)):
        return {int(code) for code in raw}
    try:
        return {int(raw)}
    except (TypeError, ValueError):
        # Missing value (e.g. NaN): the row carries no code
        return set()


def get_dict_interest(df, interest_species, interest_codes, name_category, name_parameter, code_category, code_parameter):
    """Collect the parameters that belong to each species/category of interest.

    A row is attributed to a species of interest when that species' code is
    found in the row's ``code_category`` cell. The cell may hold a single
    code or several codes (a collection, or a string containing them).

    Args:
        df: DataFrame containing the columns ``name_parameter``,
            ``code_parameter`` and ``code_category``.
        interest_species: Names of the species/categories of interest.
        interest_codes: Codes of those species/categories, in the same order
            as ``interest_species``.
        name_category: Not read by this function; kept for interface
            compatibility.
        name_parameter: Name of the column holding the parameter label.
        code_category: Name of the column holding the category code(s).
        code_parameter: Name of the column holding the parameter code.

    Returns:
        dict mapping every name in ``interest_species`` to a set of
        ``(parameter_name, parameter_code)`` tuples (possibly empty).

    Raises:
        ValueError: If ``interest_species`` and ``interest_codes`` do not
            have the same length.
    """
    if len(interest_species) != len(interest_codes):
        raise ValueError("interest_species and interest_codes must have the same length")

    # The result is keyed by species name, so translate each code to its name
    species_by_code = {
        int(code): species
        for species, code in zip(interest_species, interest_codes)
    }
    wanted_codes = set(species_by_code)
    interest_dict = {species: set() for species in interest_species}

    columns = zip(df[name_parameter], df[code_parameter], df[code_category])
    for param, code_param, raw_codes in columns:
        matches = wanted_codes & _codes_in_cell(raw_codes)
        if not matches:
            continue

        # Store the code as a plain int whatever numeric type pandas yields
        entry = (param, int(code_param))
        for code in matches:
            interest_dict[species_by_code[code]].add(entry)

    return interest_dict


In [None]:
# Collect the parameters belonging to each species/category of interest
interest_dict = get_dict_interest(
    param_df,
    interest_species,
    interest_codes,
    name_category=name_parent,
    name_parameter=name_parameter,
    code_category=code_parent,
    code_parameter=code_parameter,
)

In [None]:
def get_codes_list(interest_dict, interest_species):
    """Return the codes found for each species of interest.

    Args:
        interest_dict: Mapping of species name to a collection of
            ``(name, code)`` tuples.
        interest_species: Species names, in the order wanted for the output.

    Returns:
        A list with one entry per name in ``interest_species`` (same order),
        each entry being the sorted list of codes stored for that species.
        A species absent from ``interest_dict`` yields an empty list.
    """
    # Look species up by name so the output lines up with interest_species
    # whatever the key order (or extra keys) of interest_dict; sorting makes
    # the result independent of set iteration order.
    return [
        sorted(entry[1] for entry in interest_dict.get(species, ()))
        for species in interest_species
    ]

In [None]:
# Only needed for the biol data
EPT_codes_sorted = get_codes_list(
    interest_dict=interest_dict,
    interest_species=interest_species,
)

In [None]:
def interest2txt(interest_dict, name_txt_file, intro_txt_file, interest_species):
    """Write the entries of each species of interest to a text file.

    The file contains the introduction text, a blank line, and then one
    group per name in ``interest_species``: the sorted entries of that
    species, one per line and indented by four spaces, followed by a blank
    line. No heading is written for the groups.

    Args:
        interest_dict: Mapping of species name to a collection of sortable
            entries (e.g. ``(name, code)`` tuples).
        name_txt_file: Path of the text file to create or overwrite.
        intro_txt_file: Introduction text written at the top of the file.
        interest_species: Species names, in the order the groups are written.

    Raises:
        KeyError: If a name in ``interest_species`` is missing from
            ``interest_dict``.
    """
    # Assemble everything first so a missing species fails before the file
    # is touched
    lines = [intro_txt_file, "\n"]
    for species in interest_species:
        lines.extend(f"    {entry}\n" for entry in sorted(interest_dict[species]))
        lines.append("\n")

    # Explicit UTF-8 so accented names are written the same way on every
    # platform instead of depending on the locale's default encoding
    with open(name_txt_file, 'w', encoding="utf-8") as file:
        file.writelines(lines)

In [None]:
# Output settings -- adjust them depending on whether taxons or parameters are exported
name_txt_file, intro_txt_file = (
    'list_taxons_EPT.txt',
    "Code taxons related to EPT in the Seine, from Naiades dataset:\n",
)

In [None]:
# Export the entries of the species/categories of interest to the text file
interest2txt(
    interest_dict=interest_dict,
    name_txt_file=name_txt_file,
    intro_txt_file=intro_txt_file,
    interest_species=interest_species,
)