In [14]:
import pandas as pd
import os
import platform
# Remember the folder the kernel started in so that a change can be reported below.
previous_folder = os.getcwd()
print("This is the working folder: " + previous_folder)

# Network name of the machine running this kernel.
device_name = platform.node()
if device_name == 'mmd-MS-7D98':
    # Author's note: every login on the remote server starts in the default
    # folder /mmd/home, so the kernel is moved to the project folder from which
    # the settings are loaded.
    os.chdir("/media/mmd/Samsung_T5/GitHub/UMD")

# Report the new location only when the working folder actually changed.
current_folder = os.getcwd()
if current_folder != previous_folder:
    print("The current working folder has been changed, now the working folder is: " + current_folder)

This is the working folder: /media/mmd/Samsung_T5/GitHub/UMD


In [15]:
from my_library.config import *

In [16]:
import json
# Location of the JSON configuration file shared by the cells below.
# Built with os.path.join (like the other paths in this notebook) so that a
# trailing separator on working_directory does not produce a doubled "/".
file_path_json = os.path.join(working_directory, "data_config.json")

In [17]:
import json
import os

def update_json(file_path, new_dict, dict_name, overwrite=False):
    """Store ``new_dict`` under the key ``dict_name`` in a JSON file.

    The file is expected to hold a single JSON object. If it does not exist,
    cannot be parsed, or holds something other than an object, the function
    starts from an empty object (any previous content is then discarded when
    the file is rewritten).

    Args:
        file_path: Path of the JSON file to create or update.
        new_dict: JSON-serialisable value to store (a dict or a list).
        dict_name: Top-level key under which ``new_dict`` is stored.
        overwrite: When False (default) an entry that already exists under
            ``dict_name`` is left untouched; when True it is replaced.

    Raises:
        TypeError: If the resulting content cannot be serialised to JSON. In
            that case the file on disk is left unchanged.
    """
    data = {}
    if os.path.exists(file_path):
        with open(file_path, "r") as f:
            try:
                loaded = json.load(f)
            except json.JSONDecodeError:
                loaded = None  # Invalid JSON: fall back to an empty object
        if isinstance(loaded, dict):
            data = loaded  # Anything that is not a JSON object is discarded

    # Existing entries are preserved unless the caller explicitly asks to replace them
    if overwrite or dict_name not in data:
        data[dict_name] = new_dict

    # Serialise BEFORE opening the file for writing: opening with "w" truncates
    # the file, so a serialisation error raised afterwards would wipe the
    # existing configuration.
    serialized = json.dumps(data, indent=4)
    with open(file_path, "w") as f:
        f.write(serialized)

In [18]:
# Short tag of each model, and the annotations column holding its outcome.
# The two lists are positional: tags[i] goes with columns[i].
tags = ["adsq", "br3", "pdl1-3"]
columns = ['HISTOLOGY', 'BEST_RESPONSE_3', 'PDL1_CATHEGORY']

# Register the model tags in the shared configuration file. The path used to be
# the relative "data_config.json", which only pointed at the same file as
# file_path_json when the current working directory was working_directory.
file_path = file_path_json
update_json(file_path, tags, "models")

In [19]:
# Project folders handled by this notebook; only the first one is used.
projects_list = [
    "projects/I3lung-sqadqc-project",
]
project_path = projects_list[0]

### labels

In [20]:
# Map each model tag to the annotations column holding its outcome label.
labels = {tag: column for tag, column in zip(tags, columns)}

In [21]:
# Show the tag -> column mapping, then store it under "labels" in the config file.
print(labels)
update_json(file_path=file_path_json, new_dict=labels, dict_name="labels")

{'adsq': 'HISTOLOGY', 'br3': 'BEST_RESPONSE_3', 'pdl1-3': 'PDL1_CATHEGORY'}


### model outdir

In [22]:
# One output folder per model tag, all under <working_directory>/projects/trained_models.
trained_models_root = os.path.join(working_directory, "projects", "trained_models")
model_outdir_path = [os.path.join(trained_models_root, f"{tag}") for tag in tags]


In [23]:
# Pair every model tag with its output folder and store the mapping in the config file.
model_outdir = {tag: outdir for tag, outdir in zip(tags, model_outdir_path)}
print(model_outdir)
update_json(file_path=file_path_json, new_dict=model_outdir, dict_name="model_outdir")

{'adsq': '/media/mmd/Samsung_T5/GitHub/UMD/projects/trained_models/adsq', 'br3': '/media/mmd/Samsung_T5/GitHub/UMD/projects/trained_models/br3', 'pdl1-3': '/media/mmd/Samsung_T5/GitHub/UMD/projects/trained_models/pdl1-3'}


### creating filters

In [24]:
# Build one filter per model from the values found in the annotations and store
# it in the JSON config, so the filters can be loaded later to train the models.
file_path = os.path.join(working_directory, project_path, "annotations.csv")  # Replace with your actual file path
df = pd.read_csv(file_path)

for tag, column in labels.items():
    # Key under which this filter is stored in the JSON config
    filter_name = f'filter_{tag}'

    # Distinct non-missing values of the outcome column, as strings
    distinct_values = df[column].dropna().unique().astype(str).tolist()

    # The 'other' category is excluded from the filter
    distinct_values = [item for item in distinct_values if item != 'other']

    # {column: [allowed values]}. Author's note: this is the exact syntax the
    # filter must have to be accepted by slideflow.
    # Named label_filter (not `filter`) so the builtin filter() is not shadowed
    # for the rest of the notebook.
    label_filter = {column: distinct_values}
    display(label_filter)

    # NOTE: update_json keeps an entry that already exists in the file, so a
    # filter stored by a previous run is not refreshed here.
    update_json(file_path_json, label_filter, filter_name)


{'HISTOLOGY': ['squamous', 'adenocarcinoma']}

{'BEST_RESPONSE_3': ['2.0', '1.0', '0.0']}

{'PDL1_CATHEGORY': ['1-49 %', '>=50 %', '< 1 %']}

### br3: converting the outcome values to strings

In [25]:
import json

# Reload the configuration written by the previous cells to inspect its content.
with open(file_path_json) as config_file:
    data = json.load(config_file)  # Parsed JSON content of the config file

print(data)
# Last expression of the cell: displays the top-level keys of the configuration.
data.keys()

{'models': ['adsq', 'br3', 'pdl1-3'], 'labels': {'adsq': 'HISTOLOGY', 'br3': 'BEST_RESPONSE_3', 'pdl1-3': 'PDL1_CATHEGORY'}, 'model_outdir': {'adsq': '/media/mmd/Samsung_T5/GitHub/UMD/projects/trained_models/adsq', 'br3': '/media/mmd/Samsung_T5/GitHub/UMD/projects/trained_models/br3', 'pdl1-3': '/media/mmd/Samsung_T5/GitHub/UMD/projects/trained_models/pdl1-3'}, 'filter_adsq': {'HISTOLOGY': ['squamous', 'adenocarcinoma']}, 'filter_br3': {'BEST_RESPONSE_3': ['2.0', '1.0', '0.0']}, 'filter_pdl1-3': {'PDL1_CATHEGORY': ['1-49 %', '>=50 %', '< 1 %']}}


dict_keys(['models', 'labels', 'model_outdir', 'filter_adsq', 'filter_br3', 'filter_pdl1-3'])

In [26]:
# Step 1: Load the .csv file into a DataFrame
file_path = os.path.join(working_directory, project_path, "annotations.csv")  # Replace with your actual file path
df = pd.read_csv(file_path)

# The JSON config contains the names of the outcome label columns
outcome_label = data["labels"]["br3"]

# Convert the float values to strings for training with slideflow.
# A plain astype(str) would also turn missing values into the literal string
# 'nan'; .where(...) keeps them missing so they are still recognised as NaN
# (and are written as empty cells if the frame is saved).
outcome_values = df[outcome_label]
df[outcome_label] = outcome_values.astype(str).where(outcome_values.notna())

# Save annotations back to the .csv file (left disabled: it overwrites the source file)
#df.to_csv(file_path, index = False)