In [None]:
#%pip install pyautogen
#%pip install pyarrow
#%pip install scikit-learn matplotlib seaborn IPython tabulate

#### Load MIMIC III or MIMIC IV data

In [None]:
import os
import pandas as pd
import numpy as np

# --- Configuration -----------------------------------------------------------
# Folder holding the MIMIC export (CSV, gzipped CSV, or Parquet files cached by a previous run).
MIMIC_DB_LOCATION = '../../MIMIC-III/1.4/'
#MIMIC_DB_LOCATION = '../../MIMIC-IV'

# Condition to model, and the keywords later matched against ICD long titles.
# NOTE(review): the labeling prompt matches these as case-insensitive substrings, so the
# short keyword "aki" may also hit unrelated titles (e.g. words ending in "-aking") --
# verify the resulting code list before trusting the labels.
MEDICAL_CONDITION_NAME = "Acute Kidney Injury"
MEDICAL_CONDITION_ICD_KEYWORDS = ["acute kidney injury", "acute kidney failure", "aki"]

# Working folder for the AutoGen-generated code, with a data sub-folder inside it.
# exist_ok=True makes this safe to re-run and removes the check-then-create race.
CODING_FOLDER = "autogen"
DATA_FOLDER = "data"
os.makedirs(os.path.join(CODING_FOLDER, DATA_FOLDER), exist_ok=True)

# --- Load the tables ---------------------------------------------------------
# Tables used by the rest of the notebook
tables = ['patients', 'admissions', 'diagnoses_icd', 'd_icd_diagnoses', 'labevents', 'd_labitems']

# Loaded dataframes, keyed by table name
df = {}

for table in tables:
    parquet_path = f'{MIMIC_DB_LOCATION}/{table}.parquet'
    loaded_from_cache = os.path.exists(parquet_path)

    if loaded_from_cache:
        # Fast path: Parquet copy written by an earlier run of this cell
        df[table] = pd.read_parquet(parquet_path)
    else:
        # Same lookup order as before: lower-case name first, gzip before plain CSV
        csv_candidates = [
            f'{MIMIC_DB_LOCATION}/{stem}{suffix}'
            for stem in (table, table.upper())
            for suffix in ('.csv.gz', '.csv')
        ]
        csv_path = next((path for path in csv_candidates if os.path.exists(path)), None)

        if csv_path is None:
            # Best effort: report and move on; later cells need every table listed above
            print(f'Could not find {table} in {MIMIC_DB_LOCATION}')
            continue

        # compression defaults to 'infer', which recognises the .gz extension
        df[table] = pd.read_csv(csv_path)

    # Normalise column names on every path, so a Parquet file that was not produced
    # by this cell (upper-case columns) still works with the cells below
    df[table].columns = df[table].columns.str.lower()

    # Cache CSV-sourced tables as Parquet to speed up the next run
    if not loaded_from_cache:
        df[table].to_parquet(parquet_path)

#### Clean up the data

In [None]:
# Clean every loaded table. The cell is written to be re-runnable: running it a second
# time on already-cleaned tables leaves them unchanged instead of corrupting them.

# --- patients: drop incomplete rows and convert date columns -------------------
patients = df['patients']
if 'anchor_age' in patients.columns:  # MIMIC-IV
    patients = patients.dropna(subset=['gender', 'anchor_age', 'anchor_year']).copy()
    patients['anchor_year_datetime'] = pd.to_datetime(patients['anchor_year'].astype(str) + '-01-01')
else:  # MIMIC-III
    patients = patients.dropna(subset=['gender', 'dob']).copy()
    patients['dob'] = pd.to_datetime(patients['dob'])

# Encode gender as 0 ('F') / 1 (anything else). The guard matters on a re-run: once the
# column holds integers, no value equals 'F' and re-encoding would turn every row into 1.
if not pd.api.types.is_integer_dtype(patients['gender']):
    patients['gender'] = patients['gender'].ne('F').astype(int)

df['patients'] = patients

# --- admissions: drop incomplete rows, parse times, keep admit < discharge -----
admissions = df['admissions'].dropna(subset=['subject_id', 'hadm_id', 'admittime', 'dischtime']).copy()
admissions['admittime'] = pd.to_datetime(admissions['admittime'])
admissions['dischtime'] = pd.to_datetime(admissions['dischtime'])
df['admissions'] = admissions[admissions['admittime'] < admissions['dischtime']]

# --- labevents: need an admission id and a numeric value, charted during the stay
labevents = df['labevents'].dropna(subset=['hadm_id', 'valuenum'])
labevents = labevents.assign(charttime=pd.to_datetime(labevents['charttime']))

# A previous run leaves admittime/dischtime on labevents; merging again would produce
# admittime_x/_y and break the filter below, so remove any stale copies first.
stale_columns = [col for col in ('admittime', 'dischtime') if col in labevents.columns]
if stale_columns:
    labevents = labevents.drop(columns=stale_columns)

labevents = labevents.merge(df['admissions'][['hadm_id', 'admittime', 'dischtime']], on='hadm_id')
charted_during_stay = (
    (labevents['charttime'] >= labevents['admittime'])
    & (labevents['charttime'] <= labevents['dischtime'])
)
df['labevents'] = labevents[charted_during_stay]

# --- d_labitems ---------------------------------------------------------------
df['d_labitems'] = df['d_labitems'].dropna(subset=['itemid', 'label'])

# --- diagnoses_icd / d_icd_diagnoses -------------------------------------------
# Use one code column name (icd_code) for both; rename is a no-op when the
# icd9_code column is absent.
for icd_table in ('diagnoses_icd', 'd_icd_diagnoses'):
    df[icd_table] = df[icd_table].rename(columns={'icd9_code': 'icd_code'})

df['diagnoses_icd'] = df['diagnoses_icd'].dropna(subset=['subject_id', 'hadm_id', 'icd_code'])
df['d_icd_diagnoses'] = df['d_icd_diagnoses'].dropna(subset=['icd_code', 'long_title'])

#### Calculate age at time of admission

In [None]:
# Calculate age function
def calculate_age(date1, date2):
    """Return the number of whole 365.25-day years elapsed from date1 to date2.

    Both arguments must provide ``to_pydatetime()``; the subtraction is done on the
    converted Python datetimes. The result is a float, because the day count is
    floor-divided by the float 365.25, and is negative when date2 precedes date1.
    """
    elapsed = date2.to_pydatetime() - date1.to_pydatetime()
    return elapsed.days // 365.25

# Attach patient demographics to each admission (inner join on subject_id)
patients_admissions_df = df['admissions'].merge(df['patients'], on='subject_id')

is_mimic_iv = 'anchor_age' in df['patients'].columns
if is_mimic_iv:
    # MIMIC-IV: anchor_age plus the number of calendar years between anchor year and admission
    years_since_anchor = (
        patients_admissions_df['admittime'].dt.year
        - patients_admissions_df['anchor_year_datetime'].dt.year
    )
    age_at_admission = patients_admissions_df['anchor_age'] + years_since_anchor
else:
    # MIMIC-III: derive the age row by row from date of birth and admission time
    age_at_admission = patients_admissions_df.apply(
        lambda row: calculate_age(row["dob"], row["admittime"]), axis=1
    )

# Store the result directly under its final column name
patients_admissions_df['age'] = age_at_admission

# Keep admissions of patients aged 18 to 90 (both bounds included)
is_adult = patients_admissions_df['age'].between(18, 90)
patients_admissions_df = patients_admissions_df[is_adult]



#### Filter lab events for chemistry-based labs

In [None]:
# One entry per chemistry lab: column name -> (itemid, largest accepted valuenum).
# The key order is the column order of the saved labs file.
lab_specs = {
    'albumin': (50862, 10),
    'globulin': (50930, 10),
    'total_protein': (50976, 20),
    'aniongap': (50868, 10000),
    'bicarbonate': (50882, 10000),
    'bun': (51006, 300),
    'calcium': (50893, 10000),
    'chloride': (50902, 10000),
    'creatinine': (50912, 150),
    'glucose': (50931, 10000),
    'sodium': (50983, 200),
    'potassium': (50971, 30),
}
lab_names = list(lab_specs)
lab_itemids = [itemid for itemid, _ in lab_specs.values()]

# Keep the listed lab items that carry a value; values must be positive,
# except for item 50868 (the aniongap entry above), which is exempt from that check
labevents = df['labevents']
is_listed_item = labevents['itemid'].isin(lab_itemids)
is_positive_or_aniongap = (labevents['valuenum'] > 0) | (labevents['itemid'] == 50868)
has_value = labevents['valuenum'].notnull()
patients_labs_df = labevents[is_listed_item & is_positive_or_aniongap & has_value].copy()

# One column per lab: the value where the row is that lab and within its bound, NaN elsewhere
for lab_name, (itemid, upper_bound) in lab_specs.items():
    in_range = (patients_labs_df['itemid'] == itemid) & (patients_labs_df['valuenum'] <= upper_bound)
    patients_labs_df[lab_name] = np.where(in_range, patients_labs_df['valuenum'], np.nan)

# Collapse to one row per (hadm_id, charttime) taking the max of each lab,
# order the rows, and discard rows in which every lab is missing
patients_labs_df = (
    patients_labs_df
    .groupby(['hadm_id', 'charttime'])
    .agg({lab_name: 'max' for lab_name in lab_names})
    .reset_index()
    .sort_values(by=['hadm_id', 'charttime'])
    .dropna(subset=lab_names, how='all')
)

# Keep only labs whose admission is in patients_admissions_df ...
labs_with_admission = patients_labs_df['hadm_id'].isin(patients_admissions_df['hadm_id'])
patients_labs_df = patients_labs_df[labs_with_admission]

# ... and only admissions that still have labs, reduced to the model's demographic columns
admissions_with_labs = patients_admissions_df['hadm_id'].isin(patients_labs_df['hadm_id'])
patients_info_df = patients_admissions_df.loc[admissions_with_labs, ['hadm_id', 'age', 'gender']]

# Write both frames to the data folder used by the generated code
patients_labs_df.to_parquet(f"{CODING_FOLDER}/{DATA_FOLDER}/patients_labs.parquet")
patients_info_df.to_parquet(f"{CODING_FOLDER}/{DATA_FOLDER}/patients_info.parquet")

#### Generate a filtered list of ICD9 codes

In [None]:
# Keep only the diagnoses of admissions that are present in patients_labs_df.
# .copy() makes this an independent frame, so the assignment below does not raise
# SettingWithCopyWarning and cannot write through to df['diagnoses_icd'].
has_labs = df['diagnoses_icd']['hadm_id'].isin(patients_labs_df['hadm_id'])
filtered_icd_diagnoses_df = df['diagnoses_icd'][has_labs].copy()

# Normalise the codes to lower case
filtered_icd_diagnoses_df['icd_code'] = filtered_icd_diagnoses_df['icd_code'].str.lower()

# Save to data folder
filtered_icd_diagnoses_df[['hadm_id', 'icd_code']].to_parquet(f"{CODING_FOLDER}/{DATA_FOLDER}/patients_diagnoses.parquet")

# Normalise the dictionary codes the same way BEFORE comparing. The diagnosis codes
# above are lower case, so without this every code containing a letter would fail the
# isin() test and would also never match when the saved files are joined later.
# assign() returns a new frame; df['d_icd_diagnoses'] itself is left untouched.
# NOTE(review): matching is on icd_code alone; if the source tables carry several ICD
# versions in one column, confirm that codes cannot collide across versions.
icd_dictionary_df = df['d_icd_diagnoses'].assign(
    icd_code=df['d_icd_diagnoses']['icd_code'].str.lower()
)

# Keep only dictionary entries for codes that actually occur in the filtered diagnoses
code_is_used = icd_dictionary_df['icd_code'].isin(filtered_icd_diagnoses_df['icd_code'])
filtered_d_icd_diagnoses_df = icd_dictionary_df[code_is_used]

# Save to data folder
filtered_d_icd_diagnoses_df[['icd_code', 'long_title']].to_parquet(f"{CODING_FOLDER}/{DATA_FOLDER}/icd_codes.parquet")

#### Task list for AutoGen to execute

In [None]:
# The lab test types are all columns of patients_labs_df except its two key columns
lab_test_types = patients_labs_df.columns.tolist()

for key_column in ('hadm_id', 'charttime'):
    lab_test_types.remove(key_column)

# Prompts for the AutoGen agents, one per pipeline stage, in execution order.
# Each stage reads the files written by earlier stages from DATA_FOLDER, so file names
# and dataframe/feature names must stay consistent across prompts.
# Prompt fixes in this revision:
#   - labeling now loads the labs dataframe that its label step refers to;
#   - feature engineering uses one set of feature names in steps 5 and 7;
#   - step numbers are sequential (no skipped, duplicated or out-of-order numbers).
tasks = [
# Research
    f"""You are a Healthcare Specialist. Given the medical condition, {MEDICAL_CONDITION_NAME}, what are the key indicators and criteria based on blood chemisty lab tests that can be use to predict the onset of the medical condition. 
    - Please don't use any web scraping or external data sources.
    - Only include the chemistry lab tests types that are in the following list (lab_test_types):

{lab_test_types} 

Using a Python code block (research.py) Save your findings to '{DATA_FOLDER}/lab_test_types.json' as an array of lab test types.
    """,

# Processing / Filtering
    f"""You are a Data Scientist with Python development skills.  Please generate the code to perform the following tasks in the same Python code block (named processing_filtering.py):
1. Load '{DATA_FOLDER}/patients_labs.parquet' into pandas dataframe (labs).
2. Load '{DATA_FOLDER}/lab_test_types.json' and create a list of lab test types (lab_test_types).
3. Remove any values in the lab_test_types list that do not exist in the columns of labs dataframe.
4. Remove any columns (except hadm_id, charttime) in the labs dataframe that do not exist in the list of lab_test_types.
5. Remove any rows where all the lab_test_types columns are null.
6. Save the labs dataframe to  '{DATA_FOLDER}/filtered_patients_labs.parquet'.
    """,

# Labeling
    f"""You are a Data Scientist with Python development skills. Please generate the code to perform the following tasks in the same Python code block (named labeling.py):
1. Load '{DATA_FOLDER}/filtered_patients_labs.parquet' into pandas dataframe (labs).
2. Load '{DATA_FOLDER}/patients_diagnoses.parquet' into pandas dataframe (diagnoses).
3. Load '{DATA_FOLDER}/icd_codes.parquet' into pandas dataframe (icd_codes).
4. Create a list of icd_codes (condition_codes) where the long_title column contains (case insensitive) any of the following keywords: {MEDICAL_CONDITION_ICD_KEYWORDS}
5. Create a unique list of hadm_ids (positive_diagnoses) from diagnoses dataframe where the icd_code is in the condition_codes list.
6. Create a new dataframe (labels) with the following columns:
    - hadm_id (unique from labs dataframe)
    - condition_label (1 if hadm_id is in positive_diagnoses list, 0 otherwise)
7. Save the labels as "{DATA_FOLDER}/patients_labels.parquet".
    """,

# Feature Engineering
    f"""You are a Data Scientist with Python development skills who specializes in feature engineering for machine learning models. 
Please generate the code to perform the following tasks in the same Python code block (named feature_engineering.py):
1. Load the following parquet files into pandas dataframes
    - '{DATA_FOLDER}/filtered_patients_labs.parquet' into pandas dataframe (labs).
    - '{DATA_FOLDER}/patients_info.parquet' into pandas dataframe (patient_info).
    - '{DATA_FOLDER}/patients_labels.parquet' into pandas dataframe (labels).
2. Generate a list of lab test columns (lab_tests) from the labs dataframe.
    - Excluding hadm_id, charttime columns
3. Group labs dataframe by hadm_id and charttime and take the mean for each column (grouped_labs).
4. Sort the grouped_labs dataframe by hadm_id and charttime.
5. For each column (lab_test) in grouped_labs that exists in lab_tests, calculate the following features:
    - difference from baseline value (lab_test_baseline_delta)
    - delta from previous value (lab_test_diff)
    - time difference in hours from previous value (lab_test_timediff)
    - rate of change per day (lab_test_rateofchange)
6. Drop the following columns:
    - charttime
    - lab_test_timediff
7. Grouped the dataframe by hadm_id and aggregate in the following way:
    - Generate (mean, median, std, min, max) for the following engineered features for each lab_test column:
        - lab_test
        - lab_test_baseline_delta
        - lab_test_diff
        - lab_test_rateofchange
8. Flatten the multi-index columns to a single level (engineered_features)
9. Fix the the column names by removing trailing underscores.
10. Impute engineered_features to fill any missing values using a simple imputer.
11. Merge the patient_info dataframe with the engineered_features dataframe on hadm_id (features)
12. Merge the labels dataframe with the features dataframe on hadm_id (features_labels).
13. Drop any rows with missing values.
14. Drop the hadm_id column from the features_labels dataframe.
15. Save the features_labels as "{DATA_FOLDER}/features_labels.parquet".
    """,

# Dimensionality Reduction
    f"""You are an AI Engineer with Python development skills that specializes in dimensionality reduction. Please generate the code to perform the following tasks in the same Python code block (named dimensionality_reduction.py):
1. Load the following parquet files into pandas dataframes
    - '{DATA_FOLDER}/features_labels.parquet' into pandas dataframe (features_labels).
2. Split the features_labels dataframe into features and labels dataframes with the labels being the condition_label column.
3. Perform dimensionality reduction on the features based on your recommended method for use with a classification model.
4. Make sure the columns names of the reduced features are strings.
5. Combine the reduced features and labels (reduced_features_labels).
6. Save reduced_features_labels to a new parquet file: '{DATA_FOLDER}/reduced_features_labels.parquet'.
7. Print the original number of features and number of features retained after dimensionality reduction.

After the execution of the Python code, please provide a brief explanation of the dimensionality reduction method used, why it was chosen, and what features were retained (if possible).
    """,

# Model Training and Evaluation
    f"""You are an AI Engineer with Python development skills. Please generate the code to perform the following tasks in the same Python code block (named training_evaluation.py):
1. Load the follwing parquet file: '{DATA_FOLDER}/reduced_features_labels.parquet' into a pandas dataframe.
    - This dataframe contains a set of features and one binary label (condition_label)
2. Split the dataframe into features (X) and labels (y) dataframes with the labels being the condition_label column.
3. Split the data into training (X_train, y_train) and testing sets (X_test, y_test).
4. Train the following classifiers on the training data:
    - DecisionTreeClassifier
        - max_depth=5
        - random_state=42
    - RandomForestClassifier
        - n_estimators=300 
        - max_depth=None
        - min_samples_split=2
        - min_samples_leaf=2
        - random_state=42
        - n_jobs=-1
    - LogisticRegression
        - max_iter=1000
        - n_jobs=-1
        - random_state=42
    - GradientBoostingClassifier
        - n_estimators=300
        - random_state=42
    - MLPClassifier
        - alpha=1, 
        - max_iter=1000
        - random_state=42
    - KNeighborsClassifier
        - n_neighbors=5
        - n_jobs=-1
5. Evaluate each model on the testing data and perform tasks on it:
    - Generate a classification report based on X_test and y_test and save it in a dict (classification_report)
    - Calculate the roc curve (roc_curve) based on X_test and y_test and convert it to a dict with the columns (fpr, tpr, auc)
    - Calculate the precision-recall curve (pr_curve) based on X_test and y_test and convert it to a dict with the columns (precision, recall, auc)
    - Save the model as a pickle file to  '{DATA_FOLDER}/model_type.pkl'.
6. Create a dict (model_details) with the model names as keys and the values as the dicts (classification_report, roc_curve, pr_curve) and model_path.
7. Save the model_details as JSON to '{DATA_FOLDER}/model_details.json'.
8. For each model in model_details load the classification report as a pandas dataframe and print it as a markdown table.

After the execution of the Python code, please provide a detail analysis of each model by describing what the classification report metrics mean.

Give your detailed analysis, please provide a brief explanation of the model that performed the best and why it was chosen.
    """,

# Model Visualization
    f"""You are aa Visualization Expert with Python development skills. Please generate the code to perform the following tasks in the same Python code block (named visualization.py):
1. Load the model performance details from '{DATA_FOLDER}/model_details.json' into a pandas dataframe.
    - The keys in the JSON file are the model names and the columns (classification_report, roc_curve, pr_curve) are the model performance data.
2. Based on the classification report data (classification_report), create a combined bar plot comparing the report data for each model.
    - Create a dict (cr_data) with the model names as keys and the value: precision (from weighted avg), recall (from weighted avg), f1-score (from weighted avg), accuracy.
    - Plot the Performance Metric (precision, recall, f1-score and accuracy) for each model based on cr_data grouped by the model names
        - Group the bars by the model name with bars for each performance metric (precision, recall, f1-score and accuracy).
        - Set the hue to the Performance Metric key.
        - Scale the min of the y-axis to be slightly less than the min value of the data.
        - Scale the max of the y-axis to be slightly more than the max value of the data.
        - Remove "Classifier" from the model names for better readability.
        - Rotate the x-axis labels for better readability.
    - Save the plot to '{DATA_FOLDER}/classification_report.png'
3. Based on the roc curve data (roc_curve), create a combined line plot of the roc curve for each model.
    - Create a dict (roc_curve_data) with the model names as keys and the values: fpr, tpr, auc.
    - Plot the ROC curve for each model with the AUC value in the legend based on roc_curve_data.
    - Remove "Classifier" from the model names for better readability.
    - Save the plot to '{DATA_FOLDER}/roc_curve.png'
4. Based on the precision-recall data (pr_curve), create a combined line plot of the precision-recall for each model.
    - Create a dict (pr_curve_data) with the model names as keys and the values: precision, recall, auc.
    - Plot the Precision-Recall curve for each model with the AUC value in the legend based on pr_curve_data.
    - Remove "Classifier" from the model names for better readability.
    - Save the plot to '{DATA_FOLDER}/pr_curve.png'
5. Ouput the links to the plots that were saved.
    """
]

#### Initialize AutoGen and iterate through the dialog with the AI Assistants

AutoGen requires a configuration file (or an environment variable) named 'OAI_CONFIG_LIST' with the following format:

```json
[
    {
        "model": "gpt-4",
        "api_key": "XXXXXXXXXXXXXXXXXXXXXXXX",
        "base_url": "https://XXXXXXXXXXXX.openai.azure.com/",
        "api_type": "azure",
        "api_version": "2024-02-15-preview"
    }
]
```

In [None]:
import autogen

from autogen.coding import LocalCommandLineCodeExecutor
from autogen import Cache

# Load the model endpoint list from the OAI_CONFIG_LIST file or environment variable
config_file_or_env = "OAI_CONFIG_LIST"
config_list = autogen.config_list_from_json(config_file_or_env)

# LLM settings shared by every assistant agent
llm_config = {
    "cache_seed": 41,
    "temperature": 0.3,
    "top_p": 0.9,
    "max_tokens": 4000,
    "config_list": config_list,
    "timeout": 600}


def _is_termination_msg(message):
    """Return True when the message text ends with the keyword TERMINATE.

    A message whose "content" is missing, None, or not a string is treated as
    non-terminating. The previous lambda called .rstrip() on the raw value and
    raised AttributeError when "content" was present but None.
    """
    content = message.get("content")
    if not isinstance(content, str):
        return False
    return content.rstrip().endswith("TERMINATE")


def _make_assistant(role):
    """Create an AssistantAgent named "<role> (AI Assistant)" using the shared llm_config."""
    return autogen.AssistantAgent(
        name=f"{role} (AI Assistant)",
        llm_config=llm_config
    )


with Cache.disk(cache_path_root=f"{CODING_FOLDER}/cache") as cache:

    # One assistant per task; assistants sharing a role also share a display name
    healthcare_specialist = _make_assistant("Healthcare Specialist")
    data_scientist1 = _make_assistant("Data Scientist")
    data_scientist2 = _make_assistant("Data Scientist")
    data_scientist3 = _make_assistant("Data Scientist")
    ai_engineer1 = _make_assistant("AI Engineer")
    ai_engineer2 = _make_assistant("AI Engineer")
    vis_expert = _make_assistant("Visualizations Expert")

    # The user proxy never asks for human input; it runs the code it receives
    # with a local command-line executor whose working directory is CODING_FOLDER
    user_proxy = autogen.UserProxyAgent(
        name="User",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=10,
        is_termination_msg=_is_termination_msg,
        code_execution_config={
            # the executor to run the generated code
            "executor": LocalCommandLineCodeExecutor(work_dir=CODING_FOLDER, timeout=3600),
        },
    )

    # (recipient, summary_method) for tasks[0] .. tasks[6], in execution order
    chat_plan = [
        (healthcare_specialist, "reflection_with_llm"),
        (data_scientist1, None),
        (data_scientist2, None),
        (data_scientist3, None),
        (ai_engineer1, "last_msg"),
        (ai_engineer2, "last_msg"),
        (vis_expert, None),
    ]

    chats = [
        {
            "recipient": recipient,
            "message": task,
            "summary_method": summary_method,
            "cache": cache
        }
        for (recipient, summary_method), task in zip(chat_plan, tasks)
    ]

    # Only the first chat sets these two options explicitly
    chats[0].update({"clear_history": True, "silent": False})

    chat_res = user_proxy.initiate_chats(chats)



#### Save the dialog to a markdown file

In [None]:
from IPython.display import display, Markdown

def _render_execution_result(content):
    """Render a code-execution reply (text starting with "exitcode:") as Markdown.

    The first line carries the exit status; a status starting with "0" is reported as
    success, anything else as an error together with the status text. When a second
    line exists and contains a colon, its label (the text before the first colon) is
    removed and the remainder plus all following lines are appended as the output.
    """
    lines = content.split("\n")

    # Text after "exitcode:"; its first word is the numeric exit code
    status = lines[0].split(":", 1)[1].strip()
    exit_code = status.split(' ')[0]

    if exit_code == '0':
        rendered = "> **_SUCCESS:_** The provided code executed successfully.\n\n"
    else:
        # status is computed beforehand: quoting ":" inside this f-string would be a
        # syntax error on Python versions before 3.12
        rendered = f"> **_ERROR:_** There was an error in the code provided**: {status}\n\n"

    # A reply may consist of the status line only, so check before reading line two
    if len(lines) > 1:
        # Split on the FIRST colon only, so output lines that contain colons stay whole
        _label, separator, first_output_line = lines[1].partition(":")
        if separator:
            output = "\n".join([first_output_line.strip()] + lines[2:]).strip()
            rendered += f"\n{output}\n\n"

    return rendered


def _render_text(content):
    """Render an ordinary message: remove TERMINATE and everything from "Context:" onward."""
    content = content.replace("TERMINATE", "")
    if "Context:" in content:
        content = content.split("Context:")[0]
    return content + "\n\n"


# Build the Markdown transcript piece by piece
dialog_parts = []

for chat_spec, chat in zip(chats, chat_res):
    assistant = chat_spec['recipient'].name

    for msg in chat.chat_history:
        # Heading showing the direction of the message; other roles get no heading
        role = msg.get('role')
        if role == "user":
            dialog_parts.append(f"## {assistant} $\\Rightarrow$ User\n")
        elif role == "assistant":
            dialog_parts.append(f"## User $\\Rightarrow$ {assistant}\n")

        # Treat missing/None content as empty text instead of crashing on .startswith
        content = msg.get('content')
        if content is None:
            content = ""
        elif not isinstance(content, str):
            content = str(content)

        if content.startswith("exitcode:"):
            dialog_parts.append(_render_execution_result(content))
        else:
            dialog_parts.append(_render_text(content))

        dialog_parts.append("---\n")

dialog = "".join(dialog_parts)

# Display the dialog as markdown
display(Markdown(dialog))

# Save the dialog to a markdown file; explicit UTF-8 so non-ASCII model output
# does not depend on the platform's default encoding
with open(f"{CODING_FOLDER}/dialog.md", "w", encoding="utf-8") as text_file:
    text_file.write(dialog)
