# Dependencies

In [2]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt

from utils import unique

import pickle

from tabulate import tabulate

# Load metadata

In [3]:
# Read the pickled metadata table covering all relevant sequences.
meta_path = "/local/data1/simjo484/mt_data/all_data/MRI/extracted_meta.pkl"
with open(meta_path, "rb") as meta_file:
    meta_main = pickle.load(meta_file)

In [4]:
# What are the Notes? And how often do they occur?

#for i in unique(meta_main["Notes"]).values:
#    print(i)

# Produce information for each diagnosis

In [None]:
# Distinct diagnosis labels present in the metadata table.
diagnoses = unique(meta_main["diagnosis"])["Values"].tolist()
# Image types recorded for the first diagnosis in that list (not used further in this cell).
image_types = unique(meta_main[meta_main["diagnosis"] == diagnoses[0]]["image_type"])["Values"].tolist()

# One row per subjetID, so each patient is counted once in the gender statistics.
patient_meta = meta_main.drop_duplicates(subset=["subjetID"])

# Print a summary for each diagnosis
for diag in diagnoses:
    print(" ====== Diagnose: ", diag, " =======")

    # Counts per image type among all rows with this diagnosis
    print(unique(meta_main[meta_main["diagnosis"] == diag]["image_type"]))

    # Gender counts among the de-duplicated patients with this diagnosis
    counts_gend = unique(patient_meta[patient_meta["diagnosis"] == diag]["gender"])

    print("\nGender counts:")
    print(counts_gend.values)

    # Summing the matching rows gives 0 when no patient is recorded as "Male";
    # taking `.values[0]` of that empty selection would raise IndexError instead.
    male_count = counts_gend.loc[counts_gend["Values"] == "Male", "Counts"].sum()
    total_count = counts_gend["Counts"].sum()
    # Guard against a diagnosis without any gender rows (division by zero).
    male_proportion = male_count / total_count if total_count else float("nan")
    print("Male proportion: ", male_proportion)
    print("-------------------------------\n")

# Make diagnosis table for slides

In [None]:
# Patients per diagnosis: keep a single row per (subject, diagnosis) pair before counting.
per_patient_rows = meta_main.drop_duplicates(subset=["subjetID", "diagnosis"])
diagnose_counts = unique(per_patient_rows["diagnosis"])

# Sessions per diagnosis: keep a single row per (subject, session, diagnosis) triple.
per_session_rows = meta_main.drop_duplicates(subset=["subjetID", "session_name", "diagnosis"])
session_counts = unique(per_session_rows["diagnosis"])

# Outer-join both count tables on the diagnosis label, largest patient count first.
joint = diagnose_counts.merge(session_counts, on="Values", how="outer")
joint = joint.sort_values(by="Counts_x", ascending=False)

# Render the combined counts as a LaTeX table.
latex_table = tabulate(
    joint,
    headers=["Diagnose", "# Patients", "# Sessions"],
    tablefmt="latex",
    showindex=False,
)
print(latex_table)

In [None]:
# Display the de-duplicated metadata table (one row per subjetID).
patient_meta

In [None]:
# Display the list of distinct diagnosis labels.
diagnoses

# Distribution of survival

In [None]:
# NOTE(review): this cell used to read from `data`, a name that is never defined
# in this notebook (NameError). It now reads from `meta_main`, the only table the
# notebook loads -- confirm that `meta_main` carries the "session_status" and
# "survival" columns used below.

# Keep only the observations from pre-operative sessions.
data_preop = meta_main[meta_main["session_status"] == "pre_op"]

# Histogram of "survival", using one value per (subject, diagnosis) pair.
# Assuming the unit is days: the distribution spans roughly 0 to 10,000 days
# (about 27 years), with a mean of roughly 4,500 days (about 12 years).
data_preop.drop_duplicates(subset=["subjetID", "diagnosis"])["survival"].plot(kind="hist")

# Atypical Teratoid Rhabdoid Tumor (ATRT)

In [None]:
# NOTE(review): this cell used to read from `data`, a name that is never defined
# in this notebook (NameError). It now reads from `meta_main`, the only table the
# notebook loads -- confirm this is the intended source.

# Restrict the metadata to rows diagnosed as ATRT and report the subset's shape.
data_atrt = meta_main[meta_main["diagnosis"] == "Atypical Teratoid Rhabdoid Tumor (ATRT)"]
print(data_atrt.shape)

# Two stacked axes are created; only the first one is drawn on in this cell.
fig, axs = plt.subplots(nrows=2, sharex=False)

# np.unique returns (distinct values, occurrence counts) for the gender column.
counts_gender = np.unique(data_atrt["gender"], return_counts=True)
axs[0].bar(x=counts_gender[0], height=counts_gender[1])
axs[0].set_xlabel("Distribution of gender")

# Figures

In [None]:
# NOTE(review): this cell used to read from `data`, a name that is never defined
# in this notebook (NameError). It now reads from `meta_main`, the only table the
# notebook loads -- confirm that `meta_main` carries the "gender" and "ethnicity"
# columns used below.

# Two stacked bar charts: gender on top, ethnicity below.
fig, axs = plt.subplots(nrows=2, sharex=False)

# np.unique returns (distinct values, occurrence counts) for each column.
counts_gender = np.unique(meta_main["gender"], return_counts=True)
axs[0].bar(x=counts_gender[0], height=counts_gender[1])
axs[0].set_xlabel("Distribution of gender")

counts_eth = np.unique(meta_main["ethnicity"], return_counts=True)
axs[1].bar(x=counts_eth[0], height=counts_eth[1])
axs[1].set_xlabel("Distribution of ethnicity")