# Esophageal Cancer Research - Clinical Table
* By Sangwon Baek
* Samsung Medical Center
* August 3rd, 2023

### Import necessary packages and read data

In [1]:
# Import necessary modules
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# A notebook cell executes as ``__main__`` with no parent package, so a
# relative import (``from ..src... import ...``) raises
# "ImportError: attempted relative import with no known parent package".
# Instead, put the parent directory on sys.path and import ``src`` absolutely.
# NOTE(review): assumes the notebook's working directory is a direct child of
# the project root (consistent with the "../data/..." path used further down)
# and that ``src`` lives in that root -- confirm against the repository layout.
_project_root = str(Path.cwd().parent)
if _project_root not in sys.path:
    sys.path.insert(0, _project_root)

from src.clinical_table_analysis import main as analyze_clinical_data

### Create Clinical Table

In [None]:
# Run the clinical data analysis.
# `analyze_clinical_data` is the `main` function imported from the project's
# clinical_table_analysis module; its result is unpacked into exactly two
# objects, so the call must return a 2-item sequence.
# NOTE(review): both results are presumably pandas DataFrames (they are passed
# to `display` in later cells) -- confirm against the module.
clinical_table_df, frequency_table_df = analyze_clinical_data()

In [89]:
# Display the clinical table: print a plain-text heading, then render the
# table object itself.
# NOTE(review): `display` is not imported in this notebook; presumably it is
# the IPython built-in that Jupyter kernels inject -- this cell would raise
# NameError if run as a plain Python script.
print("Clinical Table:")
display(clinical_table_df)

In [90]:
# Display the frequency table: print a heading (preceded by a blank line via
# the leading "\n"), then render the table object itself.
# NOTE(review): `display` is not imported in this notebook; presumably it is
# the IPython built-in that Jupyter kernels inject -- this cell would raise
# NameError if run as a plain Python script.
print("\nFrequency Table:")
display(frequency_table_df)

In [None]:
# Load the preprocessed data for further analysis or visualization.
# The path is relative, so it is resolved against the kernel's current working
# directory; the CSV must sit at ../data/preprocessed/ from wherever the
# notebook is run. The resulting DataFrame `df` feeds every plot below.
df = pd.read_csv("../data/preprocessed/ECA_Dataset.csv")

### Create Visualizations

In [None]:
# 1. Distribution of T categories
# Bar chart of the number of rows in each T category, drawn on a fresh
# 10x6-inch figure.
plt.figure(figsize=(10, 6))
(
    df['T_category']
    .value_counts()  # rows per distinct T category
    .sort_index()    # order the bars by category label, not by count
    .plot.bar()
)
plt.ylabel('Count')
plt.xlabel('T Category')
plt.title('Distribution of T Categories')
plt.show()

In [None]:
# 2. Distribution of N categories
# Bar chart of the number of rows in each N category, drawn on a fresh
# 10x6-inch figure.
plt.figure(figsize=(10, 6))
(
    df['N_category']
    .value_counts()  # rows per distinct N category
    .sort_index()    # order the bars by category label, not by count
    .plot.bar()
)
plt.ylabel('Count')
plt.xlabel('N Category')
plt.title('Distribution of N Categories')
plt.show()

In [94]:
# 3. Age distribution by T category
# One box per T category showing the spread of the 'Age' column.
plt.figure(figsize=(12, 6))
sns.boxplot(
    x='T_category',
    y='Age',
    data=df,
    # Pin the box order to the sorted category labels so this plot lines up
    # with the sorted bar chart of T categories; left to itself seaborn may
    # order string categories by first appearance in the data.
    order=sorted(df['T_category'].dropna().unique()),
)
plt.title('Age Distribution by T Category')
# Replace the raw column name on the x-axis with the label used by the
# T-category bar chart.
plt.xlabel('T Category')
plt.show()

In [None]:
# 4. Total Dissected LN by T category
# One box per T category showing the spread of the 'total_LN' column.
plt.figure(figsize=(12, 6))
sns.boxplot(
    x='T_category',
    y='total_LN',
    data=df,
    # Pin the box order to the sorted category labels so this plot lines up
    # with the sorted bar chart of T categories; left to itself seaborn may
    # order string categories by first appearance in the data.
    order=sorted(df['T_category'].dropna().unique()),
)
plt.title('Total Dissected LN by T Category')
# Replace the raw column names on the axes with readable labels that match
# the plot title and the T-category bar chart.
plt.xlabel('T Category')
plt.ylabel('Total Dissected LN')
plt.show()

In [96]:
# 5. Primary Site distribution
# Pie chart of the share of rows per primary site, drawn on a fresh
# 10x6-inch figure; each wedge is annotated with its percentage to one
# decimal place.
plt.figure(figsize=(10, 6))
(
    df['Primary_Site']
    .value_counts()  # rows per distinct primary site
    .plot.pie(autopct='%1.1f%%')
)
# Blank out the y-axis label (presumably the one pandas adds from the series
# name, which is just noise next to a pie chart).
plt.ylabel('')
plt.title('Distribution of Primary Sites')
plt.show()