## LUAD Phenotypic analysis

Counting of phenotypic features to create a tabular overview of the cohorts after clustering.

In [6]:
#import the necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import openpyxl


%matplotlib inline

# Bring in the helper used to register a warnings filter.
from warnings import simplefilter
# Silence every FutureWarning for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas, so upcoming
# behavior changes in the calls below will not be announced.
simplefilter(action='ignore', category=FutureWarning)

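Import the dataset containing phenotypic features and the results table from the cluster analysis. After dropping the `histological_type` column from the cluster table (the phenotypic dataset supplies this column as well), join the two tables on their index to have cluster and phenotypic data in one dataset.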

In [7]:
# Phenotypic annotations; the first column of the TSV becomes the index.
df_pheno_all = pd.read_csv("Phenotypic_data_LUAD.tsv", sep="\t", index_col=0)
# Alias kept so that any code referring to `X` keeps working.
X = df_pheno_all

# Cluster assignments are read from the fourth sheet (an integer `sheet_name`
# is a zero-based sheet position); the first column becomes the index.
df_clusters = pd.read_excel("LUAD_Table_for_decon.xlsx", sheet_name=3, index_col=0)
# DataFrame.join raises on overlapping column names unless suffixes are given.
# NOTE(review): presumably `histological_type` is dropped here because the
# phenotypic table carries the same column - confirm.
df_clusters = df_clusters.drop(["histological_type"], axis=1)

# Left join on the index: every row of the cluster table is kept and matching
# phenotypic columns are appended.
cluster_pheno = df_clusters.join(X)

# Restrict the mean to numeric columns. Without `numeric_only=True`, pandas >= 2.0
# raises a TypeError as soon as a non-numeric column is present, while older
# versions silently dropped such columns and only emitted a FutureWarning.
pheno_means = cluster_pheno.groupby("Cluster").mean(numeric_only=True)

After having created the dataset, count the phenotypic data by cluster and create an Excel spreadsheet with the counted information.

In [8]:
# Every summary below uses the same grouping of the joined table by cluster label.
by_cluster = cluster_pheno.groupby("Cluster")


def _counts_per_cluster(column):
    """Count how often each value of `column` occurs within every cluster.

    Uses the default `value_counts()` settings, so missing entries are not counted.
    """
    return by_cluster[column].value_counts()


# Gender and canonical KRAS/EGFR/ALK mutation status.
gender_cluster = _counts_per_cluster("gender")
KRAS_EGFR_ALK_per_Cluster = _counts_per_cluster("Canonical_mut_in_KRAS_EGFR_ALK")

# Pathologic T / N / M categories and the overall pathologic stage.
pT_cluster = _counts_per_cluster("pathologic_T")
pN_cluster = _counts_per_cluster("pathologic_N")
pM_cluster = _counts_per_cluster("pathologic_M")
pStage_cluster = _counts_per_cluster("pathologic_stage")

# Histological subtype.
histo_cluster = _counts_per_cluster("histological_type")

# Treatment history and additional therapies / procedures.
neoadj_cluster = _counts_per_cluster("history_of_neoadjuvant_treatment")
add_pharm_cluster = _counts_per_cluster("additional_pharmaceutical_therapy")
add_loc_surg_cluster = _counts_per_cluster("additional_surgery_locoregional_procedure")
add_met_surg_cluster = _counts_per_cluster("additional_surgery_metastatic_procedure")
add_rad_cluster = _counts_per_cluster("additional_radiation_therapy")

# Age at diagnosis: the per-value counts and the per-cluster mean.
age_counts_cluster = _counts_per_cluster("age_at_initial_pathologic_diagnosis")
age_mean_cluster = by_cluster["age_at_initial_pathologic_diagnosis"].mean()

# Mutation status over the extended gene list.
Cnncl_mt_n_KRAS_EGFR_ALK_RET_ROS1_BRAF_ERBB2_HRAS_NRAS_AKT1_MAP2_cluster = _counts_per_cluster(
    "Cnncl_mt_n_KRAS_EGFR_ALK_RET_ROS1_BRAF_ERBB2_HRAS_NRAS_AKT1_MAP2"
)


# Write the joined table and every per-cluster summary into one workbook,
# one sheet per table. The context manager saves and closes the file on exit.
# Excel limits sheet names to 31 characters; all names below stay within that.
with pd.ExcelWriter('LUAD_pheno_analysis.xlsx') as writer:
    cluster_pheno.to_excel(writer, sheet_name='Raw')
    gender_cluster.to_excel(writer, sheet_name='Gender')
    age_mean_cluster.to_excel(writer, sheet_name="age_mean")
    age_counts_cluster.to_excel(writer, sheet_name="age_counts")
    histo_cluster.to_excel(writer, sheet_name="Histo_Subtype")
    pT_cluster.to_excel(writer, sheet_name="pT")
    pN_cluster.to_excel(writer, sheet_name="pN")
    pM_cluster.to_excel(writer, sheet_name="pM")
    # The stage counts are computed together with pT/pN/pM, so they are
    # exported with them to keep the overview complete.
    pStage_cluster.to_excel(writer, sheet_name="pStage")
    KRAS_EGFR_ALK_per_Cluster.to_excel(writer, sheet_name="KRAS_EGFR_ALK_per_Cluster")
    Cnncl_mt_n_KRAS_EGFR_ALK_RET_ROS1_BRAF_ERBB2_HRAS_NRAS_AKT1_MAP2_cluster.to_excel(writer, sheet_name="More mutations included")
    neoadj_cluster.to_excel(writer, sheet_name="History_neoadj_treatment")
    add_pharm_cluster.to_excel(writer, sheet_name="add_pharmacologic_treatment")
    add_rad_cluster.to_excel(writer, sheet_name="add_radiation_treatment")
    add_loc_surg_cluster.to_excel(writer, sheet_name="add_locoregional_surgical_proc")
    add_met_surg_cluster.to_excel(writer, sheet_name="add_metastatic_surgical_proc")
    
