In [10]:
import pandas as pd
import math
from sklearn.manifold import TSNE
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import SegmentationGuideline as SG
import matplotlib

In [1]:
# Locations of the three input CSV files read later with pd.read_csv.
# NOTE(review): all three hold the same 'LOCAL PATH' string, which looks like a
# placeholder -- replace each with the real file location before running.
PATH_DISCOVERY_CSV = 'LOCAL PATH'
PATH_ERODED_DISCOVERY_CSV = 'LOCAL PATH'
PATH_VALIDATION_CSV = 'LOCAL PATH'

In [None]:
def TSNE_maker(df, sub_analysis=None, *, first_feature='original_shape_Elongation',
               last_feature='lbp-3D-k_ngtdm_Strength', random_state=42):
    """
    Replace the radiomic feature columns of a dataframe with a 2-D t-SNE embedding.

    df param: The dataframe containing a full array of radiomic features. The feature
    columns must be contiguous, running from `first_feature` to `last_feature` (inclusive).
    sub_analysis param: if None, all the features will be used. A list of one or more of
    ['shape', 'intensity', 'texture'] (or a single one of those strings) restricts the
    embedding to those categories. Categories are matched on the column name:
    'shape' -> contains 'shape', 'intensity' -> contains 'firstorder',
    'texture' -> contains neither. Repeated categories are counted once.
    first_feature / last_feature params: names of the first and last feature columns.
    random_state param: seed forwarded to TSNE.

    Returns a dataframe in which the radiomic feature columns are replaced with two
    columns of ['tsne 1', 'tsne 2'], followed by every non-feature column of `df`.
    The row index of the result is reset to 0..n-1.

    Raises KeyError if a marker column is missing or a sub_analysis category is invalid.
    Raises ValueError if no feature column is selected or `df` has fewer than 2 rows.
    """
    valid_subanalysis_options = ['shape', 'intensity', 'texture']

    feature_start = df.columns.get_loc(first_feature)
    feature_end = df.columns.get_loc(last_feature) + 1
    feat_names = df.columns[feature_start:feature_end].tolist()

    # Non-feature columns are carried over unchanged, whatever the sub-analysis is.
    rest_df = df.drop(columns=feat_names)

    if sub_analysis is None:
        feat_df = df.iloc[:, feature_start:feature_end]
    else:
        # A bare string would otherwise be iterated character by character.
        if isinstance(sub_analysis, str):
            sub_analysis = [sub_analysis]

        included_feat_names = []
        for category in sub_analysis:
            if category == 'shape':
                included_feat_names += [f for f in feat_names if 'shape' in f]
            elif category == 'intensity':
                included_feat_names += [f for f in feat_names if 'firstorder' in f]
            elif category == 'texture':
                included_feat_names += [f for f in feat_names
                                        if 'firstorder' not in f and 'shape' not in f]
            else:
                raise KeyError(f"Invalid sub_analysis category: '{category}'. Expected one of {valid_subanalysis_options}.")

        # Drop repeats (same category listed twice, or a name matching two categories)
        # while keeping first-seen order; a duplicated column would be double-weighted
        # in the t-SNE distance computation.
        included_feat_names = list(dict.fromkeys(included_feat_names))

        if not included_feat_names:
            raise ValueError(f"sub_analysis={sub_analysis!r} selects no feature columns.")

        feat_df = df[included_feat_names]

    n_samples = feat_df.shape[0]
    if n_samples < 2:
        # TSNE requires perplexity < n_samples, which sqrt(n) cannot satisfy for n <= 1.
        raise ValueError(f"t-SNE needs at least 2 rows, got {n_samples}.")

    # t-SNE hyperparameter heuristic: perplexity = sqrt(number of samples)
    ideal_perplexity = math.sqrt(n_samples)

    # t-SNE dimensionality reduction
    tsne_x_embedding = TSNE(
        n_components=2,
        perplexity=ideal_perplexity,
        n_jobs=-1,
        max_iter=1000,
        learning_rate='auto',
        init='random',
        random_state=random_state,
    ).fit_transform(feat_df)

    # t-SNE dimensionality-reduced dataframe (fresh 0..n-1 index)
    tsne_df = pd.DataFrame(data=tsne_x_embedding, columns=['tsne 1', 'tsne 2'])

    # Append the non-feature columns; resetting their index aligns rows by position.
    final_df = pd.concat([tsne_df, rest_df.reset_index(drop=True)], axis=1)

    return final_df

In [26]:
# (environment, group character, two-character site codes) -- expanded below into a
# {(group, code): environment} lookup table.
_ENV_GROUPS = (
    ('Brain', '1', ('01',)),                                  # brain
    ('Head and Neck', '1', ('02', '03', '24', '27')),         # thyroid, thymus, parotid-gland, head and neck
    ('Head and Neck', '4', ('01',)),
    ('Breast', '1', ('04',)),                                 # breast
    ('Lung', '1', ('05', '06')),                              # broncopulmonary, pleura
    ('Others', '1', ('07', '13', '28')),                      # heart, spleen, skin, muscle, mediastinal space
    ('Others', '2', ('08',)),
    ('Others', '4', ('02',)),
    ('Gastrointestinal', '1', ('08', '09', '14', '15')),      # esophagus, stomach, small intestine, colon/rectum
    ('Gastrointestinal', '4', ('03', '04')),
    ('Hepatobiliary', '1', ('10', '11', '12')),               # liver, biliary system, pancreas
    ('Peritoneum', '1', ('16', '25')),                        # peritoneum and peritoneal carcinomatosis
    ('Adrenal', '1', ('17',)),                                # adrenal
    ('Urinary System', '1', ('18', '19', '20')),              # kidney, bladder, prostate
    ('Gynecological', '1', ('21', '22', '23', '26')),         # ovary, uterus, genitals, cervix
    # head and neck / supraclavicular (01-03), thoracic (04-14), abdominopelvic (15-22) LNs
    ('Lymph Node', '3', tuple(f'{i:02d}' for i in range(1, 23))),
    ('Subcutaneous Fat', '4', ('05',)),                       # subcutaneous fat
)
_ENV_BY_CODE = {(group, code): env for env, group, codes in _ENV_GROUPS for code in codes}


def add_env(seg_name):
    """
    Receives a segment name, returns an environment (location) accordingly.

    The environment is decided by two pieces of the name read at fixed positions:
    the character at index 2 (group) and the two characters at indices 4-5 (site code).
    Group '2' maps to 'Bone' for every site code except '08' ('Others'); every other
    recognised (group, code) pair is looked up in a table.

    Returns the environment label as a string, or None (after printing a
    'segment name error' message) when `seg_name` is not a string, is too short to
    contain the group character, or carries a (group, code) pair that is not recognised.
    """
    # Non-string values (e.g. NaN from a missing CSV cell) and names too short to
    # index are reported instead of raising, so Series.apply can finish and the
    # failures show up as nulls.
    if not isinstance(seg_name, str) or len(seg_name) < 3:
        print(f'segment name error: {seg_name}')
        return None

    second_dig = seg_name[2]
    third_dig = seg_name[4:6]

    if second_dig == '2' and third_dig != '08':  # bone
        return 'Bone'

    loc = _ENV_BY_CODE.get((second_dig, third_dig))
    if loc is None:
        print(f'segment name error: {seg_name}')
    return loc

###############################################################################################################################

## Load the datasets

In [None]:
# Read the CSV at PATH_DISCOVERY_CSV into a DataFrame and display it.
discovery_all = pd.read_csv(PATH_DISCOVERY_CSV)
discovery_all

In [None]:
# Read the CSV at PATH_ERODED_DISCOVERY_CSV into a DataFrame and display it.
eroded_all = pd.read_csv(PATH_ERODED_DISCOVERY_CSV)
eroded_all

In [None]:
# Read the CSV at PATH_VALIDATION_CSV into a DataFrame and display it.
validation_all = pd.read_csv(PATH_VALIDATION_CSV)
validation_all

## Add environment column

In [None]:
# Derive an 'environment' column by passing every 'Segment Name' value through add_env.
# Note: this adds the column to discovery_all in place.
discovery_all['environment'] = discovery_all['Segment Name'].apply(add_env)
discovery_all

In [None]:
# Derive an 'environment' column by passing every 'Segment Name' value through add_env.
# Note: this adds the column to eroded_all in place.
eroded_all['environment'] = eroded_all['Segment Name'].apply(add_env)
eroded_all

In [None]:
# Derive an 'environment' column by passing every 'Segment Name' value through add_env.
# Note: this adds the column to validation_all in place.
validation_all['environment'] = validation_all['Segment Name'].apply(add_env)
validation_all

In [None]:
# Sanity check: number of rows whose 'environment' value is null.
discovery_all['environment'].isnull().sum()

In [None]:
# Sanity check: number of rows whose 'environment' value is null.
eroded_all['environment'].isnull().sum()

In [None]:
# Sanity check: number of rows whose 'environment' value is null.
validation_all['environment'].isnull().sum()

## Convert the datasets to t-SNE dataframes

In [None]:
# 2-D t-SNE embedding of discovery_all using every radiomic feature column (sub_analysis=None).
tsne_discovery_all_df = TSNE_maker(df=discovery_all, sub_analysis=None)

In [None]:
# 2-D t-SNE embedding of eroded_all using every radiomic feature column (sub_analysis=None).
tsne_eroded_all_df = TSNE_maker(df=eroded_all, sub_analysis=None)

In [None]:
# 2-D t-SNE embedding of validation_all using every radiomic feature column (sub_analysis=None).
tsne_validation_all_df = TSNE_maker(df=validation_all, sub_analysis=None)

## Add Sub-analysis

Discovery:

In [None]:
# t-SNE embedding of discovery_all restricted to the 'shape' feature columns.
tsne_discovery_shape_df = TSNE_maker(df=discovery_all, sub_analysis=['shape'])

In [None]:
# t-SNE embedding of discovery_all restricted to the 'intensity' ('firstorder') feature columns.
tsne_discovery_intensity_df = TSNE_maker(df=discovery_all, sub_analysis=['intensity'])

In [None]:
# t-SNE embedding of discovery_all restricted to the 'texture' feature columns
# (those that are neither 'shape' nor 'firstorder').
tsne_discovery_texture_df = TSNE_maker(df=discovery_all, sub_analysis=['texture'])

In [None]:
# t-SNE embedding of discovery_all using the 'intensity' and 'texture' columns together (no 'shape').
tsne_discovery_intensitytexture_df = TSNE_maker(df=discovery_all, sub_analysis=['intensity', 'texture'])

Eroded:

In [None]:
# t-SNE embedding of eroded_all restricted to the 'shape' feature columns.
tsne_eroded_shape_df = TSNE_maker(df=eroded_all, sub_analysis=['shape'])

In [None]:
# t-SNE embedding of eroded_all restricted to the 'intensity' ('firstorder') feature columns.
tsne_eroded_intensity_df = TSNE_maker(df=eroded_all, sub_analysis=['intensity'])

In [None]:
# t-SNE embedding of eroded_all restricted to the 'texture' feature columns
# (those that are neither 'shape' nor 'firstorder').
tsne_eroded_texture_df = TSNE_maker(df=eroded_all, sub_analysis=['texture'])

In [None]:
# t-SNE embedding of eroded_all using the 'intensity' and 'texture' columns together (no 'shape').
tsne_eroded_intensitytexture_df = TSNE_maker(df=eroded_all, sub_analysis=['intensity', 'texture'])

Validation:

In [None]:
# t-SNE embedding of validation_all restricted to the 'shape' feature columns.
tsne_validation_shape_df = TSNE_maker(df=validation_all, sub_analysis=['shape'])

In [None]:
# t-SNE embedding of validation_all restricted to the 'intensity' ('firstorder') feature columns.
tsne_validation_intensity_df = TSNE_maker(df=validation_all, sub_analysis=['intensity'])

In [None]:
# t-SNE embedding of validation_all restricted to the 'texture' feature columns
# (those that are neither 'shape' nor 'firstorder').
tsne_validation_texture_df = TSNE_maker(df=validation_all, sub_analysis=['texture'])

In [None]:
# t-SNE embedding of validation_all using the 'intensity' and 'texture' columns together (no 'shape').
tsne_validation_intensitytexture_df = TSNE_maker(df=validation_all, sub_analysis=['intensity', 'texture'])

## Save the t-SNE dataframes to CSV

In [None]:
# Write the all-feature t-SNE dataframes to CSV.
# NOTE(review): the three calls share the same 'LOCAL PATH' string, so as written
# each write targets the same file -- give each a distinct real path before running.
# to_csv is called with its default index setting, so the row index is written as
# the first (unnamed) column.
tsne_discovery_all_df.to_csv('LOCAL PATH')
tsne_eroded_all_df.to_csv('LOCAL PATH')
tsne_validation_all_df.to_csv('LOCAL PATH')

In [None]:
# Write the discovery sub-analysis t-SNE dataframes to CSV.
# NOTE(review): every call uses the same 'LOCAL PATH' string -- give each a
# distinct real path before running, otherwise later writes replace earlier ones.
tsne_discovery_shape_df.to_csv('LOCAL PATH')
tsne_discovery_intensity_df.to_csv('LOCAL PATH')
tsne_discovery_texture_df.to_csv('LOCAL PATH')
tsne_discovery_intensitytexture_df.to_csv('LOCAL PATH')

In [None]:
# Write the eroded sub-analysis t-SNE dataframes to CSV.
# NOTE(review): every call uses the same 'LOCAL PATH' string -- give each a
# distinct real path before running, otherwise later writes replace earlier ones.
tsne_eroded_shape_df.to_csv('LOCAL PATH')
tsne_eroded_intensity_df.to_csv('LOCAL PATH')
tsne_eroded_texture_df.to_csv('LOCAL PATH')
tsne_eroded_intensitytexture_df.to_csv('LOCAL PATH')

In [None]:
# Write the validation sub-analysis t-SNE dataframes to CSV.
# NOTE(review): unlike the other save cells, these targets are hard-coded absolute
# paths on a Z: drive; they will only work on a machine with that drive mapped.
# Consider a configurable output directory defined with the other path constants.
tsne_validation_shape_df.to_csv('Z:/active_Sajjad/11- Personal/CT tumor morphology/Processed Datasets/tsne_validation_shape.csv')
tsne_validation_intensity_df.to_csv('Z:/active_Sajjad/11- Personal/CT tumor morphology/Processed Datasets/tsne_validation_intensity.csv')
tsne_validation_texture_df.to_csv('Z:/active_Sajjad/11- Personal/CT tumor morphology/Processed Datasets/tsne_validation_texture.csv')
tsne_validation_intensitytexture_df.to_csv('Z:/active_Sajjad/11- Personal/CT tumor morphology/Processed Datasets/tsne_validation_intensitytexture.csv')