In [1]:
import pyreadstat
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statistics

from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, MinMaxScaler, StandardScaler, RobustScaler

### Generating the clinical data

In [3]:
### Venous Blood Study dataset
hrsvbs_df, meta = pyreadstat.read_sas7bdat(r'/panfs/jay/groups/21/thyagara/sesha059/HRS/HRS datasets/hrs2016vbs.sas7bdat')
hrsvbs_df['HHID'] = hrsvbs_df['HHID'].astype('int64')
hrsvbs_df['PN'] = hrsvbs_df['PN'].astype('int64')
hrsvbs_df['PCMVGINT'] = hrsvbs_df['PCMVGINT'].replace([1.0, 2.0, 3.0], ['Negative', 'Negative', 'Positive'])

# immune ARIPs
hrsvbs_df['MLR'] = hrsvbs_df['PAMON']/ hrsvbs_df['PALYM']
hrsvbs_df['NLR'] = hrsvbs_df['PANEU']/ hrsvbs_df['PALYM']

hrsvbs_df = hrsvbs_df.loc[:, ('HHID', 'PN', 'PALYM', 'PANEU', 'MLR', 'NLR', 'PLDLC', 'PPLT', 'PRBC', 'PWBC', 'PCMVGINT')]
hrsvbs_df

Unnamed: 0,HHID,PN,PALYM,PANEU,MLR,NLR,PLDLC,PPLT,PRBC,PWBC,PCMVGINT
0,10013,40,3.9,6.7,0.256410,1.717949,97.0,400.0,4.45,11.9,Positive
1,10038,10,1.5,3.9,0.333333,2.600000,85.0,161.0,5.10,6.0,Negative
2,10038,40,1.1,3.6,0.363636,3.272727,132.0,294.0,4.53,5.4,Negative
3,10075,20,3.0,3.7,0.166667,1.233333,123.0,255.0,4.29,7.3,Positive
4,10147,10,1.6,1.9,0.187500,1.187500,68.0,156.0,3.35,4.0,Positive
...,...,...,...,...,...,...,...,...,...,...,...
9929,923333,10,2.5,4.6,0.240000,1.840000,74.0,344.0,5.01,7.9,Positive
9930,923489,20,3.8,5.2,0.210526,1.368421,86.0,234.0,4.26,10.1,Positive
9931,923498,10,2.4,4.9,0.208333,2.041667,167.0,256.0,4.72,8.0,Negative
9932,952836,10,1.6,2.4,0.187500,1.500000,152.0,240.0,5.45,4.5,Positive


In [4]:
def combine_race_ethnicity(df):
    # Define conditions
    hispanic_condition = df['HISPANIC'].isin([1, 2, 3])
    non_hispanic_white = (df['RACE'] == 1) & ~hispanic_condition
    non_hispanic_black = (df['RACE'] == 2) & ~hispanic_condition
    non_hispanic_other = (df['RACE'] == 7) & ~hispanic_condition
    
    # Apply conditions to create a new 'RACE' column
    df['RACE'] = np.select(
        [hispanic_condition, non_hispanic_white, non_hispanic_black, non_hispanic_other],
        ['Hispanic', 'Non-Hispanic White', 'Non-Hispanic Black', 'Non-Hispanic Other'],
        default='Not Obtained'
    )
    return df

In [5]:
### Tracker file
trk2020_df, meta = pyreadstat.read_sas7bdat(r'/panfs/jay/groups/21/thyagara/sesha059/HRS/HRS datasets/trk2020tr_r.sas7bdat')
trk2020_df['HHID'] = trk2020_df['HHID'].astype('int64')
trk2020_df['PN'] = trk2020_df['PN'].astype('int64')
trk2020_df['SCHLYRS'][trk2020_df['SCHLYRS'] == 99] = np.nan

## combining race and ethnicity together into a single variable called RACE
trk2020_df = combine_race_ethnicity(trk2020_df)

## defining the mortality variable at year 2020
trk2020_df.loc[(trk2020_df['RALIVE']==2) | (trk2020_df['RALIVE']==1), 'RALIVE'] = 0
trk2020_df.loc[(trk2020_df['RALIVE']==5) | (trk2020_df['RALIVE']==6), 'RALIVE'] = 1

## selecting only those with valid age values
trk2020_df = trk2020_df.loc[(trk2020_df['PAGE'] != 999), :]
print("Age in-bound:", trk2020_df.shape)

trk2020_df = trk2020_df[['HHID', 'PN', 'PAGE', 'GENDER', 'RACE', 'RALIVE', 'SCHLYRS']]

trk2020_df

Age in-bound: (20912, 552)


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  trk2020_df['SCHLYRS'][trk2020_df['SCHLYRS'] == 99] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trk

Unnamed: 0,HHID,PN,PAGE,GENDER,RACE,RALIVE,SCHLYRS
4,10001,10,76.0,1.0,Non-Hispanic White,0.0,12.0
6,10003,30,60.0,2.0,Non-Hispanic White,0.0,16.0
8,10004,40,70.0,2.0,Non-Hispanic White,0.0,12.0
11,10013,40,68.0,2.0,Non-Hispanic White,0.0,13.0
12,10038,10,79.0,1.0,Non-Hispanic White,0.0,16.0
...,...,...,...,...,...,...,...
43552,923525,10,59.0,2.0,Non-Hispanic Black,0.0,14.0
43553,923525,20,61.0,1.0,Non-Hispanic Black,0.0,12.0
43554,952836,10,56.0,2.0,Hispanic,0.0,6.0
43555,958361,10,51.0,1.0,Non-Hispanic Black,0.0,11.0


In [7]:
### loading flowcytometry dataset to include immune cell counts
flocyt_df, meta = pyreadstat.read_sas7bdat(rf'/panfs/jay/groups/21/thyagara/sesha059/HRS/HRS datasets/flocyt2016.sas7bdat')
flocyt_df['HHID'] = flocyt_df['HHID'].astype('int64')
flocyt_df['PN'] = flocyt_df['PN'].astype('int64')

# immune ARIPS
flocyt_df['CD4/CD8'] = flocyt_df['PCD4T_COUNT']/ flocyt_df['PCD8T_COUNT']
flocyt_df['CD4/CD8']  = (flocyt_df['CD4/CD8'] < 1) * 1

flocyt_df['IHG'] = None
flocyt_df.loc[(flocyt_df['CD4/CD8'] >= 1) & (flocyt_df['PCD4T_COUNT'] * 10**3 >= 800), 'IHG'] = "1" 
flocyt_df.loc[(flocyt_df['CD4/CD8'] >= 1) & (flocyt_df['PCD4T_COUNT'] * 10**3 < 800), 'IHG'] = "2" 
flocyt_df.loc[(flocyt_df['CD4/CD8'] < 1) & (flocyt_df['PCD4T_COUNT'] * 10**3 >= 800), 'IHG'] = "3"
flocyt_df.loc[(flocyt_df['CD4/CD8'] < 1) & (flocyt_df['PCD4T_COUNT'] * 10**3 < 800), 'IHG'] = "4" 

flocyt_df = flocyt_df[['HHID', 'PN', 'PTCELL_COUNT', 
                       'PCD4T_COUNT', 'PCD4N_COUNT', 'PCD4N_PCT', 'PCD4TEMRA_COUNT', 'PCD4TEM_COUNT', 'PCD4M_COUNT',
                       'PCD8T_COUNT', 'PCD8N_COUNT', 'PCD8TEMRA_COUNT', 'PCD8TEM_COUNT', 'PCD8M_COUNT',
                       'PBCELL_COUNT', 'PNAIVEB_COUNT', 'PIGD_PLUS_MEMB_COUNT', 'PIGD_MINUS_MEMB_COUNT',
                       'CD4/CD8', 'IHG']]
flocyt_df

Unnamed: 0,HHID,PN,PTCELL_COUNT,PCD4T_COUNT,PCD4N_COUNT,PCD4N_PCT,PCD4TEMRA_COUNT,PCD4TEM_COUNT,PCD4M_COUNT,PCD8T_COUNT,PCD8N_COUNT,PCD8TEMRA_COUNT,PCD8TEM_COUNT,PCD8M_COUNT,PBCELL_COUNT,PNAIVEB_COUNT,PIGD_PLUS_MEMB_COUNT,PIGD_MINUS_MEMB_COUNT,CD4/CD8,IHG
0,10013,40,2.492064,1.933823,1.469732,0.760014,0.061814,0.000000,0.294365,0.415773,0.157491,0.180291,0.003232,0.017207,0.428221,0.215931,0.027648,0.080530,0,3
1,10038,10,1.161003,0.863462,0.482453,0.558743,0.002904,0.000000,0.338639,0.235303,0.033606,0.153484,0.007716,0.016946,0.109160,0.046752,0.032907,0.013801,0,3
2,10038,40,0.263668,0.185634,0.085323,0.459628,0.000979,0.000000,0.073321,0.049602,0.014525,0.017950,0.001004,0.005254,0.171753,0.129388,0.014062,0.012722,0,4
3,10075,20,1.133710,0.872412,0.269250,0.308627,0.011533,0.002314,0.467886,0.183393,0.080622,0.042882,0.000000,0.021267,0.703674,0.491301,0.105379,0.051128,0,3
4,10147,10,1.287578,0.761716,0.121553,0.159578,0.056055,0.023296,0.365694,0.466784,0.094068,0.271646,0.002933,0.019288,0.038435,0.021659,0.005294,0.007695,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9928,923333,10,2.049042,1.185874,0.367520,0.309915,0.132736,,0.400265,0.577152,0.101299,0.296179,0.005584,0.031124,0.100929,0.062600,0.009623,0.020260,0,3
9929,923489,20,2.711510,1.666456,0.676027,0.405668,0.186903,0.026309,0.599909,0.804765,0.174309,0.537977,0.010304,0.026026,0.532937,0.444461,0.032128,0.017091,0,3
9930,923498,10,1.527197,1.044798,0.390870,0.374111,0.036680,,0.493607,0.437445,0.382937,0.029484,,,0.506991,0.308480,0.084208,0.042041,0,3
9931,952836,10,1.047363,0.798387,0.196060,0.245571,0.036393,0.005617,0.357443,0.198369,0.098280,0.046041,0.000000,0.015132,0.134444,0.097289,0.007133,0.017526,0,4


In [8]:
# Getting the inflammation latent variables
inflam_df = pd.read_csv(rf'/panfs/jay/groups/21/thyagara/sesha059/HRS/HRS datasets/HRS-inflamlvs.csv')
inflam_df = inflam_df[['HHID', 'PN', 'INFLAM_5']]

inflam_df

Unnamed: 0,HHID,PN,INFLAM_5
0,10013,40,-0.349
1,10038,10,-0.252
2,10038,40,-0.623
3,10075,20,0.112
4,10147,10,0.267
...,...,...,...
9868,923333,10,0.474
9869,923489,20,0.162
9870,923498,10,-0.255
9871,952836,10,-0.167


In [9]:
### Helper-Function to create comorbidity index
def comorb(df):
    df['comorb_idx'] = (
        np.int32(df['PC005'].isin([1, 3])) +  # Hypertension
        np.int32(df['PC010'].isin([1, 3])) +  # Diabetes Mellitus
        np.int32(df['PC018'].isin([1, 3])) +  # Cancer
        np.int32(df['PC030'].isin([1, 3])) +  # Lung Disease
        np.int32(df['PC036'].isin([1, 3])) +  # Cardiac Disorder
        np.int32(df['PC053'].isin([1, 3])) +  # Stroke
        np.int32(df['PC070'].isin([1, 3])) +  # Arthritis
        np.int32(df['PC065'].isin([1, 3]))    # Psychiatric Problems
    )
    
    return df

In [10]:
### Physical Health 2016 file
ph_df_16, meta = pyreadstat.read_sas7bdat(r'/panfs/jay/groups/21/thyagara/sesha059/HRS/HRS datasets/h16c_r.sas7bdat')
ph_df_16['HHID'] = ph_df_16['HHID'].astype('int64')
ph_df_16['PN'] = ph_df_16['PN'].astype('int64')

# Creating comorbidity index
ph_df_16 = comorb(ph_df_16)
print(ph_df_16['comorb_idx'].describe())

# Selecting only the necessary columns
ph_df_16 = ph_df_16[['HHID', 'PN', 'PC139', 'PC141', 'PC142', 'PC116', 'PC117', 'comorb_idx']]

# Ignoring invalid entries in weight, feet, and inches
ph_df_16 = ph_df_16[~(ph_df_16['PC139'].isin([998, 999]) | ph_df_16['PC141'].isin([8, 9]) | ph_df_16['PC142'].isin([98, 99]))]
ph_df_16['height'] = ph_df_16['PC141'] * 12 + ph_df_16['PC142']

# Calculating BMI
ph_df_16['BMI'] = (ph_df_16['PC139']/ np.square(ph_df_16['height'])) * 703

# Calculating smoking status
preload, meta = pyreadstat.read_sas7bdat(rf'/panfs/jay/groups/21/thyagara/sesha059/HRS/HRS datasets/h16pr_r.sas7bdat')
preload = preload[['HHID', 'PN', 'PZ205']]
preload['HHID'] = preload['HHID'].astype('int64')
preload['PN'] = preload['PN'].astype('int64')
ph_df_16 = pd.merge(ph_df_16, preload, how='inner', on=['HHID', 'PN'])

conditions = [
    (ph_df_16['PC117'] == 1),  # Current smokers
    (ph_df_16['PC117'] == 5) & ((ph_df_16['PZ205'] == 1) | (ph_df_16['PC116'] == 1)),  # Former smokers
    (ph_df_16['PZ205'] == 5) | (ph_df_16['PC116'] == 5)  # Never smokers
    ]
choices = [
    "Current smokers",
    "Former smokers",
    "Never smokers"
    ]
ph_df_16['smoke_stat'] = np.select(conditions, choices, default="Never smokers")

ph_df_16.drop(['PC139', 'PC141', 'PC142', 'PC116', 'PC117', 'PZ205', 'height'], axis = 1, inplace = True)

### Physical Health 2014 file
ph_df_14, meta = pyreadstat.read_sas7bdat(r'/panfs/jay/groups/21/thyagara/sesha059/HRS/HRS datasets/h14c_r.sas7bdat')
ph_df_14 = ph_df_14[['HHID', 'PN', 'OC139', 'OC141', 'OC142']]
ph_df_14['HHID'] = ph_df_14['HHID'].astype('int64')
ph_df_14['PN'] = ph_df_14['PN'].astype('int64')

# Ignoring invalid entries in weight, feet, and inches
ph_df_14 = ph_df_14[~(ph_df_14['OC139'].isin([998, 999]) | ph_df_14['OC141'].isin([8, 9]) | ph_df_14['OC142'].isin([98, 99]))]
ph_df_14['height'] = ph_df_14['OC141'] * 12 + ph_df_14['OC142']

# Calculating BMI
ph_df_14['BMI'] = (ph_df_14['OC139']/ np.square(ph_df_14['height'])) * 703
ph_df_14.drop(['OC139', 'OC141', 'OC142', 'height'], axis = 1, inplace = True)

### Combiding 2014 and 2016 Physical Health Data
ph_df = pd.merge(ph_df_16, ph_df_14, how='left', on=['HHID', 'PN'])

ph_df['BMI'] = ph_df.loc[:, ['BMI_x', 'BMI_y']].mean(axis=1)
ph_df.drop(['BMI_x', 'BMI_y'], axis = 1, inplace = True)

ph_df

count    20912.000000
mean         2.132029
std          1.531248
min          0.000000
25%          1.000000
50%          2.000000
75%          3.000000
max          8.000000
Name: comorb_idx, dtype: float64


Unnamed: 0,HHID,PN,comorb_idx,smoke_stat,BMI
0,10001,10,0,Never smokers,19.523904
1,10003,30,3,Never smokers,35.896264
2,10004,40,0,Former smokers,24.459408
3,10013,40,1,Current smokers,26.517950
4,10038,10,3,Never smokers,23.625289
...,...,...,...,...,...
20514,923525,10,2,Current smokers,24.126627
20515,923525,20,0,Current smokers,21.615751
20516,952836,10,1,Never smokers,
20517,958361,10,1,Current smokers,23.108108


In [16]:
### Combining the datasets together
res_df = hrsvbs_df.merge(flocyt_df, how='left', on=['HHID', 'PN'])
res_df = res_df.merge(trk2020_df, how='left', on=['HHID', 'PN'])
res_df = res_df.merge(inflam_df, how='left', on=['HHID', 'PN'])
res_df = res_df.merge(ph_df, how='left', on=['HHID', 'PN'])

### renaming columns
res_df = res_df.rename(columns = {
    'PALYM': 'lymp',
    'PANEU': 'neut',
    'PLDLC': 'LDL_chol',
    'PPLT': 'platelets',
    'PRBC': 'RBC',
    'PWBC': 'WBC',
    'PCMVGINT': 'CMV_status',
    'PTCELL_COUNT': 'Tcells',
    'PCD4T_COUNT': 'CD4',
    'PCD4N_COUNT': 'CD4N',
    'PCD4N_PCT': 'CD4N_pct',
    'PCD4TEMRA_COUNT': 'CD4eff',
    'PCD4TEM_COUNT': 'CD4EM',
    'PCD4M_COUNT': 'CD4CM',
    'PCD8T_COUNT': 'CD8',
    'PCD8N_COUNT': 'CD8N',
    'PCD8TEMRA_COUNT': 'CD8eff',
    'PCD8TEM_COUNT': 'CD8EM',
    'PCD8M_COUNT': 'CD8CM',
    'PBCELL_COUNT': 'Bcells',
    'PNAIVEB_COUNT': 'Bnaive',
    'PIGD_PLUS_MEMB_COUNT': 'IgD_plus_B',
    'PIGD_MINUS_MEMB_COUNT': 'IgD_minus_B',
    'PAGE': 'age',
    'GENDER': 'sex',
    'RACE': 'race',
    'RALIVE': 'mortality',
    'SCHLYRS': 'educ'
})

res_df

Unnamed: 0,HHID,PN,lymp,neut,MLR,NLR,LDL_chol,platelets,RBC,WBC,...,IHG,age,sex,race,mortality,educ,INFLAM_5,comorb_idx,smoke_stat,BMI
0,10013,40,3.9,6.7,0.256410,1.717949,97.0,400.0,4.45,11.9,...,3,68.0,2.0,Non-Hispanic White,0.0,13.0,-0.349,1.0,Current smokers,26.517950
1,10038,10,1.5,3.9,0.333333,2.600000,85.0,161.0,5.10,6.0,...,3,79.0,1.0,Non-Hispanic White,0.0,16.0,-0.252,3.0,Never smokers,23.625289
2,10038,40,1.1,3.6,0.363636,3.272727,132.0,294.0,4.53,5.4,...,4,73.0,2.0,Non-Hispanic White,0.0,16.0,-0.623,1.0,Former smokers,23.078283
3,10075,20,3.0,3.7,0.166667,1.233333,123.0,255.0,4.29,7.3,...,3,79.0,2.0,Hispanic,0.0,8.0,0.112,2.0,Never smokers,30.724246
4,10147,10,1.6,1.9,0.187500,1.187500,68.0,156.0,3.35,4.0,...,4,83.0,2.0,Hispanic,1.0,8.0,0.267,5.0,Never smokers,23.513428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9929,923333,10,2.5,4.6,0.240000,1.840000,74.0,344.0,5.01,7.9,...,3,57.0,2.0,Non-Hispanic White,0.0,16.0,0.474,0.0,Never smokers,35.505051
9930,923489,20,3.8,5.2,0.210526,1.368421,86.0,234.0,4.26,10.1,...,3,51.0,2.0,Non-Hispanic Black,0.0,13.0,0.162,0.0,Never smokers,34.110059
9931,923498,10,2.4,4.9,0.208333,2.041667,167.0,256.0,4.72,8.0,...,3,61.0,2.0,Non-Hispanic Black,0.0,10.0,-0.255,1.0,Former smokers,31.321007
9932,952836,10,1.6,2.4,0.187500,1.500000,152.0,240.0,5.45,4.5,...,4,56.0,2.0,Hispanic,0.0,6.0,-0.167,1.0,Never smokers,


In [12]:
res_df.columns

Index(['HHID', 'PN', 'lymp', 'neut', 'MLR', 'NLR', 'LDL_chol', 'platelets',
       'RBC', 'WBC', 'CMV_status', 'Tcells', 'CD4', 'CD4N', 'CD4N_pct',
       'CD4eff', 'CD4EM', 'CD4CM', 'CD8', 'CD8N', 'CD8eff', 'CD8EM', 'CD8CM',
       'Bcells', 'Bnaive', 'IgD+ B', 'IgD- B', 'CD4/CD8', 'IHG', 'age', 'sex',
       'race', 'mortality', 'educ', 'INFLAM_5', 'comorb_idx', 'smoke_stat',
       'BMI'],
      dtype='object')

In [None]:
### Lab ID xwalk file to get HHID, PN
FID_xwalk = pd.read_excel('/home/thyagara/sesha059/HRS/HRS datasets/Lab Ticket Crosswalk WBD 2016 with HHID PN.xlsx', usecols=['Lab ID', 'hhid', 'pn'])
FID_xwalk = FID_xwalk.rename(columns = {'hhid': 'HHID', 'pn': 'PN'})

In [18]:
res_df = res_df.merge(FID_xwalk, how="inner", on=["HHID", "PN"])
res_df

Unnamed: 0,HHID,PN,lymp,neut,MLR,NLR,LDL_chol,platelets,RBC,WBC,...,age,sex,race,mortality,educ,INFLAM_5,comorb_idx,smoke_stat,BMI,Lab ID
0,10013,40,3.9,6.7,0.256410,1.717949,97.0,400.0,4.45,11.9,...,68.0,2.0,Non-Hispanic White,0.0,13.0,-0.349,1.0,Current smokers,26.517950,F1631028
1,10038,10,1.5,3.9,0.333333,2.600000,85.0,161.0,5.10,6.0,...,79.0,1.0,Non-Hispanic White,0.0,16.0,-0.252,3.0,Never smokers,23.625289,F1631340
2,10038,40,1.1,3.6,0.363636,3.272727,132.0,294.0,4.53,5.4,...,73.0,2.0,Non-Hispanic White,0.0,16.0,-0.623,1.0,Former smokers,23.078283,F1636464
3,10075,20,3.0,3.7,0.166667,1.233333,123.0,255.0,4.29,7.3,...,79.0,2.0,Hispanic,0.0,8.0,0.112,2.0,Never smokers,30.724246,F1638761
4,10147,10,1.6,1.9,0.187500,1.187500,68.0,156.0,3.35,4.0,...,83.0,2.0,Hispanic,1.0,8.0,0.267,5.0,Never smokers,23.513428,F1636531
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9928,923333,10,2.5,4.6,0.240000,1.840000,74.0,344.0,5.01,7.9,...,57.0,2.0,Non-Hispanic White,0.0,16.0,0.474,0.0,Never smokers,35.505051,F1631516
9929,923489,20,3.8,5.2,0.210526,1.368421,86.0,234.0,4.26,10.1,...,51.0,2.0,Non-Hispanic Black,0.0,13.0,0.162,0.0,Never smokers,34.110059,F1636102
9930,923498,10,2.4,4.9,0.208333,2.041667,167.0,256.0,4.72,8.0,...,61.0,2.0,Non-Hispanic Black,0.0,10.0,-0.255,1.0,Former smokers,31.321007,F1632688
9931,952836,10,1.6,2.4,0.187500,1.500000,152.0,240.0,5.45,4.5,...,56.0,2.0,Hispanic,0.0,6.0,-0.167,1.0,Never smokers,,F1638891


In [None]:
res_df.to_csv('Clinical_Data.csv', index=False)

### Incorportaing the RNAseq data