In [26]:
import pandas as pd
import cptac
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import lifelines
from lifelines import KaplanMeierFitter
from lifelines import CoxPHFitter
from lifelines.statistics import proportional_hazard_test
%matplotlib inline

In [2]:
ov = cptac.Ovarian()
ov_foll = ov.get_followup()

                                    

In [3]:
ov_foll.columns

Index(['Short Title', 'Event', 'Modified Time', 'Modified By', 'Status',
       'CRF Name',
       'Date of Last Contact (Do not answer if patient is deceased)',
       'Vital Status (at time of last contact)', 'Date of Death',
       'Tumor Status at Time of Last Contact or Death',
       'Was a Review of the Initial Pathological Findings Done?',
       'Was the Pathology Review consistent with the Diagnosis?',
       'Adjuvant (Post-Operative) Radiation Therapy',
       'Adjuvant (Post-Operative) Pharmaceutical Therapy',
       'Adjuvant (Post-Operative) Immunotherapy',
       'Adjuvant (Post-Operative) Hormone Therapy',
       'Adjuvant (Post-Operative) Targeted Molecular Therapy',
       'Measure of Success of Outcome at the Completion of Initial First Course Treatment (surgery and adjuvant therapies)',
       ' New Tumor Event After Initial Treatment?', 'Type of New Tumor Event',
       'Anatomic Site of New Tumor Event',
       'Other Site of New Tumor Event or Lymph Node Locatio

In [4]:
ov_foll['Vital Status (at time of last contact)'].value_counts()

Living      104
Deceased     10
Name: Vital Status (at time of last contact), dtype: int64

In [5]:
ov_foll[' New Tumor Event After Initial Treatment?'].value_counts()

No     88
Yes    16
Name:  New Tumor Event After Initial Treatment?, dtype: int64

In [6]:
en = cptac.Endometrial()
en_foll = en.get_followup()

                                    

In [7]:
en_foll.columns

Index(['Cohort', 'Tumor code', 'Follow-up Period', 'Lost to Follow-up?',
       'Vital Status', 'Path Diag to Last Contact(Day)', 'Cause of Death',
       'Path Diag to Death(days)', 'Adj Post-Op Radiation Therapy',
       'Adj Post-Op Pharma Therapy', 'Adj Post-Op Immunotherapy',
       'Tumor Status', 'Menopausal Hormone Therapy', 'Oral Contraceptives',
       'Tamoxifen', 'Hypertension', 'Diabetes', 'Num Full Pregnancies',
       'Colorectal Cancer', 'Success of Initial Treatment',
       'Success of Treatment@ Followup', 'ECOG Score', 'Karnofsky Score',
       'Perform Status Scale: Timing', 'Other Perform Scale Timing',
       'New Tumor After Initial TRT', 'Locoregional Recurrence Tumor',
       'Distant Metastasis Tumor', 'New Primary Tumor',
       'New Tumor Site Unknown', 'New Tumor Site Lung', 'New Tumor Site Bone',
       'New Tumor Site Liver', 'New Tumor Site Brain', 'New Tumor Site Other',
       'Specify Other New Tumor Site', 'Path Diag to new Tumor (days)',
       'Ad

In [8]:
en_foll['Vital Status'].value_counts()

Living      400
Deceased      8
Name: Vital Status, dtype: int64

In [9]:
en_foll['New Tumor After Initial TRT'].value_counts()

No     348
Yes     34
Name: New Tumor After Initial TRT, dtype: int64

In [10]:
brca = cptac.Brca()
brca_foll = brca.get_followup()

                                    

In [11]:
brca_foll.columns

Index(['Short Title', 'Event', 'Modified Time', 'Modified By', 'Status',
       'CRF Name', 'Date of Last Contact',
       'Vital Status (at time of last contact)', 'Date of Death',
       'Tumor Status (at time of last contact or death)',
       'Was a Review of the Initial Pathological Findings Done?',
       'Was the Pathology Review consistent with the Diagnosis?',
       'OE_Margin Status after Surgical Procedure',
       'OE_If margins were positive after first surgical resection, what was the surgical procedure performed to achieve negative margins?',
       'OE_Other Surgical Method Performed to Achieve Negative Margins',
       'OE_Margin Status after second surgical resection',
       'OE_Adjuvant (Post-Operative) Clinical Trial Medication Therapy',
       'OE_Adjuvant (Post-Operative) HER2 Targeted Therapy',
       'OE_New Tumor Event After Initial Treatment?',
       'OE_Type of New Tumor Event', 'OE_Anatomic Site of New Tumor Event',
       'OE_Other Site of New Tumor Even

In [12]:
brca_foll['Vital Status (at time of last contact)'].value_counts()

Alive       56
Living      48
Deceased     1
Dead         1
Name: Vital Status (at time of last contact), dtype: int64

In [13]:
brca_foll['New Tumor Event After Initial Treatment?'].value_counts()

No     16
Yes     1
Name: New Tumor Event After Initial Treatment?, dtype: int64

In [14]:
colon = cptac.Ccrcc()
co_foll = colon.get_followup()

                                    

In [15]:
co_foll.columns

Index(['Cohort', 'Tumor code', 'Follow-up Period', 'Lost to Follow-up?',
       'Vital Status', 'Path Diag to Last Contact(Day)', 'Cause of Death',
       'Path Diag to Death(days)', 'Adj Post-Op Radiation Therapy',
       'Adj Post-Op Pharma Therapy', 'Adj Post-Op Immuno Therapy',
       'Tumor Status', 'Success of Initial Treatment',
       'Success of Treament @ Followup', 'ECOG Score', 'Karnofsky Score',
       'Perform Status Scale: Timing', 'Other Perform Scale Timing',
       'New Tumor After Initial TRT', 'Path Diag to new Tumor days',
       'Additional Surgery ?', 'Path Diag to Surgery days     ',
       'Additional Surgery for Mets ?', 'Path Diag to Mets Surgery days',
       'Radiation for New Tumor', 'Pharmaceutical for New Tumor',
       'Immunological for New Tumor'],
      dtype='object', name='Name')

In [16]:
co_foll['New Tumor After Initial TRT'].value_counts()

No     270
Yes     39
Name: New Tumor After Initial TRT, dtype: int64

In [17]:
co_foll['Vital Status'].value_counts()

Living      336
Deceased     16
Name: Vital Status, dtype: int64

In [18]:
luad = cptac.Luad()
lu_foll = luad.get_followup()

                                    



In [19]:
lu_foll.columns

Index(['Cohort', 'Tumor code', 'Follow-up Period', 'Lost to Follow-up?',
       'Vital Status', 'Path Diag to Last Contact(Day)', 'Cause of Death',
       'Path Diag to Death(days)', 'Adj Post-Op Radiation Therapy',
       'Adj Post-Op Pharma Therapy', 'Adj Post-Op Immuno Therapy',
       'Tumor Status', 'Success of Initial Treatment',
       'Success of Treament @ Followup', 'ECOG Score', 'Karnofsky Score',
       'Perform Status Scale: Timing', 'Other Perform Scale Timing',
       'New Tumor After Initial TRT', 'Path Diag to new Tumor days',
       'Locoregional Recurrence Tumor', 'Distant Metastasis Tumor',
       'New Primary Tumor', 'New Tumor Site Lung', 'New Tumor Site Bone',
       'New Tumor Site Liver', 'New Tumor Site Brain', 'New Tumor Site Other',
       'Specify Other New Tumor Site', 'Diagnostic Evidence of Relapse',
       'Additional Surgery Loco-reg', 'Path Diag to Surgery days LR',
       'Additional Surgery for Mets', 'Path Diag to Mets Surgery days',
       'Residu

In [20]:
lu_foll['Vital Status'].value_counts()

Living      209
Deceased     19
Name: Vital Status, dtype: int64

In [21]:
lu_foll['New Tumor After Initial TRT'].value_counts()

No     155
Yes     40
Name: New Tumor After Initial TRT, dtype: int64

In [27]:
renal = cptac.Ccrcc()
renal_foll = renal.get_followup()

                                    

In [28]:
renal_foll.columns

Index(['Cohort', 'Tumor code', 'Follow-up Period', 'Lost to Follow-up?',
       'Vital Status', 'Path Diag to Last Contact(Day)', 'Cause of Death',
       'Path Diag to Death(days)', 'Adj Post-Op Radiation Therapy',
       'Adj Post-Op Pharma Therapy', 'Adj Post-Op Immuno Therapy',
       'Tumor Status', 'Success of Initial Treatment',
       'Success of Treament @ Followup', 'ECOG Score', 'Karnofsky Score',
       'Perform Status Scale: Timing', 'Other Perform Scale Timing',
       'New Tumor After Initial TRT', 'Path Diag to new Tumor days',
       'Additional Surgery ?', 'Path Diag to Surgery days     ',
       'Additional Surgery for Mets ?', 'Path Diag to Mets Surgery days',
       'Radiation for New Tumor', 'Pharmaceutical for New Tumor',
       'Immunological for New Tumor'],
      dtype='object', name='Name')

In [35]:
renal_foll['Vital Status'].value_counts()

Living      336
Deceased     16
Name: Vital Status, dtype: int64

In [31]:
renal_foll['New Tumor After Initial TRT'].value_counts()

No     270
Yes     39
Name: New Tumor After Initial TRT, dtype: int64

In [34]:
renal_foll

Name,Cohort,Tumor code,Follow-up Period,Lost to Follow-up?,Vital Status,Path Diag to Last Contact(Day),Cause of Death,Path Diag to Death(days),Adj Post-Op Radiation Therapy,Adj Post-Op Pharma Therapy,...,Other Perform Scale Timing,New Tumor After Initial TRT,Path Diag to new Tumor days,Additional Surgery ?,Path Diag to Surgery days,Additional Surgery for Mets ?,Path Diag to Mets Surgery days,Radiation for New Tumor,Pharmaceutical for New Tumor,Immunological for New Tumor
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00004,Disc,CCRCC,12 Months,No,Living,384.0,,,No,No,...,,No,,,,,,,,
C3L-00004,Disc,CCRCC,24 Months,Yes,Living,384.0,,,No,No,...,,No,,,,,,,,
C3L-00010,Disc,CCRCC,12 Months,No,Living,423.0,,,No,No,...,,No,,,,,,,,
C3L-00010,Disc,CCRCC,24 Months,No,Living,704.0,,,No,No,...,,No,,,,,,,,
C3L-00010,Disc,CCRCC,36 Months,No,Living,879.0,,,No,No,...,,No,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-02945,Conf,CCRCC,12 Months,Yes,Living,95.0,,,,,...,,,,,,,,,,
C3N-03018,Conf,CCRCC,12 Months,No,Living,332.0,,,No,No,...,,No,,,,,,,,
C3N-03019,Conf,CCRCC,12 Months,No,Living,332.0,,,No,No,...,,No,,,,,,,,
C3N-03020,Conf,CCRCC,12 Months,No,Living,331.0,,,No,No,...,,No,,,,,,,,
