Documentation: https://altair-viz.github.io/getting_started/overview.html

**Note: You need to run this code in the command line before the above will work: conda install -c conda-forge altair vega_datasets notebook vega**

In [3]:
import altair as alt
import pandas as pd

alt.renderers.enable('notebook')

RendererRegistry.enable('notebook')

### Read the excel file and csv file into pandas dataframes
Look at your current working directory (!pwd) to know what filepath to pass into read_excel()

In [4]:
clinicaldf = pd.read_excel('UCEC_clinical_genotype_phenotype_V1.1.tsi.xlsx', sheet_name="CPTAC3_UCEC_clinical_genotypes_")
clinicaldf.drop(clinicaldf.index[0], inplace=True)
clinicaldf.head()

Unnamed: 0,idx,Histologic_Grade_(FIGO),Histologic_Type,Num_full_term_pregnancies,Tumor_Size_(cm),FIGO_stage,Myometrial_invasion_Specify,Diabetes,BMI,LVSI,...,PI3K-PTEN,PI3K-PIK3R2,TP53-ATM,TP53-TP53,TP53-TP53_pathway,HRD-BRCA1,HRD-BRCA1_or_BRCA2,HRD-BRCA2,MSI,POLE
1,C3L-00006,1,0,1,2.9,0,1,0,38.88,1.0,...,1,0,0,1,1,0,0,0,1,0
2,C3L-00008,1,0,1,3.5,0,1,0,39.76,0.0,...,1,0,0,0,0,0,0,0,1,0
3,C3L-00032,2,0,4,4.5,0,1,1,51.19,0.0,...,0,0,0,0,0,0,0,0,0,0
4,C3L-00090,2,0,4,3.5,0,1,1,32.69,0.0,...,1,0,0,0,0,0,0,0,0,0
5,C3L-00098,3,0,2,6.0,0,1,0,20.28,,...,0,0,0,1,1,0,0,0,0,0


**Note: In order to read a .cct file, first change the extension to .csv then import into Excel (File --> import --> CSV File, choose file, select Delimited, click "Next", check the box next to "Space", click "Finish")**

In [9]:
phosdf = pd.read_excel('UCEC_phosphoproteomics_site_level_V1.xlsx', sheet_name="data")
phosdf.head()

Unnamed: 0,idx,C3L-00136.T,C3L-00098.T,C3L-00090.T,C3L-00008.T,C3L-00032.T,C3L-00006.T,C3L-00006.N,C3L-00139.T,C3L-00143.T,...,C3N-01349.T,C3N-03320.N,C3N-03411.N,NM_Mix1.N,C3N-03691a.N,C3N-03691b.N,C3N-03692.N,C3L-03601a.N,C3L-03601b.N,NM_Mix2.N
0,AAAS-S495,,1.511548,1.112944,,0.81752,,,1.418852,0.869111,...,0.98761,0.860698,0.567761,0.589163,0.655917,0.576749,0.945281,,,1.037036
1,AAK1-S18,0.879774,1.556005,,0.692318,1.334778,0.821719,1.107258,1.336072,0.726407,...,1.004323,0.820971,0.930256,0.855603,1.676428,1.227982,1.139254,1.418611,1.994327,0.6654
2,AAK1-S20,0.696099,0.964754,0.6872,0.453538,0.966963,1.35204,0.905931,1.302045,0.880087,...,1.000152,1.166823,0.96571,0.96657,1.34424,1.650865,0.97537,0.928651,1.861469,0.550828
3,AAK1-S21,0.914304,0.949023,0.887574,1.068494,0.956485,1.434344,1.136394,1.239891,0.573554,...,0.598483,0.654774,,0.55372,,1.086362,0.659436,1.007299,1.331197,0.590478
4,AAK1-S624,2.024795,1.103993,1.153816,0.631918,0.97728,0.615934,0.759955,0.856731,1.114945,...,0.969571,1.067636,1.047008,0.831194,0.904264,1.051562,0.950879,0.71873,1.412013,0.76469


### Make a scatter plot

In [6]:
alt.Chart(clinicaldf).mark_point().encode(
    alt.X('BMI', type='quantitative'),
    alt.Y('Tumor_Size_(cm)', type='quantitative'),
    alt.Color('Histologic_Grade_(FIGO)', type='nominal')
).interactive()

<vega.vegalite.VegaLite at 0x11c985c18>



### Bar chart

In [7]:
alt.Chart(clinicaldf).mark_bar().encode(
    alt.X('idx', type='nominal'),
    alt.Y('Num_full_term_pregnancies'),
    alt.Color('Histologic_Grade_(FIGO)', type='nominal')
)

<vega.vegalite.VegaLite at 0x11ca6ed30>



In [18]:
tumorSize = clinicaldf[['idx', 'Tumor_Size_(cm)']]
tumorSize.head()

Unnamed: 0,idx,Tumor_Size_(cm)
1,C3L-00006,2.9
2,C3L-00008,3.5
3,C3L-00032,4.5
4,C3L-00090,3.5
5,C3L-00098,6.0


In [19]:
tumorSize = tumorSize.transpose()
tumorSize.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,92,93,94,95,96,97,98,99,100,101
idx,C3L-00006,C3L-00008,C3L-00032,C3L-00090,C3L-00098,C3L-00136,C3L-00137,C3L-00139,C3L-00143,C3L-00145,...,C3N-01219,C3N-01267,C3N-01346,C3N-01349,C3N-01510,C3N-01520,C3N-01521,C3N-01537,C3N-01802,C3N-01825
Tumor_Size_(cm),2.9,3.5,4.5,3.5,6,4.5,5,4,5.2,4.7,...,5,1.2,5.5,5,8.5,1,4.2,1.5,3.8,1.7


In [22]:
phosTumors = phosdf.filter(regex='[idx|.T]')
phosTumors.head()

Unnamed: 0,C3L-00136.T,C3L-00098.T,C3L-00090.T,C3L-00008.T,C3L-00032.T,C3L-00006.T,C3L-00139.T,C3L-00143.T,C3L-00137.T,C3L-00156.T,...,C3L-01247.T,C3L-01249.T,C3L-01246.T,C3L-01744.T,C3N-01537.T,C3N-01825.T,C3N-01825_rep.T,C3L-01925.T,C3N-01346.T,C3N-01349.T
0,,1.511548,1.112944,,0.81752,,1.418852,0.869111,1.207999,0.947326,...,0.826702,0.181608,1.235125,1.411036,1.357841,0.906307,,1.423261,1.540822,0.98761
1,0.879774,1.556005,,0.692318,1.334778,0.821719,1.336072,0.726407,0.725926,1.214033,...,1.242041,3.487461,1.420847,,0.687436,0.998162,2.002416,1.007287,1.296941,1.004323
2,0.696099,0.964754,0.6872,0.453538,0.966963,1.35204,1.302045,0.880087,0.469054,0.846182,...,1.859153,1.144811,1.622101,3.446092,0.738936,1.455261,1.917427,0.741597,1.313947,1.000152
3,0.914304,0.949023,0.887574,1.068494,0.956485,1.434344,1.239891,0.573554,0.335203,0.579155,...,1.278922,0.848739,1.475542,0.81132,,1.177364,1.708173,0.503021,,0.598483
4,2.024795,1.103993,1.153816,0.631918,0.97728,0.615934,0.856731,1.114945,0.741818,0.779409,...,1.274481,1.634016,1.087621,0.539889,0.937964,1.937846,2.415771,0.994277,1.410381,0.969571
