# Visualisation of single-cell expression data using PCA

In this lab you will use PCA to visualise single-cell gene expression data from Guo et al., "Resolution of Cell Fate Decisions Revealed by Single-Cell Gene Expression Analysis from Zygote to Blastocyst", Developmental Cell, Volume 18, Issue 4, 20 April 2010, Pages 675-685, available from http://dx.doi.org/10.1016/j.devcel.2010.02.012. The paper PDF is available in the handouts folder for Week 7 or on Blackboard.

Exercise 2: The Guo et al. paper contains PCA plots in Figures 1B and 1C. Can you reproduce these, or something similar? You will have to run PCA again on the modified data, which contains only cells from the 64-cell stage (provided below), and make some new plots.

Note: Our data does not include information about which embryo each cell comes from, so you won't be able to colour the cells by embryo of origin as is done in Figure 1B.



In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import visualisation 
import plotly
import plotly.express as px
import plotly.graph_objs as go

## The complete data

In [2]:
# Read the full expression table; the first CSV column becomes the row index.
GuoDataAll = pd.read_csv('GuoData.csv', index_col=[0])
# Row index of the full table; it is compared against labels such as '64 TE'
# when the 64-cell-stage subset is selected.
labelsAll = GuoDataAll.index 

## For exercise 2 you only need a subset of the data

In [3]:

# Keep only the rows labelled with one of the three 64-cell-stage groups,
# stacked in TE, EPI, PE order.
stage_names = ('64 TE', '64 EPI', '64 PE')
frames = [GuoDataAll.iloc[labelsAll == stage_name, :] for stage_name in stage_names]
data = pd.concat(frames)
labels = data.index

# Rows are reported as cells and columns as genes.
N, D = data.shape
print('Cells: %s, Genes: %s' % (N, D))

Cells: 159, Genes: 48


In [4]:
# Display the 64-cell-stage subset (pandas renders a truncated view of large frames).
data 

Unnamed: 0,Actb,Ahcy,Aqp3,Atp12a,Bmp4,Cdx2,Creb312,Cebpa,Dab2,DppaI,...,Sox2,Sall4,Sox17,Snail,Sox13,Tcfap2a,Tcfap2c,Tcf23,Utf1,Tspan8
64 TE,0.665495,-1.146005,-0.043651,0.842042,-0.487690,0.799261,-0.869909,0.910452,0.809362,1.137222,...,-0.739126,-0.838026,-0.192260,-1.019837,-1.724225,1.276642,-0.082692,-0.937267,-1.343221,1.091156
64 TE,-1.445958,-0.900779,0.473474,1.757569,-0.319247,0.660559,-0.720956,0.579670,0.720583,1.401462,...,-0.957635,-0.302230,-0.582270,-0.920845,-0.098756,0.979717,-0.041204,-0.801565,-1.208539,0.908925
64 TE,0.819992,-0.947243,0.008219,1.215034,-0.425632,0.692393,-0.815032,0.332686,0.754084,1.112491,...,-0.970720,-0.682035,0.249751,-0.983366,-1.667729,1.183739,-0.309658,-0.013062,-1.293601,0.821575
64 TE,0.444786,-0.640064,0.459327,1.137244,-0.388397,-0.060238,-0.782106,0.554677,0.243185,1.082552,...,-1.123807,-0.589344,-1.142467,-0.961484,-0.245646,0.547992,-0.224240,-0.857274,-1.263829,0.928504
64 TE,-0.820615,-1.238933,-0.046795,0.542850,-0.636628,-0.369476,-1.001615,-1.200672,0.581551,0.658206,...,-1.306988,-0.851590,-0.196988,-0.305013,0.487187,0.726511,-0.975912,-1.057256,-1.462308,0.175486
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64 PE,-0.092274,0.999082,-1.987977,-0.639956,-0.117116,-2.404533,2.223613,-0.769920,0.774185,-0.988412,...,-0.162106,0.430249,1.790883,0.833912,-0.666944,-1.147940,-0.651326,1.041132,0.986072,-1.479647
64 PE,-0.364482,0.196288,-2.017841,-0.677853,-0.150805,-1.165308,2.071524,-0.797853,0.246535,-1.013143,...,0.189864,0.672148,2.024889,-0.048677,-1.417533,-1.182551,-1.088174,0.362619,0.647240,-1.508262
64 PE,-0.143773,0.738367,-1.998979,-0.653918,-0.129528,-2.420450,1.754802,0.187142,0.976869,-0.997523,...,-0.932775,0.554590,1.396145,0.593206,-1.398162,-1.160691,-0.834363,0.973995,0.573519,-1.490190
64 PE,-0.570478,0.490559,-2.005266,-0.661896,-0.136620,-2.429545,1.964905,-0.786092,0.601652,-0.237346,...,-0.022103,0.486768,1.552149,0.447323,0.513014,-1.167978,-0.751387,1.488236,0.519646,-1.496214


In [5]:
# Run PCA through the course helper. It returns W (one row per gene, one column
# per principal component), the scores, and a third value, fracs, that is not
# used in the cells shown here.
W, scores, fracs = visualisation.do_pca(data)

# Rescale the scores so that the largest absolute value over all components is 1.
largest_abs_score = scores.abs().max().max()
scores = scores / largest_abs_score

# Use the cell labels as the row index of the rescaled scores.
scores.index = labels

W

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20
Actb,0.152286,0.035629,0.066706,0.117757,-0.325384,-0.281187,0.105944,-0.288777,0.187154,-0.088695,-0.211079,0.00836,-0.17477,0.058617,-0.023996,-0.013277,-0.048724,-0.040541,-0.023277,0.050191
Ahcy,-0.181832,-0.032441,0.073755,0.042707,-0.043371,-0.043035,-0.21616,0.016861,-0.035044,0.089579,-0.075333,-0.10791,-0.012188,-0.003712,-0.095901,-0.031822,0.029839,0.18175,-0.10781,0.10189
Aqp3,0.159188,-0.19922,0.021798,-0.013931,-0.018183,0.004595,0.059698,0.023504,-0.103533,-0.148183,-0.095706,-0.185626,0.087446,-0.100968,-0.044408,0.000474,-0.079052,0.055825,0.194438,-0.04884
Atp12a,0.148708,-0.041601,0.148646,0.240917,-0.103436,0.036233,0.036689,0.04596,-0.095627,0.050345,0.13332,-0.356075,-0.040136,-0.277431,0.086178,0.176981,0.193581,-0.244556,-0.028099,0.055278
Bmp4,-0.145029,-0.213896,0.046377,-0.042412,-0.072121,0.082243,0.03094,-0.080374,0.138164,-0.047067,-0.079272,-0.041232,-0.091182,-0.178341,0.043118,0.319238,-0.049522,0.165062,0.016346,-0.124586
Cdx2,0.108396,-0.050671,0.269226,0.075623,0.24949,0.019402,-0.131683,-0.040963,-0.392557,-0.003821,-0.191661,0.011963,-0.413715,-0.209166,-0.274853,-0.195888,0.103393,-0.052706,-0.006282,0.072082
Creb312,-0.168985,0.167319,0.113427,-0.068891,-0.006421,0.003175,-0.025139,-0.043144,0.087544,0.005961,-0.020814,-0.089875,-0.071808,0.089583,-0.034068,0.032208,-0.001207,0.044972,0.077456,-0.031496
Cebpa,0.128499,-0.042808,0.213998,-0.29267,0.018335,-0.02098,-0.16976,-0.084022,-0.048913,0.352425,-0.195693,-0.245692,0.024127,0.104164,-0.008241,0.115191,-0.223319,0.35485,-0.067557,0.20008
Dab2,0.06238,0.249633,0.157135,-0.120673,-0.151448,-0.134229,0.239023,0.249931,-0.252567,-0.268395,-0.153194,0.10611,0.189451,-0.079628,-0.127207,0.0376,-0.15368,0.041676,-0.299177,-0.034398
DppaI,0.192988,-0.048068,0.054317,-0.05968,-0.015163,0.126906,0.028676,-0.053566,0.006913,-0.064238,-0.002848,-0.031938,0.041619,-0.049822,0.016657,0.06891,-0.082787,-0.070078,0.021211,0.015277


In [6]:
# Figure 1B: scatter of the rescaled PCA scores on two principal components,
# one marker per row of `scores`, coloured by the row's label.

XPC = 'PC1'  # component on the x-axis
YPC = 'PC2'  # component on the y-axis
fig = px.scatter(
    scores, x=XPC, y=YPC, color=labels, hover_data=[XPC, YPC],
    # A title lets the figure stand alone when the notebook is skimmed.
    title='PCA scores of 64-cell-stage cells: %s vs %s' % (XPC, YPC),
)
# Draw a thin outline around each marker and fix the marker size.
fig.update_traces(mode='markers', marker_line_width=1, marker_size=8)
fig.show()

In [7]:
# Scatter of the PCA weight matrix W on two principal components, one marker per
# gene (row of W), coloured by gene name.
# NOTE(review): presumably the counterpart of Figure 1C in the paper - confirm.
XPC = 'PC1'  # component on the x-axis
YPC = 'PC2'  # component on the y-axis
genelabels = W.index
fig = px.scatter(
    W, x=XPC, y=YPC, color=genelabels, hover_data=[XPC, YPC],
    # A title distinguishes this plot from the per-cell scores scatter.
    title='PCA gene weights (W): %s vs %s' % (XPC, YPC),
)
# Draw a thin outline around each marker and fix the marker size.
fig.update_traces(mode='markers', marker_line_width=1, marker_size=8)
fig.show()

In [8]:
# Sanity check: number of labels (one per row) in the 64-cell-stage subset.
len(labels)

159