# Tutorial 2: Pseudotime computation with TimeFlow for a linear trajectory
This tutorial shows how to compute pseudotime for single-cell flow cytometry data along a linear trajectory.

The root of the trajectory is cell number 35362.

Differentiation stage labels are not used during pseudotime inference; they are used only to evaluate or visualize the results.

Labels: m1: Immature Monocyte, m2: Intermediate Monocyte, m3: Mature Monocyte

In [2]:
from __future__ import division
import pandas as pd
import numpy as np
from sklearn.neighbors import kneighbors_graph
import igraph as ig
from sklearn.metrics.pairwise import euclidean_distances
import itertools
import matplotlib.pyplot as plt
import os
import sys

# Make the local TimeFlow sources importable. The path is relative, so it is
# resolved against the working directory the notebook is run from.
sys.path.append(os.path.abspath("../src/TimeFlow/"))

import pseudotime 
from pseudotime import Pseudotime

In [3]:
# Seed NumPy's global random number generator so that any NumPy-based
# randomness in the cells below is repeatable across runs.
np.random.seed(1)

In [4]:
# Load the pre-processed P1 monocyte dataset. As the preview below shows, it
# holds the CD marker columns followed by a 'Celltype' label column.
data = pd.read_csv("../Pre-processed-datasets/P1_monocytes.csv")
# Display the table (pandas abbreviates the rendering of large frames).
data

Unnamed: 0,CD200,CD14,CD45,CD45RA,CD64,CD3,CD15,CD133,CD117,CD56,...,CD19,CD33,CD34,CD371,CD7,CD16,CD123,CD36,CD38,Celltype
0,0.476360,0.210000,0.380798,0.593579,0.209033,0.556763,0.390368,0.760927,0.811495,0.433740,...,0.682992,0.641786,0.786273,0.715657,0.572932,0.262740,0.470331,0.275481,0.872200,m1
1,0.408532,0.240234,0.612831,0.502399,0.276553,0.566520,0.492926,0.237612,0.413990,0.322411,...,0.551182,0.656294,0.242648,0.744157,0.618874,0.218125,0.983270,0.455801,0.687064,m2
2,0.322921,0.334046,0.493350,0.395761,0.166498,0.595043,0.401960,0.436152,0.416024,0.333763,...,0.446213,0.570805,0.296832,0.571086,0.339374,0.342555,0.954020,0.413528,0.767273,m2
3,0.404659,0.341854,0.466869,0.505488,0.875529,0.654263,0.551584,0.455843,0.422862,0.440552,...,0.648720,0.893499,0.105882,0.700903,0.817287,0.160520,0.676352,0.433278,0.827099,m2
4,0.359422,0.737795,0.704663,0.609532,0.781949,0.596656,0.527768,0.598056,0.424379,0.388737,...,0.745688,0.930050,0.119879,0.841939,0.485037,0.359423,0.591350,0.886581,0.637507,m3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39251,0.303462,0.290607,0.249948,0.557437,0.469470,0.761341,0.378994,0.480666,0.437503,0.326104,...,0.501915,0.776038,0.224030,0.846944,0.779584,0.199911,0.736472,0.565429,0.815701,m2
39252,0.322432,0.295687,0.394565,0.411515,0.285041,0.733938,0.312513,0.590894,0.625841,0.315181,...,0.577548,0.508150,0.745892,0.292928,0.659374,0.179430,0.713749,0.652844,0.841941,m1
39253,0.472677,0.314547,0.409916,0.413875,0.290130,0.695890,0.337164,0.849519,0.708099,0.303322,...,0.331928,0.502452,0.788373,0.262349,0.527730,0.256492,0.389099,0.345958,0.513077,m1
39254,0.489425,0.357431,0.632018,0.619991,0.479557,0.700178,0.380154,0.488515,0.435385,0.277057,...,0.532311,0.864917,0.178811,0.892551,0.896200,0.164894,0.664771,0.760477,0.775643,m2


In [5]:
# Keep the gating-derived differentiation stage labels in their own
# one-column frame, separate from the features.
celltype = data.loc[:, ['Celltype']]

# The first 20 columns are the CD markers; keep them both as a DataFrame
# and as a plain NumPy array.
n_marker_columns = 20
markers = data.iloc[:, :n_marker_columns]
markers_arr = markers.to_numpy()

# Log-probability of the data, as previously written out by the density module
density = pd.read_csv("./Results-P1-Monocytes/density_P1_monocytes.csv")

In [6]:
# Build the k-nearest-neighbour graph (k-NNG) over the cells and apply the
# density-weighting scheme.
# The root cell id is 35362, and k = 5 is used so that the k-NNG has a single
# connected component.
# The workflow is described in more detail in the pseudotime.py file.
ROOT_CELL_ID = 35362
K_NEIGHBOURS = 5

cell_graph_output = Pseudotime(markers, markers_arr, density, ROOT_CELL_ID, K_NEIGHBOURS)

# Run the workflow steps in this exact sequence (presumably each step relies
# on state set up by the earlier ones; see pseudotime.py to confirm).
cell_graph_output.graph_construction()
cell_graph_output.graph_density_weighting_scheme()
cell_graph_output.cell_graph_info()
cell_graph_output.graph_object()
cell_graph_output.shortest_path_edges_traversed()
cell_graph_output.graph_construction_euclidean()
cell_graph_output.euclidean_weights()

# The last step returns the pseudotime estimates
pseudotime_estimates = cell_graph_output.pseudotime_()

In [7]:
# Inspect the pseudotime estimates returned by pseudotime_(); as the output
# shows, this is a plain Python list of floats.
pseudotime_estimates

[5.047881375022685,
 8.17161072956554,
 6.537304919985052,
 10.016504597880228,
 11.463316111517068,
 11.923102473242198,
 11.189354366294044,
 6.033170294699248,
 8.087729645605233,
 7.125402562527224,
 2.2246482816834456,
 2.266360884576506,
 11.804938868941798,
 14.975773114230446,
 7.640319665532233,
 12.37886345620791,
 7.866795107902972,
 11.567701153732925,
 2.4466361716569276,
 2.707179873963949,
 7.919414632055955,
 8.92318149991146,
 10.82002554374856,
 3.1972412102897243,
 7.470100370343689,
 11.35139286686277,
 7.6787787888537,
 8.317424417128926,
 10.993008917797676,
 9.198182117188074,
 10.73655248750335,
 11.768339346088252,
 7.975455202590987,
 7.936806368859691,
 9.649398811664184,
 10.167578918240553,
 0.805360918379326,
 7.2894795185799595,
 12.356912545003155,
 4.68295544681235,
 7.505636991650221,
 8.810599065182181,
 1.4930016542242408,
 2.6746920620880332,
 2.6722184431292737,
 10.274716320951697,
 10.215362688552876,
 5.20283510195671,
 10.241600637376006,
 8.08

In [8]:
# Wrap the pseudotime list in a one-column frame that shares the dataset's row
# index. Passing index=data.index makes pandas raise a ValueError when the
# number of estimates differs from the number of rows in `data`, instead of
# letting the index-aligned concat below silently pad the gap with NaN.
pseudotime_results = pd.DataFrame(
    pseudotime_estimates, columns=['pseudotime'], index=data.index
)

# Append the pseudotime column to the original dataset, row by row
markers_pseudotime = pd.concat([data, pseudotime_results], axis=1)

In [9]:
# Save the dataset together with its pseudotime column; index=False leaves the
# row index out of the CSV file.
markers_pseudotime.to_csv("./Results-P1-Monocytes/P1_monocytes_pseudotime.csv", index=False)

In [10]:
# Show the final table: the original columns plus the new 'pseudotime' column.
markers_pseudotime

Unnamed: 0,CD200,CD14,CD45,CD45RA,CD64,CD3,CD15,CD133,CD117,CD56,...,CD33,CD34,CD371,CD7,CD16,CD123,CD36,CD38,Celltype,pseudotime
0,0.476360,0.210000,0.380798,0.593579,0.209033,0.556763,0.390368,0.760927,0.811495,0.433740,...,0.641786,0.786273,0.715657,0.572932,0.262740,0.470331,0.275481,0.872200,m1,5.047881
1,0.408532,0.240234,0.612831,0.502399,0.276553,0.566520,0.492926,0.237612,0.413990,0.322411,...,0.656294,0.242648,0.744157,0.618874,0.218125,0.983270,0.455801,0.687064,m2,8.171611
2,0.322921,0.334046,0.493350,0.395761,0.166498,0.595043,0.401960,0.436152,0.416024,0.333763,...,0.570805,0.296832,0.571086,0.339374,0.342555,0.954020,0.413528,0.767273,m2,6.537305
3,0.404659,0.341854,0.466869,0.505488,0.875529,0.654263,0.551584,0.455843,0.422862,0.440552,...,0.893499,0.105882,0.700903,0.817287,0.160520,0.676352,0.433278,0.827099,m2,10.016505
4,0.359422,0.737795,0.704663,0.609532,0.781949,0.596656,0.527768,0.598056,0.424379,0.388737,...,0.930050,0.119879,0.841939,0.485037,0.359423,0.591350,0.886581,0.637507,m3,11.463316
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39251,0.303462,0.290607,0.249948,0.557437,0.469470,0.761341,0.378994,0.480666,0.437503,0.326104,...,0.776038,0.224030,0.846944,0.779584,0.199911,0.736472,0.565429,0.815701,m2,8.298137
39252,0.322432,0.295687,0.394565,0.411515,0.285041,0.733938,0.312513,0.590894,0.625841,0.315181,...,0.508150,0.745892,0.292928,0.659374,0.179430,0.713749,0.652844,0.841941,m1,4.408994
39253,0.472677,0.314547,0.409916,0.413875,0.290130,0.695890,0.337164,0.849519,0.708099,0.303322,...,0.502452,0.788373,0.262349,0.527730,0.256492,0.389099,0.345958,0.513077,m1,2.683957
39254,0.489425,0.357431,0.632018,0.619991,0.479557,0.700178,0.380154,0.488515,0.435385,0.277057,...,0.864917,0.178811,0.892551,0.896200,0.164894,0.664771,0.760477,0.775643,m2,9.725828
