# Project information

**Title:** Mapping cancer cell line dependencies to patient populations

**Authors:** Sinead Dunphy, Alyson Freeman, Kristina Garber

36-alysonkf-kgarb-dunphysi

**Notebook location:** https://colab.research.google.com/drive/1eCHaIFW0K9O34HqLPXVYfAZuFTr9YVKR?usp=sharing


**Links to datasets:**

DepMap CRISPR dependencies data: https://ndownloader.figshare.com/files/29125323

DepMap RNAseq gene expression data: https://ndownloader.figshare.com/files/29124747

DepMap cell line metadata: https://ndownloader.figshare.com/files/29162481

TCGA RNAseq gene expression data: https://xenabrowser.net/datapages/?dataset=EB%2B%2BAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.xena&host=https%3A%2F%2Fpancanatlas.xenahubs.net&removeHub=https%3A%2F%2Fxena.treehouse.gi.ucsc.edu%3A443

TCGA patient sample metadata: https://api.gdc.cancer.gov/data/1b5f413e-a8d1-4d10-92eb-7c4ae739ed81

TCGA study abbreviations and names: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations

Gene aliases: https://www.genenames.org/cgi-bin/download/custom?col=gd_app_sym&col=gd_prev_sym&col=gd_aliases&status=Approved&status=Entry%20Withdrawn&hgnc_dbtag=on&order_by=gd_app_sym_sort&format=text&submit=submit

# Importing, cleaning and manipulating datasets

In [51]:
import numpy as np
import pandas as pd
from google.colab import drive

# Mount Google Drive so the CSV files under /content/drive can be read by later cells.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [52]:
# Load the DepMap inputs from the shared Google Drive folder:
#   1) CRISPR gene effect scores,
#   2) cell line metadata (sample info),
#   3) CCLE gene expression.
# DATA_DIR is defined once so the folder location can be changed in a single place.
DATA_DIR = "/content/drive/Shared drives/Milestone_I"

CRISPR_df = pd.read_csv(f"{DATA_DIR}/CRISPR_gene_effect.csv")
cell_lines_df = pd.read_csv(f"{DATA_DIR}/sample_info.csv")
cell_line_expression = pd.read_csv(f"{DATA_DIR}/CCLE_expression.csv")

In [53]:
# Sanity check: (rows, columns) of the CRISPR gene-effect table as loaded from the CSV.
CRISPR_df.shape

(990, 17646)

In [54]:
# Preview the first five rows to eyeball the column layout and the value ranges.
CRISPR_df.head(5)

Unnamed: 0,DepMap_ID,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),AADAC (13),AADACL2 (344752),AADACL3 (126767),AADACL4 (343066),AADAT (51166),AAED1 (195827),AAGAB (79719),AAK1 (22848),AAMDC (28971),AAMP (14),AANAT (15),AAR2 (25980),AARD (441376),AARS (16),AARS2 (57505),AARSD1 (80755),AASDH (132949),AASDHPPT (60496),AASS (10157),AATF (26574),AATK (9625),ABAT (18),ABCA1 (19),ABCA10 (10349),ABCA12 (26154),ABCA13 (154664),ABCA2 (20),ABCA3 (21),ABCA4 (24),ABCA5 (23461),...,ZRANB3 (84083),ZRSR2 (8233),ZSCAN1 (284312),ZSCAN10 (84891),ZSCAN16 (80345),ZSCAN18 (65982),ZSCAN2 (54993),ZSCAN20 (7579),ZSCAN21 (7589),ZSCAN22 (342945),ZSCAN23 (222696),ZSCAN25 (221785),ZSCAN29 (146050),ZSCAN30 (100101467),ZSCAN31 (64288),ZSCAN32 (54925),ZSCAN4 (201516),ZSCAN5A (79149),ZSCAN5B (342933),ZSCAN9 (7746),ZSWIM1 (90204),ZSWIM2 (151112),ZSWIM3 (140831),ZSWIM4 (65249),ZSWIM5 (57643),ZSWIM6 (57688),ZSWIM7 (125150),ZSWIM8 (23053),ZUP1 (221302),ZW10 (9183),ZWILCH (55055),ZWINT (11130),ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009)
0,ACH-000001,-0.334969,-0.06158,-0.026897,-0.026507,-0.129643,0.060688,-0.119348,-0.398485,-0.010647,0.162631,-0.310049,0.147555,0.184425,-0.333298,0.153285,-0.079504,-0.296891,-0.118084,-0.963036,-0.215368,-0.036879,-0.075194,-2.074299,-0.743538,-0.093722,-0.108416,-0.525374,0.093326,-0.70081,-0.172236,-0.094491,0.241104,-0.127136,-0.180626,0.060713,-0.111944,-0.163187,-0.369501,-0.051733,...,0.078022,-1.389807,-0.119812,-0.317449,0.065291,-0.411232,-0.069714,-0.067491,-0.001525,-0.238539,0.054201,-0.055563,-0.030228,0.04319,0.146922,-0.186981,-0.089181,0.149982,-0.133839,0.127381,0.125597,-0.10951,0.227661,-0.114913,0.093759,0.138211,-0.235328,-0.092094,0.001644,-0.571902,-0.180651,-0.263863,-0.019301,0.154692,0.216516,-0.09577,-0.025669,0.215264,0.106057,-0.483079
1,ACH-000004,0.020107,-0.00041,-0.055257,-0.071736,-0.088479,-0.233373,0.283944,-0.254014,0.239971,0.146308,-0.03809,0.04491,0.166511,0.040402,0.297193,-0.027029,0.047149,-0.021958,-1.100219,-0.221004,-0.434902,-0.087751,-1.772154,-0.459564,-0.170503,-0.04456,-0.439969,-0.042038,-0.667794,-0.036024,-0.037588,0.088347,0.124844,-0.022769,0.079305,0.008898,-0.019001,-0.295453,0.198513,...,-0.044623,-0.894345,0.118227,0.032023,-0.02262,-0.020994,-0.060484,-0.130784,0.068227,-0.044324,0.148103,0.094501,-0.277805,0.180969,0.041325,-0.142479,0.119594,-0.328146,-0.182727,0.13047,-0.285574,0.078565,-0.009222,0.060892,-0.261122,0.026847,-0.035528,-0.184435,-0.161245,0.044165,-0.229941,-0.582675,0.132592,0.098392,0.181388,0.14052,-0.406777,0.152613,0.197995,-0.199333
2,ACH-000005,-0.191303,0.086,0.235074,0.068524,-0.286711,-0.337232,0.137179,-0.357927,-0.100163,0.010403,0.193095,0.08727,-0.054359,0.030757,0.209344,-0.013128,-0.049996,-0.180642,-1.098065,0.014994,-0.269943,0.035713,-1.837408,-0.324273,-0.261204,-0.039165,-0.258809,0.02645,-0.717115,-0.003977,-0.254419,-0.066099,0.053624,-0.015285,0.061832,0.033628,0.078573,-0.189015,0.100327,...,0.10124,-1.45415,0.061646,0.029666,0.066562,-0.034267,0.076829,-0.186648,-0.175506,-0.041671,0.028856,0.179234,-0.109565,0.0173,-0.018504,0.072726,0.10758,-0.186592,-0.295659,0.085028,0.069183,0.143798,-0.211096,-0.169062,-0.205673,0.186704,-0.068606,-0.245673,-0.206315,-0.234198,-0.283961,-0.485151,-0.016706,0.048799,0.09759,-0.139802,-0.09616,-0.024441,0.016988,-0.267921
3,ACH-000007,0.008862,-0.021161,0.102202,0.107526,-0.045557,-0.007575,0.070338,-0.265205,0.023019,0.250927,0.063849,0.069961,0.093815,-0.161213,0.020884,-0.024435,-0.197741,0.024142,-1.189605,0.024648,0.03516,-0.111406,-1.870909,-0.548806,-0.217481,-0.327512,-0.525538,-0.073879,-0.820872,-0.08011,0.116312,-0.042634,0.004091,0.02699,0.065725,-0.096931,0.171791,-0.186575,0.00614,...,0.096535,-0.846016,0.036525,-0.127508,-0.203019,-0.27484,0.072157,-0.095493,0.046583,0.028542,0.141186,0.050176,-0.040564,-0.035024,-0.026,-0.108843,0.169469,0.006926,-0.10476,0.210014,-0.053852,-0.112955,0.023384,-0.160514,-0.159488,0.047137,-0.066613,-0.233574,-0.148351,-0.3826,-0.246872,-0.518788,0.010676,0.176259,0.142269,-0.020587,-0.363685,0.027735,-0.334223,-0.484621
4,ACH-000009,0.006476,-0.026033,0.116825,0.196238,-0.098705,-0.124901,-0.025107,-0.251012,0.088328,0.130948,-0.052737,-0.246025,-0.013083,-0.06478,-0.119608,0.064713,-0.16708,-0.119103,-1.122296,-0.186775,-0.105022,0.02015,-1.661961,-0.495631,-0.283533,-0.101709,-0.531834,-0.02975,-0.650062,-0.225336,0.084343,-0.004804,0.069033,-0.1852,-0.012656,-0.09175,0.10589,-0.295738,-0.318818,...,-0.083413,-0.580992,0.013672,-0.007818,0.067975,-0.176481,0.10232,-0.235117,-0.003277,-0.01298,0.065445,-0.004791,-0.095217,-0.106363,0.139132,0.150934,0.021566,-0.17564,-0.232412,0.218157,-0.013314,-0.203076,0.088749,-0.01684,-0.227325,0.267733,-0.23697,-0.276963,0.09302,-0.213426,-0.283986,-0.417919,0.03287,0.212718,-0.018123,-0.213931,-0.428286,0.048789,-0.115131,-0.411152


In [55]:
# Use the cell line DepMap ID as the row index.
# The guard keeps this cell re-runnable: after the first run 'DepMap_ID' is the
# index rather than a column, and an unguarded set_index would raise KeyError.
if 'DepMap_ID' in CRISPR_df.columns:
    CRISPR_df.set_index('DepMap_ID', inplace=True)

In [56]:
# Check that DepMap_ID is now the row index and that the remaining columns are genes.
CRISPR_df.head(1)

Unnamed: 0_level_0,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),AADAC (13),AADACL2 (344752),AADACL3 (126767),AADACL4 (343066),AADAT (51166),AAED1 (195827),AAGAB (79719),AAK1 (22848),AAMDC (28971),AAMP (14),AANAT (15),AAR2 (25980),AARD (441376),AARS (16),AARS2 (57505),AARSD1 (80755),AASDH (132949),AASDHPPT (60496),AASS (10157),AATF (26574),AATK (9625),ABAT (18),ABCA1 (19),ABCA10 (10349),ABCA12 (26154),ABCA13 (154664),ABCA2 (20),ABCA3 (21),ABCA4 (24),ABCA5 (23461),ABCA6 (23460),...,ZRANB3 (84083),ZRSR2 (8233),ZSCAN1 (284312),ZSCAN10 (84891),ZSCAN16 (80345),ZSCAN18 (65982),ZSCAN2 (54993),ZSCAN20 (7579),ZSCAN21 (7589),ZSCAN22 (342945),ZSCAN23 (222696),ZSCAN25 (221785),ZSCAN29 (146050),ZSCAN30 (100101467),ZSCAN31 (64288),ZSCAN32 (54925),ZSCAN4 (201516),ZSCAN5A (79149),ZSCAN5B (342933),ZSCAN9 (7746),ZSWIM1 (90204),ZSWIM2 (151112),ZSWIM3 (140831),ZSWIM4 (65249),ZSWIM5 (57643),ZSWIM6 (57688),ZSWIM7 (125150),ZSWIM8 (23053),ZUP1 (221302),ZW10 (9183),ZWILCH (55055),ZWINT (11130),ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009)
DepMap_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
ACH-000001,-0.334969,-0.06158,-0.026897,-0.026507,-0.129643,0.060688,-0.119348,-0.398485,-0.010647,0.162631,-0.310049,0.147555,0.184425,-0.333298,0.153285,-0.079504,-0.296891,-0.118084,-0.963036,-0.215368,-0.036879,-0.075194,-2.074299,-0.743538,-0.093722,-0.108416,-0.525374,0.093326,-0.70081,-0.172236,-0.094491,0.241104,-0.127136,-0.180626,0.060713,-0.111944,-0.163187,-0.369501,-0.051733,-0.192586,...,0.078022,-1.389807,-0.119812,-0.317449,0.065291,-0.411232,-0.069714,-0.067491,-0.001525,-0.238539,0.054201,-0.055563,-0.030228,0.04319,0.146922,-0.186981,-0.089181,0.149982,-0.133839,0.127381,0.125597,-0.10951,0.227661,-0.114913,0.093759,0.138211,-0.235328,-0.092094,0.001644,-0.571902,-0.180651,-0.263863,-0.019301,0.154692,0.216516,-0.09577,-0.025669,0.215264,0.106057,-0.483079


In [57]:
# Row-wise maximum: the largest value found across the columns of each cell line row.
# NOTE(review): the very next cell drops this 'max' column again before transposing;
# confirm whether any later cell still relies on it.
per_cell_line_max = CRISPR_df.max(axis=1)
CRISPR_df['max'] = per_cell_line_max

In [58]:
# Flip the table so that each row is a gene and each column is a cell line.
# The per-cell-line 'max' helper column is removed first so it does not become a row.
CRISPR_transpose = CRISPR_df.drop(columns='max').T

In [59]:
# Shape after transposing: rows are genes, columns are cell lines.
CRISPR_transpose.shape

(17645, 990)

In [60]:
# Preview the gene-by-cell-line layout.
CRISPR_transpose.head()

DepMap_ID,ACH-000001,ACH-000004,ACH-000005,ACH-000007,ACH-000009,ACH-000011,ACH-000012,ACH-000013,ACH-000014,ACH-000015,ACH-000017,ACH-000018,ACH-000019,ACH-000021,ACH-000022,ACH-000023,ACH-000024,ACH-000025,ACH-000028,ACH-000029,ACH-000030,ACH-000035,ACH-000036,ACH-000037,ACH-000039,ACH-000040,ACH-000041,ACH-000042,ACH-000045,ACH-000047,ACH-000048,ACH-000052,ACH-000053,ACH-000054,ACH-000055,ACH-000060,ACH-000067,ACH-000070,ACH-000074,ACH-000075,...,ACH-002230,ACH-002233,ACH-002234,ACH-002239,ACH-002243,ACH-002247,ACH-002249,ACH-002250,ACH-002251,ACH-002257,ACH-002261,ACH-002263,ACH-002265,ACH-002269,ACH-002278,ACH-002280,ACH-002282,ACH-002283,ACH-002284,ACH-002285,ACH-002294,ACH-002295,ACH-002296,ACH-002297,ACH-002298,ACH-002304,ACH-002305,ACH-002315,ACH-002399,ACH-002446,ACH-002458,ACH-002459,ACH-002460,ACH-002462,ACH-002463,ACH-002464,ACH-002467,ACH-002508,ACH-002510,ACH-002512
A1BG (1),-0.334969,0.020107,-0.191303,0.008862,0.006476,0.144966,-0.135015,-0.093118,0.009573,-0.233304,0.045621,-0.15025,-0.084948,-0.144632,-0.123348,-0.134909,0.00788,-0.135757,-0.214794,-0.098954,-0.012566,-0.077951,-0.034852,-0.05834,-0.04164,-0.029223,-0.094291,-0.008927,-0.034514,-0.170074,-0.007964,-0.087994,0.082023,-0.193171,0.111758,-0.033703,0.049506,-0.086552,-0.148488,-0.042857,...,-0.088545,-0.101784,0.00567,-0.040777,-0.012995,-0.188624,-0.059027,-0.04656,0.073922,0.038355,-0.124916,-0.063623,0.028468,0.14402,0.189422,0.037447,-0.137368,-0.168436,-0.059204,-0.095606,0.011022,-0.057317,-0.16703,0.08786,-0.198959,0.021675,0.066203,0.078557,0.197422,-0.218523,-0.164574,-0.114961,-0.070382,-0.055249,0.012528,-0.113886,-0.140471,-0.07995,0.005493,-0.004693
A1CF (29974),-0.06158,-0.00041,0.086,-0.021161,-0.026033,-0.104504,0.118439,-0.052005,-0.153536,-0.080814,0.00024,-0.020622,0.01097,-0.072462,0.049656,-0.107933,-0.001918,-0.211275,-0.18165,0.017406,-0.351064,-0.00696,-0.066619,0.100857,0.152603,-0.095646,0.080225,-0.043387,0.086659,0.230689,-0.112457,-0.058201,0.029688,-0.052326,0.086416,0.059892,-0.217946,-0.106175,0.031065,0.037965,...,-0.261491,-0.091271,0.150086,-0.034703,0.025701,-0.199867,-0.095777,-0.058975,-0.091646,-0.077174,-0.001281,0.036452,0.071694,-0.060662,-0.039074,-0.000837,0.035301,0.068619,-0.074285,0.028372,-0.221683,0.041222,0.023609,-0.202574,0.175387,0.105074,-0.074744,-0.226371,0.031493,-0.312816,0.164348,-0.054229,-0.148926,-0.064067,0.06436,0.008348,0.1233,0.06927,0.012978,0.014939
A2M (2),-0.026897,-0.055257,0.235074,0.102202,0.116825,0.121287,0.253352,0.093733,0.137888,-0.045071,0.041978,-0.181914,0.174494,0.017991,0.223375,0.053574,0.058593,0.050872,0.192795,0.125675,0.163411,0.055804,0.041389,0.054127,0.128234,-0.024763,0.218959,0.131296,0.083742,-0.00037,0.123808,0.183605,0.105163,0.036301,0.130973,-0.038505,0.062871,0.150198,0.270511,0.236993,...,0.170971,0.007676,-0.035864,-0.016141,0.022815,-0.038249,0.122463,0.078437,0.134116,0.097931,0.173613,0.015996,0.180041,0.197261,0.05541,0.079493,0.139364,0.19784,0.187001,0.110241,0.070967,0.238726,-0.011548,0.061205,-0.000719,0.113425,0.049077,0.083538,0.015363,0.046853,0.124925,0.19999,-0.071168,0.039575,0.02683,0.080286,0.169631,0.177966,0.038273,0.224676
A2ML1 (144568),-0.026507,-0.071736,0.068524,0.107526,0.196238,0.340482,0.284129,0.175221,0.118795,0.076349,0.023405,0.119003,0.184318,-0.00829,-0.076456,-0.154982,0.118093,0.190053,0.242694,0.187836,0.167104,0.072329,0.268044,0.099115,0.469084,-0.074614,-0.138626,0.169394,0.134029,0.354782,0.033722,0.127489,0.082431,0.035399,0.103855,0.070583,0.154244,0.261595,0.255401,0.107239,...,-0.01759,0.140844,-0.005725,0.114719,0.181945,0.165169,0.151168,0.201949,0.089478,-0.105688,0.157911,0.037526,0.29774,0.183246,0.054186,0.002799,0.111959,0.207893,0.184432,0.023725,0.244296,0.151469,0.046338,0.155546,0.12363,0.100842,0.006818,0.122887,-0.006871,0.120893,0.244009,0.226472,0.105607,0.175997,-0.101228,0.110665,0.041703,0.122893,0.016146,0.048566
A3GALT2 (127550),-0.129643,-0.088479,-0.286711,-0.045557,-0.098705,-0.15098,-0.037887,-0.196711,-0.1323,-0.139247,-0.332026,-0.013205,-0.195734,-0.10621,-0.338183,-0.206917,-0.041842,-0.1048,-0.225035,-0.310429,-0.140005,-0.144798,-0.223845,-0.027474,0.054252,-0.214071,-0.155304,-0.133536,-0.086771,-0.147037,-0.151305,-0.051454,-0.199094,-0.170716,-0.279808,-0.496797,-0.17068,-0.181737,-0.224285,-0.223066,...,-0.264901,-0.123291,-0.202032,-0.098188,-0.115927,-0.249248,-0.043791,-0.338918,-0.250248,-0.267242,-0.150283,-0.258319,-0.099465,-0.186334,-0.0193,-0.037754,-0.135888,-0.093259,-0.300488,-0.291231,-0.090586,-0.155735,-0.089066,-0.302071,-0.125529,-0.015493,-0.050328,-0.082943,-0.320861,-0.421043,-0.270227,-0.251222,-0.085845,-0.133148,-0.137136,-0.328151,-0.250064,-0.2971,-0.1208,-0.205108


In [61]:
# Add per-gene summary columns: the highest ('max') and lowest ('min') score of each
# gene across all cell lines.
# Both reductions run on the cell-line columns only. Any 'max'/'min' helper columns
# left over from an earlier run of this cell are excluded, so the helpers never feed
# into each other and the cell gives the same result however often it is run.
cell_line_scores = CRISPR_transpose.drop(columns=['max', 'min'], errors='ignore')
CRISPR_transpose['max'] = cell_line_scores.max(axis=1)
CRISPR_transpose['min'] = cell_line_scores.min(axis=1)

In [62]:
# Genes whose highest score across all cell lines is not above -1, i.e. every cell
# line scores at or below -1 for that gene. These are kept in a separate frame so
# they can be inspected.
# NOTE(review): the cutoff used here is -1, not 0 — presumably the score below which
# a cell line counts as sensitive; confirm the intended threshold.
# A boolean row mask replaces drop-by-index-label so the selection stays row-exact
# even if gene labels were ever duplicated. The mask is negated (rather than written
# as <= -1) so that rows with a missing 'max' are still kept, as before.
max_above_cutoff = CRISPR_transpose['max'] > -1
Panlethal = CRISPR_transpose[~max_above_cutoff]

In [63]:
# Number of genes (rows) kept in Panlethal.
len(Panlethal)

71

In [64]:
# NOTE(review): disabled exploratory code (summary statistics of the per-gene 'max'
# column). This cell currently does nothing; either restore it or delete it.
# desc_1 = CRISPR_transpose['max'].describe()
# desc_1

In [65]:
# The column count includes the helper 'max' and 'min' columns besides the cell lines.
Panlethal.shape

(71, 992)

In [66]:
# Preview the first few genes in Panlethal.
Panlethal.head()

DepMap_ID,ACH-000001,ACH-000004,ACH-000005,ACH-000007,ACH-000009,ACH-000011,ACH-000012,ACH-000013,ACH-000014,ACH-000015,ACH-000017,ACH-000018,ACH-000019,ACH-000021,ACH-000022,ACH-000023,ACH-000024,ACH-000025,ACH-000028,ACH-000029,ACH-000030,ACH-000035,ACH-000036,ACH-000037,ACH-000039,ACH-000040,ACH-000041,ACH-000042,ACH-000045,ACH-000047,ACH-000048,ACH-000052,ACH-000053,ACH-000054,ACH-000055,ACH-000060,ACH-000067,ACH-000070,ACH-000074,ACH-000075,...,ACH-002234,ACH-002239,ACH-002243,ACH-002247,ACH-002249,ACH-002250,ACH-002251,ACH-002257,ACH-002261,ACH-002263,ACH-002265,ACH-002269,ACH-002278,ACH-002280,ACH-002282,ACH-002283,ACH-002284,ACH-002285,ACH-002294,ACH-002295,ACH-002296,ACH-002297,ACH-002298,ACH-002304,ACH-002305,ACH-002315,ACH-002399,ACH-002446,ACH-002458,ACH-002459,ACH-002460,ACH-002462,ACH-002463,ACH-002464,ACH-002467,ACH-002508,ACH-002510,ACH-002512,max,min
ANKLE2 (23141),-1.983625,-1.720188,-1.753334,-1.709523,-1.804117,-1.755392,-1.858146,-1.890259,-1.794743,-1.941172,-1.99357,-2.04343,-1.929807,-1.733074,-1.656395,-2.05185,-1.799557,-1.701928,-1.792064,-1.777637,-1.687719,-1.875493,-2.021519,-1.527789,-1.612325,-1.604448,-1.564871,-1.879796,-1.986974,-1.98544,-1.885171,-1.715548,-1.928001,-1.76627,-2.167552,-1.386148,-1.88119,-1.822012,-1.992242,-1.79805,...,-2.085834,-1.631934,-2.032233,-1.744124,-1.74054,-1.914902,-2.038111,-1.832656,-1.985517,-1.798259,-1.950179,-1.930279,-1.899845,-1.589859,-1.467598,-2.049214,-1.870968,-1.799513,-1.8167,-2.00945,-1.908674,-1.756558,-1.774478,-1.950487,-1.859853,-1.861285,-1.718853,-1.879544,-2.11529,-1.816725,-2.015736,-1.891176,-1.982022,-1.484037,-1.991246,-1.923606,-1.783196,-1.604663,-1.208388,-2.461852
CCT3 (7203),-1.839926,-1.628325,-1.575213,-1.730466,-2.02247,-1.722471,-1.93752,-1.895847,-1.789223,-2.072687,-1.634541,-1.942881,-1.958949,-1.948558,-1.966664,-2.007996,-1.618981,-2.09896,-1.953644,-2.014169,-1.872473,-1.911184,-1.82611,-1.844422,-1.869079,-2.086201,-1.466716,-1.880668,-1.984498,-1.685199,-1.840703,-1.932058,-2.155073,-2.143699,-1.834935,-1.68679,-1.998439,-2.027923,-2.065378,-1.88294,...,-2.139573,-1.808763,-1.766098,-2.189977,-1.874277,-1.842645,-1.816155,-1.925105,-1.599848,-1.748672,-1.934561,-2.082583,-1.668055,-1.651328,-1.609537,-1.961234,-2.05174,-1.869988,-1.842232,-1.777214,-1.949347,-2.124457,-1.979409,-2.112914,-1.939357,-1.845169,-1.576961,-1.954088,-2.036255,-2.002719,-2.07756,-2.021258,-1.776213,-1.909865,-1.939491,-1.955132,-1.85158,-1.530413,-1.152715,-2.492155
CDC23 (8697),-1.651085,-1.871521,-1.94313,-1.434822,-1.550979,-1.625944,-1.885556,-1.682499,-1.868646,-1.944291,-1.516088,-1.708483,-1.490891,-1.493857,-1.876607,-1.728821,-1.599804,-1.695841,-1.567096,-1.935729,-1.659677,-1.628287,-1.81486,-1.650786,-1.776457,-1.707331,-1.835187,-1.755716,-1.674051,-1.589253,-1.800785,-2.023236,-1.589464,-1.872638,-1.886207,-1.755213,-1.818961,-1.95774,-1.574694,-1.727713,...,-1.696465,-1.766317,-1.832732,-1.927982,-1.75054,-1.481419,-1.693531,-1.786361,-1.631193,-1.710974,-1.690735,-1.7138,-1.661142,-1.767703,-1.903861,-1.840225,-2.10956,-1.776462,-1.475526,-1.620854,-1.707142,-1.65231,-1.434425,-1.599627,-1.69676,-1.652095,-1.570222,-1.744099,-1.566928,-1.690007,-1.459886,-1.751807,-1.661799,-1.056851,-1.8463,-1.804895,-1.710312,-1.793788,-1.056851,-2.250748
CDC7 (8317),-1.90573,-1.511301,-1.690537,-1.925316,-1.866793,-1.641774,-1.743213,-1.892228,-2.053708,-1.749673,-1.533011,-1.78841,-1.872122,-1.440445,-1.898887,-1.765005,-1.660988,-1.871023,-1.919704,-1.854242,-1.615128,-1.900794,-1.775209,-1.604661,-1.990603,-1.67141,-1.693868,-1.956877,-1.686993,-1.842973,-1.76267,-1.777719,-1.741897,-1.6368,-1.445632,-1.756838,-2.064464,-1.665026,-1.723207,-1.855577,...,-2.052937,-1.682472,-1.904833,-1.854307,-1.8203,-1.737102,-1.790698,-1.627918,-1.495027,-1.730689,-2.010133,-1.647619,-1.890333,-1.598034,-1.749907,-1.948853,-1.789887,-1.688686,-1.79689,-1.34886,-1.798272,-1.734224,-1.872304,-1.704879,-1.82755,-1.327774,-1.509734,-1.352108,-1.739654,-1.665439,-1.982319,-1.701185,-1.833136,-1.692965,-1.757515,-1.53342,-1.849633,-1.378688,-1.170224,-2.470308
CHMP2A (27243),-1.331536,-2.177281,-1.724039,-1.623696,-1.550456,-1.486322,-1.810427,-1.792086,-1.696252,-1.790982,-1.386524,-1.648539,-1.76813,-1.63148,-1.541324,-1.754106,-1.577165,-1.599013,-1.688136,-1.807377,-2.094166,-1.903258,-1.801226,-1.672268,-1.774133,-1.529785,-1.80746,-1.389721,-1.797116,-1.384588,-1.750484,-1.644167,-1.966,-1.719502,-1.57014,-1.779881,-1.907147,-1.483959,-1.602671,-1.516362,...,-1.465278,-1.516765,-1.650503,-1.250846,-1.749218,-1.744587,-1.743309,-1.534926,-1.252874,-1.82744,-1.794642,-1.772145,-1.567105,-1.572854,-1.759245,-1.92503,-1.943584,-1.873482,-1.689536,-1.865341,-1.613767,-1.600222,-1.57088,-1.938028,-1.539055,-1.52911,-1.590802,-1.593395,-1.801769,-1.832289,-2.032542,-1.768901,-1.606487,-1.827001,-1.817472,-2.107725,-1.171286,-1.752376,-1.005248,-2.200135


In [67]:
# Remove the genes whose highest score across all cell lines is below -1 (every cell
# line scores below -1 for them); all remaining genes are kept for further filtering.
# NOTE(review): the cutoff used here is -1, not 0 — confirm the intended threshold.
# A gene whose 'max' is exactly -1 passes both this filter and the Panlethal one.
# A boolean row mask replaces drop-by-index-label so the selection stays row-exact
# even if gene labels were ever duplicated. The mask is negated (rather than written
# as >= -1) so that rows with a missing 'max' are still kept, as before.
max_below_cutoff = CRISPR_transpose['max'] < -1
Not_Panlethal = CRISPR_transpose[~max_below_cutoff]

In [68]:
# Number of genes (rows) remaining in Not_Panlethal.
len(Not_Panlethal)

17574

In [69]:
# Preview the first few genes in Not_Panlethal.
Not_Panlethal.head()

DepMap_ID,ACH-000001,ACH-000004,ACH-000005,ACH-000007,ACH-000009,ACH-000011,ACH-000012,ACH-000013,ACH-000014,ACH-000015,ACH-000017,ACH-000018,ACH-000019,ACH-000021,ACH-000022,ACH-000023,ACH-000024,ACH-000025,ACH-000028,ACH-000029,ACH-000030,ACH-000035,ACH-000036,ACH-000037,ACH-000039,ACH-000040,ACH-000041,ACH-000042,ACH-000045,ACH-000047,ACH-000048,ACH-000052,ACH-000053,ACH-000054,ACH-000055,ACH-000060,ACH-000067,ACH-000070,ACH-000074,ACH-000075,...,ACH-002234,ACH-002239,ACH-002243,ACH-002247,ACH-002249,ACH-002250,ACH-002251,ACH-002257,ACH-002261,ACH-002263,ACH-002265,ACH-002269,ACH-002278,ACH-002280,ACH-002282,ACH-002283,ACH-002284,ACH-002285,ACH-002294,ACH-002295,ACH-002296,ACH-002297,ACH-002298,ACH-002304,ACH-002305,ACH-002315,ACH-002399,ACH-002446,ACH-002458,ACH-002459,ACH-002460,ACH-002462,ACH-002463,ACH-002464,ACH-002467,ACH-002508,ACH-002510,ACH-002512,max,min
A1BG (1),-0.334969,0.020107,-0.191303,0.008862,0.006476,0.144966,-0.135015,-0.093118,0.009573,-0.233304,0.045621,-0.15025,-0.084948,-0.144632,-0.123348,-0.134909,0.00788,-0.135757,-0.214794,-0.098954,-0.012566,-0.077951,-0.034852,-0.05834,-0.04164,-0.029223,-0.094291,-0.008927,-0.034514,-0.170074,-0.007964,-0.087994,0.082023,-0.193171,0.111758,-0.033703,0.049506,-0.086552,-0.148488,-0.042857,...,0.00567,-0.040777,-0.012995,-0.188624,-0.059027,-0.04656,0.073922,0.038355,-0.124916,-0.063623,0.028468,0.14402,0.189422,0.037447,-0.137368,-0.168436,-0.059204,-0.095606,0.011022,-0.057317,-0.16703,0.08786,-0.198959,0.021675,0.066203,0.078557,0.197422,-0.218523,-0.164574,-0.114961,-0.070382,-0.055249,0.012528,-0.113886,-0.140471,-0.07995,0.005493,-0.004693,0.37236,-0.429579
A1CF (29974),-0.06158,-0.00041,0.086,-0.021161,-0.026033,-0.104504,0.118439,-0.052005,-0.153536,-0.080814,0.00024,-0.020622,0.01097,-0.072462,0.049656,-0.107933,-0.001918,-0.211275,-0.18165,0.017406,-0.351064,-0.00696,-0.066619,0.100857,0.152603,-0.095646,0.080225,-0.043387,0.086659,0.230689,-0.112457,-0.058201,0.029688,-0.052326,0.086416,0.059892,-0.217946,-0.106175,0.031065,0.037965,...,0.150086,-0.034703,0.025701,-0.199867,-0.095777,-0.058975,-0.091646,-0.077174,-0.001281,0.036452,0.071694,-0.060662,-0.039074,-0.000837,0.035301,0.068619,-0.074285,0.028372,-0.221683,0.041222,0.023609,-0.202574,0.175387,0.105074,-0.074744,-0.226371,0.031493,-0.312816,0.164348,-0.054229,-0.148926,-0.064067,0.06436,0.008348,0.1233,0.06927,0.012978,0.014939,0.553409,-0.864432
A2M (2),-0.026897,-0.055257,0.235074,0.102202,0.116825,0.121287,0.253352,0.093733,0.137888,-0.045071,0.041978,-0.181914,0.174494,0.017991,0.223375,0.053574,0.058593,0.050872,0.192795,0.125675,0.163411,0.055804,0.041389,0.054127,0.128234,-0.024763,0.218959,0.131296,0.083742,-0.00037,0.123808,0.183605,0.105163,0.036301,0.130973,-0.038505,0.062871,0.150198,0.270511,0.236993,...,-0.035864,-0.016141,0.022815,-0.038249,0.122463,0.078437,0.134116,0.097931,0.173613,0.015996,0.180041,0.197261,0.05541,0.079493,0.139364,0.19784,0.187001,0.110241,0.070967,0.238726,-0.011548,0.061205,-0.000719,0.113425,0.049077,0.083538,0.015363,0.046853,0.124925,0.19999,-0.071168,0.039575,0.02683,0.080286,0.169631,0.177966,0.038273,0.224676,0.455,-0.31631
A2ML1 (144568),-0.026507,-0.071736,0.068524,0.107526,0.196238,0.340482,0.284129,0.175221,0.118795,0.076349,0.023405,0.119003,0.184318,-0.00829,-0.076456,-0.154982,0.118093,0.190053,0.242694,0.187836,0.167104,0.072329,0.268044,0.099115,0.469084,-0.074614,-0.138626,0.169394,0.134029,0.354782,0.033722,0.127489,0.082431,0.035399,0.103855,0.070583,0.154244,0.261595,0.255401,0.107239,...,-0.005725,0.114719,0.181945,0.165169,0.151168,0.201949,0.089478,-0.105688,0.157911,0.037526,0.29774,0.183246,0.054186,0.002799,0.111959,0.207893,0.184432,0.023725,0.244296,0.151469,0.046338,0.155546,0.12363,0.100842,0.006818,0.122887,-0.006871,0.120893,0.244009,0.226472,0.105607,0.175997,-0.101228,0.110665,0.041703,0.122893,0.016146,0.048566,0.469084,-0.270329
A3GALT2 (127550),-0.129643,-0.088479,-0.286711,-0.045557,-0.098705,-0.15098,-0.037887,-0.196711,-0.1323,-0.139247,-0.332026,-0.013205,-0.195734,-0.10621,-0.338183,-0.206917,-0.041842,-0.1048,-0.225035,-0.310429,-0.140005,-0.144798,-0.223845,-0.027474,0.054252,-0.214071,-0.155304,-0.133536,-0.086771,-0.147037,-0.151305,-0.051454,-0.199094,-0.170716,-0.279808,-0.496797,-0.17068,-0.181737,-0.224285,-0.223066,...,-0.202032,-0.098188,-0.115927,-0.249248,-0.043791,-0.338918,-0.250248,-0.267242,-0.150283,-0.258319,-0.099465,-0.186334,-0.0193,-0.037754,-0.135888,-0.093259,-0.300488,-0.291231,-0.090586,-0.155735,-0.089066,-0.302071,-0.125529,-0.015493,-0.050328,-0.082943,-0.320861,-0.421043,-0.270227,-0.251222,-0.085845,-0.133148,-0.137136,-0.328151,-0.250064,-0.2971,-0.1208,-0.205108,0.265478,-0.821927


In [70]:
# Remove the genes whose lowest score across all cell lines is above -1 (no cell line
# reaches -1 for them). What remains are genes with at least one cell line scoring at
# or below -1.
# NOTE(review): the cutoff used here is -1, not 0 — confirm the intended threshold.
# A boolean row mask replaces drop-by-index-label so the selection stays row-exact
# even if gene labels were ever duplicated. The mask is negated (rather than written
# as <= -1) so that rows with a missing 'min' are still kept, as before.
min_above_cutoff = Not_Panlethal['min'] > -1
Not_all_insensitive = Not_Panlethal[~min_above_cutoff]

In [71]:
# Number of genes (rows) remaining in Not_all_insensitive.
len(Not_all_insensitive)

3584

In [72]:
# The analysis focuses on genes for which some cell lines are sensitive and others
# are insensitive; those genes are the rows of Not_all_insensitive.
# Remove the helper 'max' and 'min' columns, then transpose so that cell lines are
# the rows again and genes are the columns.
helper_columns = ['max', 'min']
Genes_longlist = Not_all_insensitive.drop(columns=helper_columns).T

In [73]:
#preview the first rows of the transposed frame (genes in the columns)
Genes_longlist.head()

Unnamed: 0_level_0,AAAS (8086),AAMP (14),AARS (16),AARS2 (57505),AASDHPPT (60496),AATF (26574),ABCA5 (23461),ABCB6 (10058),ABCB7 (22),ABCD1 (215),ABCD3 (5825),ABCE1 (6059),ABCF1 (23),ABCG1 (9619),ABHD11 (83451),ABHD15 (116236),ABHD17A (81926),ABL1 (25),ABT1 (29777),ACACA (31),ACADSB (36),ACAT2 (39),ACIN1 (22985),ACLY (47),ACO2 (50),ACOX1 (51),ACSL3 (2181),ACSL4 (2182),ACTB (60),ACTG1 (71),ACTL6A (86),ACTR10 (55860),ACTR1A (10121),ACTR1B (10120),ACTR2 (10097),ACTR3 (10096),ACTR5 (79913),ACTR6 (64431),ACTR8 (93973),ACVR1B (91),...,ZNF407 (55628),ZNF468 (90333),ZNF492 (57615),ZNF511 (118472),ZNF558 (148156),ZNF559 (84527),ZNF572 (137209),ZNF574 (64763),ZNF593 (51042),ZNF605 (100289635),ZNF622 (90441),ZNF629 (23361),ZNF638 (27332),ZNF658 (26149),ZNF674 (641339),ZNF676 (163223),ZNF680 (340252),ZNF69 (7620),ZNF691 (51058),ZNF697 (90874),ZNF720 (124411),ZNF763 (284390),ZNF780B (163131),ZNF792 (126375),ZNF816 (125893),ZNF830 (91603),ZNFX1 (57169),ZNHIT1 (10467),ZNHIT2 (741),ZNHIT3 (9326),ZNHIT6 (54680),ZNRD1 (30834),ZPR1 (8882),ZRANB2 (9406),ZRSR2 (8233),ZSWIM7 (125150),ZW10 (9183),ZWINT (11130),ZXDB (158586),ZZZ3 (26009)
DepMap_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
ACH-000001,-0.398485,-0.963036,-2.074299,-0.743538,-0.525374,-0.70081,-0.051733,-0.037758,-0.788852,-0.316639,-0.356726,-1.538218,-0.935995,-0.261722,-0.08362,-0.178215,-0.326017,-0.245434,-0.776455,-0.584792,-0.243436,0.022181,-1.046714,-0.882047,-1.320963,-0.192008,-0.290953,-0.079139,-0.126598,-0.544531,-1.323272,-1.368988,-0.466656,-0.482251,-0.89097,-0.454845,-0.125862,-0.908882,-0.598205,-0.307154,...,-0.975698,-0.332401,-0.695062,-0.127201,-0.016723,-1.149768,0.103691,-1.46991,-0.203006,-0.276395,-0.231118,-0.313078,-0.267524,-0.250674,-0.692095,-0.233109,-0.267504,-0.151733,-0.011145,-0.216051,-0.64035,-0.204443,-0.660099,-0.767942,-0.010159,-1.335244,-0.051716,-0.508903,-1.596799,-0.547879,-0.72123,-0.906304,-1.638325,-0.304286,-1.389807,-0.235328,-0.571902,-0.263863,0.154692,-0.483079
ACH-000004,-0.254014,-1.100219,-1.772154,-0.459564,-0.439969,-0.667794,0.198513,-0.075275,-1.247909,-0.386068,-0.027153,-1.369292,-0.897073,-0.513211,-0.546949,-0.471595,-0.352556,-0.016246,-0.888016,-0.153241,-0.279774,0.091826,-1.064455,-0.353581,-0.858452,-0.496958,-0.532588,-0.592898,-0.657471,-0.434319,-1.476652,-1.393649,-0.956623,-0.270476,-0.798967,-0.358614,-0.401566,-0.821942,-0.849894,-0.015029,...,-0.970848,-0.547886,-0.881266,0.083084,0.114525,-1.04625,0.116628,-0.739999,-0.551644,0.090535,-0.279801,-0.392,-0.381993,-0.418437,-0.336163,-0.504439,-0.512421,-0.094218,-0.093674,0.069596,-0.587488,-0.478348,-0.292099,-0.056409,-0.324245,-1.222595,0.010937,-0.534872,-1.585306,-0.311468,-0.365709,-0.676512,-1.379785,-0.198099,-0.894345,-0.035528,0.044165,-0.582675,0.098392,-0.199333
ACH-000005,-0.357927,-1.098065,-1.837408,-0.324273,-0.258809,-0.717115,0.100327,0.04574,-1.133865,-0.180408,-0.115135,-1.387696,-0.78107,-0.689998,-0.605686,-0.365234,0.012914,0.055653,-0.78476,-0.380558,-0.005471,0.031358,-0.996011,-0.44628,-1.1539,0.037804,-0.235304,-0.397238,-0.641967,-0.416186,-1.367483,-1.513006,-0.867062,-0.430296,-0.77476,-0.349217,-0.458778,-0.49853,-0.50697,-0.054218,...,-1.006409,-0.420719,-0.426359,-0.049522,0.123406,-0.803652,-0.141241,-1.164011,-0.57425,0.026582,-0.007033,-0.405424,-0.322991,-0.236915,-0.084081,-0.321966,-0.374985,-0.133319,-0.124061,-0.375309,-0.299775,-0.419059,-0.195371,-0.137924,-0.13919,-1.14768,-0.051895,-0.532869,-1.816267,-0.437388,-0.686556,-0.525372,-1.229239,-0.439597,-1.45415,-0.068606,-0.234198,-0.485151,0.048799,-0.267921
ACH-000007,-0.265205,-1.189605,-1.870909,-0.548806,-0.525538,-0.820872,0.00614,-0.075154,-1.401689,-0.195836,-0.294988,-1.427313,-1.222012,-0.193002,-0.371981,-0.225597,-0.245112,0.098636,-1.002213,-0.930745,-0.108527,0.234094,-1.198282,-0.725259,-0.811072,-0.04437,-0.255119,0.092797,-0.904194,-0.665548,-1.442503,-1.188768,-0.651423,-0.261214,-1.084841,-0.225055,-0.629323,-0.631921,-0.758139,-0.053199,...,-0.794333,-0.314878,-0.954518,-0.1184,0.068738,-0.721383,-0.19913,-1.188527,-0.690889,0.033796,-0.529799,-0.341125,-0.318863,-0.592241,-0.333849,-0.435848,-0.264846,-0.138894,-0.151205,-0.217781,-0.495596,-0.508328,-0.370395,-0.177964,-0.071347,-1.148869,-0.085692,-0.487626,-1.678871,-0.889379,-0.627705,-1.200707,-1.354194,-0.375521,-0.846016,-0.066613,-0.3826,-0.518788,0.176259,-0.484621
ACH-000009,-0.251012,-1.122296,-1.661961,-0.495631,-0.531834,-0.650062,-0.318818,-0.126112,-1.589748,-0.199476,-0.278833,-1.526793,-1.063056,-0.152085,-0.360132,-0.189979,-0.368807,0.034733,-1.092919,-0.695796,0.018658,-0.087278,-0.834554,-1.0317,-1.138266,-0.24776,-0.31018,-0.025981,-0.97375,-0.627172,-1.460897,-1.375759,-1.358966,-0.365277,-0.936443,-0.404296,-0.150124,-0.891385,-0.631769,-0.026547,...,-0.89677,-0.403154,-0.787785,-0.208208,0.112318,-0.918815,0.04191,-0.705703,-0.525509,0.134196,-0.274179,-0.141339,-0.235266,-0.527528,-0.392348,-0.306064,-0.23455,-0.139409,-0.169771,-0.153118,-0.457646,-0.255673,-0.286425,-0.130433,-0.196246,-1.05131,-0.106963,-0.441556,-1.332934,-0.6967,-0.800057,-0.970397,-1.300791,-0.640668,-0.580992,-0.23697,-0.213426,-0.417919,0.212718,-0.411152


In [74]:
#preview the last rows of the transposed frame (genes in the columns)
Genes_longlist.tail()

Unnamed: 0_level_0,AAAS (8086),AAMP (14),AARS (16),AARS2 (57505),AASDHPPT (60496),AATF (26574),ABCA5 (23461),ABCB6 (10058),ABCB7 (22),ABCD1 (215),ABCD3 (5825),ABCE1 (6059),ABCF1 (23),ABCG1 (9619),ABHD11 (83451),ABHD15 (116236),ABHD17A (81926),ABL1 (25),ABT1 (29777),ACACA (31),ACADSB (36),ACAT2 (39),ACIN1 (22985),ACLY (47),ACO2 (50),ACOX1 (51),ACSL3 (2181),ACSL4 (2182),ACTB (60),ACTG1 (71),ACTL6A (86),ACTR10 (55860),ACTR1A (10121),ACTR1B (10120),ACTR2 (10097),ACTR3 (10096),ACTR5 (79913),ACTR6 (64431),ACTR8 (93973),ACVR1B (91),...,ZNF407 (55628),ZNF468 (90333),ZNF492 (57615),ZNF511 (118472),ZNF558 (148156),ZNF559 (84527),ZNF572 (137209),ZNF574 (64763),ZNF593 (51042),ZNF605 (100289635),ZNF622 (90441),ZNF629 (23361),ZNF638 (27332),ZNF658 (26149),ZNF674 (641339),ZNF676 (163223),ZNF680 (340252),ZNF69 (7620),ZNF691 (51058),ZNF697 (90874),ZNF720 (124411),ZNF763 (284390),ZNF780B (163131),ZNF792 (126375),ZNF816 (125893),ZNF830 (91603),ZNFX1 (57169),ZNHIT1 (10467),ZNHIT2 (741),ZNHIT3 (9326),ZNHIT6 (54680),ZNRD1 (30834),ZPR1 (8882),ZRANB2 (9406),ZRSR2 (8233),ZSWIM7 (125150),ZW10 (9183),ZWINT (11130),ZXDB (158586),ZZZ3 (26009)
DepMap_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
ACH-002464,-0.022297,-1.271886,-1.647117,-0.851048,-0.394476,-0.988012,0.039014,0.023759,-1.32839,-0.337305,-0.076928,-1.49423,-1.097284,-0.5671,-0.468591,-0.461085,-0.134939,0.21247,-1.211596,-0.461857,0.005552,0.277568,-0.810673,-0.525898,-0.75498,0.174097,-0.182176,-0.715293,0.411016,-0.043843,-1.04674,-0.962756,-0.564092,-0.069053,-1.222447,-1.121791,-0.277543,-0.046886,-0.698533,0.108957,...,-0.702273,-0.301275,-0.73133,-0.234367,0.113335,-0.44488,-0.102817,-0.911781,-0.821091,0.205824,-0.584919,-0.220086,-1.332368,-0.449381,-0.006216,-0.400238,-0.116309,-0.358426,-0.218249,-0.111983,-0.246702,-0.379237,-0.52227,-0.027825,-0.023238,-1.521734,-0.073061,0.210255,-1.5392,-0.871465,-0.72436,-0.93327,-1.392437,-0.669075,-1.027804,-0.190227,-0.573754,-0.331121,-0.118071,-0.491949
ACH-002467,-0.122974,-1.131538,-1.492055,-0.333894,-0.631993,-1.073129,0.086207,-0.123886,-1.40653,-0.267115,-0.266686,-1.346289,-1.257858,0.066365,-0.107515,-0.393986,-0.382221,0.039322,-1.036346,-0.565639,0.010448,0.190028,-0.837308,-0.551968,-0.542675,-0.050362,-0.534367,-0.441725,-0.330115,-0.491423,-1.125117,-1.187635,-1.008902,-0.291953,-0.856651,-0.86537,-0.495787,-0.821614,-0.582755,0.017285,...,-1.186605,-0.265246,-0.618812,-0.382315,0.264788,-0.524871,-0.099964,-1.010732,-0.961446,0.001412,-0.915489,-0.239998,-0.398551,-0.377265,-0.137653,-0.420593,-0.160003,-0.221069,0.066636,-0.062644,-0.531808,-0.342419,-0.521657,0.0092,-0.049998,-0.924824,0.141595,-0.248791,-1.644152,-0.866115,-0.556568,-0.820884,-1.577838,-0.313587,-0.695532,-0.137585,-0.470498,-0.911156,0.166627,-0.885358
ACH-002508,-0.412489,-0.812476,-1.905825,-0.445009,-0.217806,-0.898234,-0.118214,-0.135261,-1.300582,-0.142666,-0.429734,-1.227896,-0.900045,-0.340337,-0.332057,-0.394447,-0.452949,0.249091,-0.914877,-0.793493,-0.051432,0.29393,-0.532635,-0.754159,-0.33416,0.307166,-0.96946,-0.0946,-0.244088,-0.461672,-1.598375,-1.001591,-1.802201,-0.279632,-1.185581,-0.743883,-0.328493,-0.843848,-0.751909,-0.388191,...,-1.463861,-0.603151,-0.660879,-0.028408,-0.043995,-0.868451,-0.005118,-0.8129,-0.559874,0.125041,-0.187691,-0.312112,-0.092453,-0.632112,-0.426206,-0.319606,-0.322323,-0.357495,-0.166535,-0.045295,-0.401304,-0.488461,-0.376396,-0.125777,0.210942,-1.252005,-0.15701,-0.72557,-1.540773,-0.548618,-0.576054,-0.989314,-1.258527,-0.577565,-1.159329,-0.181379,-0.381362,-0.26876,0.377967,-0.060559
ACH-002510,-0.167518,-0.996121,-1.687473,-0.573541,-0.251771,-0.995856,-0.274134,-0.096503,-1.473697,-0.295354,0.063964,-1.07399,-1.09208,-0.268272,-0.337498,-0.057628,-0.159571,-0.062646,-1.149427,-0.424217,-0.121937,0.245084,-0.784247,-1.129798,-1.236559,-0.025421,-0.588041,0.017192,-0.279866,-0.189975,-1.89186,-0.870832,-0.014835,-0.179749,-1.257718,-0.601404,-0.342797,-0.875178,-0.657154,-0.194445,...,-0.689709,-0.152732,-0.539053,-0.948821,0.045372,-0.798254,-0.038223,-0.623907,-0.45728,-0.149466,-0.360627,-0.010257,-0.199208,-0.392779,-0.394518,-0.244235,-0.414,-0.209508,-0.159153,0.023956,-0.567181,-0.274297,-0.269406,0.037874,0.113207,-1.270096,-0.202003,-0.341528,-1.67677,-0.734768,-0.762906,-0.610235,-1.377847,-0.440889,-0.792308,0.154393,-0.57798,-0.83777,-0.150582,-0.404162
ACH-002512,-0.759558,-0.939016,-1.449317,-0.503061,0.003079,-0.977338,-0.010262,0.124969,-1.187382,-0.215672,-0.415288,-1.731713,-0.988303,-0.918354,-0.510792,0.041081,-0.070378,-0.145822,-1.261185,-0.363759,-0.069658,0.424,-0.498657,-0.453016,-0.877456,-0.051342,-0.470609,0.005153,-0.259241,-0.705277,-1.196338,-1.146507,-0.806272,-0.144785,-1.029782,-0.710484,-0.277613,-0.839834,-1.335148,0.042574,...,-0.473217,-0.27697,-0.815216,-0.168244,0.014124,-0.702413,-0.34664,-0.561369,-0.500464,0.100661,-0.075009,-0.243599,-0.105451,-0.040732,-0.323087,-0.035541,-1.011554,-0.169029,-0.170673,-0.191465,-0.480694,-0.192807,-0.390542,0.078944,0.054937,-1.079326,-0.064281,-0.303827,-1.605515,-0.260041,-0.881604,-0.777098,-1.334183,-0.348623,-1.004823,-0.103209,0.003334,-0.394694,0.06042,-1.036221


In [75]:
#(number of rows, number of gene columns) after the transpose
Genes_longlist.shape

(990, 3584)

In [76]:
#Count the sensitive cell lines (gene dependency below -1) for each gene.
#NOTE: despite its name, Above_minus_1 holds the number of values BELOW -1;
#the name is kept because later cells refer to it. Missing values are never counted.
Above_minus_1 = (Genes_longlist < -1).sum()

In [77]:
#preview the per-gene counts of values below -1
Above_minus_1.head()

AAAS (8086)           1
AAMP (14)           518
AARS (16)           989
AARS2 (57505)        11
AASDHPPT (60496)      4
dtype: int64

In [78]:
#Turn the per-gene counts into a one-column dataframe whose index is named 'gene'
abv_df = Above_minus_1.to_frame('sensitive count').rename_axis('gene')
abv_df.head(5)

Unnamed: 0_level_0,sensitive count
gene,Unnamed: 1_level_1
AAAS (8086),1
AAMP (14),518
AARS (16),989
AARS2 (57505),11
AASDHPPT (60496),4


In [79]:
#getting percentage of sensitive cell lines per gene
#The number of cell lines is taken from the data (rows of Genes_longlist) instead of
#being hard-coded, so the percentages stay correct if the input dataset changes.
n_cell_lines = len(Genes_longlist)
abv_df['percent sensitive'] = (abv_df['sensitive count']/n_cell_lines)*100
#Everything that is not below -1 is counted as insensitive; because this is computed as
#"total minus sensitive", cell lines with a missing score for a gene are included here too.
abv_df['insensitive count']= n_cell_lines - (abv_df['sensitive count'])
abv_df['percent insensitive'] = (abv_df['insensitive count']/n_cell_lines)*100
abv_df.head()

Unnamed: 0_level_0,sensitive count,percent sensitive,insensitive count,percent insensitive
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAAS (8086),1,0.10101,989,99.89899
AAMP (14),518,52.323232,472,47.676768
AARS (16),989,99.89899,1,0.10101
AARS2 (57505),11,1.111111,979,98.888889
AASDHPPT (60496),4,0.40404,986,99.59596


In [80]:
#Keep the genes for which strictly more than 50% of cell lines are sensitive
percent_50 = abv_df.loc[abv_df['percent sensitive'].gt(50)]
print('shape: ', percent_50.shape)
percent_50.head(5)

shape:  (788, 4)


Unnamed: 0_level_0,sensitive count,percent sensitive,insensitive count,percent insensitive
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAMP (14),518,52.323232,472,47.676768
AARS (16),989,99.89899,1,0.10101
ABCB7 (22),831,83.939394,159,16.060606
ABCE1 (6059),985,99.494949,5,0.505051
ABCF1 (23),588,59.393939,402,40.606061


In [81]:
#Keep the genes for which strictly more than 75% of cell lines are sensitive
percent_75 = abv_df.loc[abv_df['percent sensitive'].gt(75)]
print('shape: ', percent_75.shape)
percent_75.head(5)

shape:  (607, 4)


Unnamed: 0_level_0,sensitive count,percent sensitive,insensitive count,percent insensitive
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AARS (16),989,99.89899,1,0.10101
ABCB7 (22),831,83.939394,159,16.060606
ABCE1 (6059),985,99.494949,5,0.505051
ACTL6A (86),947,95.656566,43,4.343434
ACTR10 (55860),849,85.757576,141,14.242424


In [82]:
#Keep the genes for which strictly fewer than 25% of cell lines are sensitive
percent_25 = abv_df.loc[abv_df['percent sensitive'].lt(25)]
print('shape: ', percent_25.shape)
percent_25.head(5)

shape:  (2610, 4)


Unnamed: 0_level_0,sensitive count,percent sensitive,insensitive count,percent insensitive
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAAS (8086),1,0.10101,989,99.89899
AARS2 (57505),11,1.111111,979,98.888889
AASDHPPT (60496),4,0.40404,986,99.59596
AATF (26574),83,8.383838,907,91.616162
ABCA5 (23461),1,0.10101,989,99.89899


In [83]:
#Keep the genes where 5% to 25% of cell lines are sensitive (both bounds inclusive)
percent_5_25 = abv_df.loc[abv_df['percent sensitive'].ge(5) & abv_df['percent sensitive'].le(25)]
print('shape: ', percent_5_25.shape)
percent_5_25.head(5)

shape:  (433, 4)


Unnamed: 0_level_0,sensitive count,percent sensitive,insensitive count,percent insensitive
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AATF (26574),83,8.383838,907,91.616162
ACIN1 (22985),221,22.323232,769,77.676768
ACLY (47),54,5.454545,936,94.545455
ACO2 (50),229,23.131313,761,76.868687
ACTB (60),124,12.525253,866,87.474747


In [84]:
#Keep the genes where 25% to 50% of cell lines are sensitive (both bounds inclusive)
percent_25_50 = abv_df.loc[abv_df['percent sensitive'].ge(25) & abv_df['percent sensitive'].le(50)]
print('shape: ', percent_25_50.shape)
percent_25_50.head(5)

shape:  (186, 4)


Unnamed: 0_level_0,sensitive count,percent sensitive,insensitive count,percent insensitive
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABT1 (29777),310,31.313131,680,68.686869
ACTR1A (10121),338,34.141414,652,65.858586
ADSL (158),442,44.646465,548,55.353535
AHCTF1 (25909),295,29.79798,695,70.20202
AK6 (102157402),373,37.676768,617,62.323232


In [85]:
#Plain Python lists of the gene labels (the index) of each percentage subset
list_50_percent = list(percent_50.index)
list_75_percent = list(percent_75.index)
list_25_percent = list(percent_25.index)
list_5_25_percent = list(percent_5_25.index)
list_25_50_percent = list(percent_25_50.index)

In [86]:
#Summarise each gene list: its size and its first five entries.
#The slice is [:5] so that the output matches the 'First 5 genes' label
#(the earlier [:4] slice only showed four genes).
print('List_50_percent #genes    : ',len(list_50_percent), ',    First 5 genes : ', list_50_percent[:5])
print('List_75_percent #genes    : ',len(list_75_percent), ',    First 5 genes : ', list_75_percent[:5])
print('List_25_percent #genes    : ',len(list_25_percent), ',   First 5 genes: ', list_25_percent[:5])
print('List_5_25_percent #genes  : ',len(list_5_25_percent), ',    First 5 genes : ', list_5_25_percent[:5])
print('List_25_50_percent #genes : ',len(list_25_50_percent), ',    First 5 genes : ', list_25_50_percent[:5])

List_50_percent #genes    :  788 ,    First 5 genes :  ['AAMP (14)', 'AARS (16)', 'ABCB7 (22)', 'ABCE1 (6059)']
List_75_percent #genes    :  607 ,    First 5 genes :  ['AARS (16)', 'ABCB7 (22)', 'ABCE1 (6059)', 'ACTL6A (86)']
List_25_percent #genes    :  2610 ,   First 5 genes:  ['AAAS (8086)', 'AARS2 (57505)', 'AASDHPPT (60496)', 'AATF (26574)']
List_5_25_percent #genes  :  433 ,    First 5 genes :  ['AATF (26574)', 'ACIN1 (22985)', 'ACLY (47)', 'ACO2 (50)']
List_25_50_percent #genes :  186 ,    First 5 genes :  ['ABT1 (29777)', 'ACTR1A (10121)', 'ADSL (158)', 'AHCTF1 (25909)']


# Visualizing gene dependencies with waterfall plots

In [87]:
! pip install altair vega_datasets
import altair as alt



In [88]:
#Pulling out cell line name, primary disease and subtype from cell lines info dataset,
#indexed by DepMap_ID. set_index is chained (rather than applied with inplace=True to the
#column subset) so a new frame is produced in one step and pandas has no reason to emit
#a SettingWithCopyWarning.
Primary_disease_and_subtype = (
    cell_lines_df[['DepMap_ID','stripped_cell_line_name','primary_disease','Subtype']]
    .set_index('DepMap_ID')
)

In [89]:
#Attach the cell line annotations to the CRISPR dataset.
#Left join on the index of both frames, so every CRISPR row is kept.
CRISPR_w_disease = CRISPR_df.merge(
    Primary_disease_and_subtype,
    how='left',
    left_index=True,
    right_index=True,
)
CRISPR_w_disease.head(5)

Unnamed: 0_level_0,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),AADAC (13),AADACL2 (344752),AADACL3 (126767),AADACL4 (343066),AADAT (51166),AAED1 (195827),AAGAB (79719),AAK1 (22848),AAMDC (28971),AAMP (14),AANAT (15),AAR2 (25980),AARD (441376),AARS (16),AARS2 (57505),AARSD1 (80755),AASDH (132949),AASDHPPT (60496),AASS (10157),AATF (26574),AATK (9625),ABAT (18),ABCA1 (19),ABCA10 (10349),ABCA12 (26154),ABCA13 (154664),ABCA2 (20),ABCA3 (21),ABCA4 (24),ABCA5 (23461),ABCA6 (23460),...,ZSCAN16 (80345),ZSCAN18 (65982),ZSCAN2 (54993),ZSCAN20 (7579),ZSCAN21 (7589),ZSCAN22 (342945),ZSCAN23 (222696),ZSCAN25 (221785),ZSCAN29 (146050),ZSCAN30 (100101467),ZSCAN31 (64288),ZSCAN32 (54925),ZSCAN4 (201516),ZSCAN5A (79149),ZSCAN5B (342933),ZSCAN9 (7746),ZSWIM1 (90204),ZSWIM2 (151112),ZSWIM3 (140831),ZSWIM4 (65249),ZSWIM5 (57643),ZSWIM6 (57688),ZSWIM7 (125150),ZSWIM8 (23053),ZUP1 (221302),ZW10 (9183),ZWILCH (55055),ZWINT (11130),ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009),max,stripped_cell_line_name,primary_disease,Subtype
DepMap_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
ACH-000001,-0.334969,-0.06158,-0.026897,-0.026507,-0.129643,0.060688,-0.119348,-0.398485,-0.010647,0.162631,-0.310049,0.147555,0.184425,-0.333298,0.153285,-0.079504,-0.296891,-0.118084,-0.963036,-0.215368,-0.036879,-0.075194,-2.074299,-0.743538,-0.093722,-0.108416,-0.525374,0.093326,-0.70081,-0.172236,-0.094491,0.241104,-0.127136,-0.180626,0.060713,-0.111944,-0.163187,-0.369501,-0.051733,-0.192586,...,0.065291,-0.411232,-0.069714,-0.067491,-0.001525,-0.238539,0.054201,-0.055563,-0.030228,0.04319,0.146922,-0.186981,-0.089181,0.149982,-0.133839,0.127381,0.125597,-0.10951,0.227661,-0.114913,0.093759,0.138211,-0.235328,-0.092094,0.001644,-0.571902,-0.180651,-0.263863,-0.019301,0.154692,0.216516,-0.09577,-0.025669,0.215264,0.106057,-0.483079,0.728309,NIHOVCAR3,Ovarian Cancer,"Adenocarcinoma, high grade serous"
ACH-000004,0.020107,-0.00041,-0.055257,-0.071736,-0.088479,-0.233373,0.283944,-0.254014,0.239971,0.146308,-0.03809,0.04491,0.166511,0.040402,0.297193,-0.027029,0.047149,-0.021958,-1.100219,-0.221004,-0.434902,-0.087751,-1.772154,-0.459564,-0.170503,-0.04456,-0.439969,-0.042038,-0.667794,-0.036024,-0.037588,0.088347,0.124844,-0.022769,0.079305,0.008898,-0.019001,-0.295453,0.198513,-0.060481,...,-0.02262,-0.020994,-0.060484,-0.130784,0.068227,-0.044324,0.148103,0.094501,-0.277805,0.180969,0.041325,-0.142479,0.119594,-0.328146,-0.182727,0.13047,-0.285574,0.078565,-0.009222,0.060892,-0.261122,0.026847,-0.035528,-0.184435,-0.161245,0.044165,-0.229941,-0.582675,0.132592,0.098392,0.181388,0.14052,-0.406777,0.152613,0.197995,-0.199333,0.873519,HEL,Leukemia,"Acute Myelogenous Leukemia (AML), M6 (Erythrol..."
ACH-000005,-0.191303,0.086,0.235074,0.068524,-0.286711,-0.337232,0.137179,-0.357927,-0.100163,0.010403,0.193095,0.08727,-0.054359,0.030757,0.209344,-0.013128,-0.049996,-0.180642,-1.098065,0.014994,-0.269943,0.035713,-1.837408,-0.324273,-0.261204,-0.039165,-0.258809,0.02645,-0.717115,-0.003977,-0.254419,-0.066099,0.053624,-0.015285,0.061832,0.033628,0.078573,-0.189015,0.100327,0.052912,...,0.066562,-0.034267,0.076829,-0.186648,-0.175506,-0.041671,0.028856,0.179234,-0.109565,0.0173,-0.018504,0.072726,0.10758,-0.186592,-0.295659,0.085028,0.069183,0.143798,-0.211096,-0.169062,-0.205673,0.186704,-0.068606,-0.245673,-0.206315,-0.234198,-0.283961,-0.485151,-0.016706,0.048799,0.09759,-0.139802,-0.09616,-0.024441,0.016988,-0.267921,0.875445,HEL9217,Leukemia,"Acute Myelogenous Leukemia (AML), M6 (Erythrol..."
ACH-000007,0.008862,-0.021161,0.102202,0.107526,-0.045557,-0.007575,0.070338,-0.265205,0.023019,0.250927,0.063849,0.069961,0.093815,-0.161213,0.020884,-0.024435,-0.197741,0.024142,-1.189605,0.024648,0.03516,-0.111406,-1.870909,-0.548806,-0.217481,-0.327512,-0.525538,-0.073879,-0.820872,-0.08011,0.116312,-0.042634,0.004091,0.02699,0.065725,-0.096931,0.171791,-0.186575,0.00614,0.013329,...,-0.203019,-0.27484,0.072157,-0.095493,0.046583,0.028542,0.141186,0.050176,-0.040564,-0.035024,-0.026,-0.108843,0.169469,0.006926,-0.10476,0.210014,-0.053852,-0.112955,0.023384,-0.160514,-0.159488,0.047137,-0.066613,-0.233574,-0.148351,-0.3826,-0.246872,-0.518788,0.010676,0.176259,0.142269,-0.020587,-0.363685,0.027735,-0.334223,-0.484621,0.750305,LS513,Colon/Colorectal Cancer,Adenocarcinoma
ACH-000009,0.006476,-0.026033,0.116825,0.196238,-0.098705,-0.124901,-0.025107,-0.251012,0.088328,0.130948,-0.052737,-0.246025,-0.013083,-0.06478,-0.119608,0.064713,-0.16708,-0.119103,-1.122296,-0.186775,-0.105022,0.02015,-1.661961,-0.495631,-0.283533,-0.101709,-0.531834,-0.02975,-0.650062,-0.225336,0.084343,-0.004804,0.069033,-0.1852,-0.012656,-0.09175,0.10589,-0.295738,-0.318818,-0.033814,...,0.067975,-0.176481,0.10232,-0.235117,-0.003277,-0.01298,0.065445,-0.004791,-0.095217,-0.106363,0.139132,0.150934,0.021566,-0.17564,-0.232412,0.218157,-0.013314,-0.203076,0.088749,-0.01684,-0.227325,0.267733,-0.23697,-0.276963,0.09302,-0.213426,-0.283986,-0.417919,0.03287,0.212718,-0.018123,-0.213931,-0.428286,0.048789,-0.115131,-0.411152,0.681762,C2BBE1,Colon/Colorectal Cancer,Adenocarcinoma


In [90]:
#function for waterfall plot where colouration based on the primary disease

def waterfall_gene_colour_by_disease(gene_name):
    """Build a two-panel waterfall chart of one gene's dependency scores.

    The upper panel draws one bar per cell line, sorted by descending score and
    coloured by primary disease. Dragging an interval over it greys out the bars
    outside the selection and restricts the lower panel to the selected cell
    lines, this time with their names shown on the x axis. Dashed horizontal
    rules are drawn at -1 (red) and -0.5 (black) in both panels.

    Parameters
    ----------
    gene_name : str
        Name of the column of ``CRISPR_w_disease`` to plot, e.g. 'A1BG (1)'.

    Returns
    -------
    The Altair chart made of the upper panel stacked on top of the lower panel.

    Notes
    -----
    Rows with a missing cell line name, score or primary disease are dropped.
    Every call also registers and enables the Altair theme 'my_theme', which
    sets the categorical colour range to the 'category20' scheme.
    """
    # Keep only the three columns the chart needs and move the index into a column.
    columns_needed = ['stripped_cell_line_name', gene_name, 'primary_disease']
    plot_data = CRISPR_w_disease[columns_needed].dropna().reset_index()

    # Register and switch on the custom colour theme.
    alt.themes.register(
        'my_theme',
        lambda: {'config': {'range': {'category': {'scheme': 'category20'}}}},
    )
    alt.themes.enable('my_theme')

    # Interval selection shared by the two panels.
    interval = alt.selection(type='interval')

    # Upper panel: every cell line, axis labels hidden, fixed y range.
    overview_x = alt.X(
        'stripped_cell_line_name',
        sort='-y',
        axis=alt.Axis(title='Cell Lines', labels=False, ticks=False),
    )
    overview_y = alt.Y(
        gene_name,
        axis=alt.Axis(title='Dependency'),
        scale=alt.Scale(domain=(-2, 1)),
    )
    overview_colour = alt.condition(interval, 'primary_disease', alt.value('lightgray'))
    overview = (
        alt.Chart(plot_data)
        .mark_bar()
        .encode(x=overview_x, y=overview_y, color=overview_colour)
        .properties(width=600, height=400)
        .add_selection(interval)
    )

    def dashed_rule(level, colour):
        # Horizontal dashed rule at the given y value.
        marker = pd.DataFrame({'y': [level]})
        return alt.Chart(marker).mark_rule(color=colour, strokeDash=[3,3]).encode(y='y')

    rule_minus_1 = dashed_rule(-1, 'red')
    rule_minus_half = dashed_rule(-0.5, 'black')

    # Lower panel: only the cell lines inside the current selection.
    detail = (
        alt.Chart(plot_data)
        .mark_bar()
        .encode(
            x=alt.X('stripped_cell_line_name', sort='-y', axis=alt.Axis(title='Cell Lines')),
            y=alt.Y(gene_name, axis=alt.Axis(title='Dependency')),
            color='primary_disease',
        )
        .transform_filter(interval)
    )

    top_panel = overview + rule_minus_1 + rule_minus_half
    bottom_panel = detail + rule_minus_1 + rule_minus_half
    return top_panel & bottom_panel

In [91]:
#example: draw the waterfall chart for the 'A1BG (1)' column
waterfall_gene_colour_by_disease('A1BG (1)')

# Using PCA to cluster gene dependencies

In [92]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [93]:
# Subset CRISPR_transpose to the rows listed in list_25_50_percent, then drop
# every row that still contains a NaN.
pca25_50 = (
    CRISPR_transpose
    .loc[list_25_50_percent]
    .dropna()
)

# Print the subset's shape before and after the NaN filter to show how many
# rows were discarded.
print(
    'Removing nan caused shape to change from',
    CRISPR_transpose.loc[list_25_50_percent].shape,
    'to',
    pca25_50.shape,
)

pca25_50.head()

Removing nan caused shape to change from (186, 992) to (139, 992)


DepMap_ID,ACH-000001,ACH-000004,ACH-000005,ACH-000007,ACH-000009,ACH-000011,ACH-000012,ACH-000013,ACH-000014,ACH-000015,ACH-000017,ACH-000018,ACH-000019,ACH-000021,ACH-000022,ACH-000023,ACH-000024,ACH-000025,ACH-000028,ACH-000029,ACH-000030,ACH-000035,ACH-000036,ACH-000037,ACH-000039,ACH-000040,ACH-000041,ACH-000042,ACH-000045,ACH-000047,ACH-000048,ACH-000052,ACH-000053,ACH-000054,ACH-000055,ACH-000060,ACH-000067,ACH-000070,ACH-000074,ACH-000075,...,ACH-002234,ACH-002239,ACH-002243,ACH-002247,ACH-002249,ACH-002250,ACH-002251,ACH-002257,ACH-002261,ACH-002263,ACH-002265,ACH-002269,ACH-002278,ACH-002280,ACH-002282,ACH-002283,ACH-002284,ACH-002285,ACH-002294,ACH-002295,ACH-002296,ACH-002297,ACH-002298,ACH-002304,ACH-002305,ACH-002315,ACH-002399,ACH-002446,ACH-002458,ACH-002459,ACH-002460,ACH-002462,ACH-002463,ACH-002464,ACH-002467,ACH-002508,ACH-002510,ACH-002512,max,min
ACTR1A (10121),-0.466656,-0.956623,-0.867062,-0.651423,-1.358966,-1.38773,-0.590756,-0.695804,-0.732964,-1.040576,-0.294724,-1.06135,-0.51178,-0.792479,-1.051147,-0.626999,-0.545092,-1.070369,-1.180615,-0.398135,-0.820228,-1.356074,-0.565855,-1.183971,-0.782087,-1.161641,-0.654717,-0.890359,-0.540973,-1.278293,-0.971149,-0.930876,-0.487782,-0.539308,-0.324166,-1.029415,-1.346487,-0.881184,-1.018772,-0.635842,...,-0.747753,-0.877034,-0.870443,-0.652965,-0.709081,-0.684294,-1.236327,-0.863664,-1.529727,-1.773192,-0.349213,-0.956748,-1.071361,-0.638965,-0.865345,-0.443475,-1.118423,-0.593423,-0.635027,-0.366659,-0.647533,-0.88665,-1.251651,-0.4499,-1.259977,-0.947654,-0.634002,-0.88984,-0.512873,-0.956573,-0.441147,-0.829385,-0.634305,-0.564092,-1.008902,-1.802201,-0.014835,-0.806272,0.022117,-1.865813
ADSL (158),-1.025149,-1.403681,-1.60186,-1.200547,-0.918574,-1.310672,-0.710678,-0.425244,-1.234554,-0.644216,-1.63163,-1.086749,-1.356498,-1.143285,-1.003027,-0.456413,-1.353337,-1.28711,-0.868085,-0.920414,-1.729608,-0.722818,-0.854384,-0.874035,-0.968164,-0.747362,-0.825822,-1.291753,-0.851839,-1.262803,-0.380668,-0.652695,-1.491799,-0.700119,-0.941326,-0.587635,-0.83979,-1.028693,-1.177948,-0.609816,...,-0.679941,-1.126018,-0.961986,-1.278371,-0.890282,-0.844526,-0.608187,-0.745394,-0.911565,-1.593865,-0.917533,-1.17799,-0.749756,-0.82604,-0.849968,-0.341783,-0.816379,-0.727108,-1.04902,-1.119679,-1.346596,-0.607087,-1.481222,-0.758286,-0.775164,-0.73835,-1.482529,-1.34508,-0.841512,-0.865664,-0.981096,-0.868842,-0.896143,-1.126021,-0.683373,-0.450931,-1.235722,-1.170108,-0.137573,-1.994726
AHCTF1 (25909),-0.844071,-1.05709,-0.928745,-0.921551,-0.967355,-0.564579,-1.094348,-0.858807,-1.060744,-0.978154,-1.012337,-0.824851,-0.997994,-0.963789,-0.9565,-0.961974,-0.762567,-1.143222,-0.866706,-0.911255,-0.928977,-0.969232,-0.699967,-0.734963,-0.680852,-0.966169,-0.780383,-0.849531,-0.853396,-1.149874,-1.136879,-1.037291,-0.853967,-0.956613,-1.182896,-0.730543,-0.893842,-0.951754,-1.110444,-0.914808,...,-0.828265,-1.035422,-0.915937,-0.944294,-0.847387,-0.66394,-1.219154,-0.733853,-1.041667,-0.858413,-0.96775,-0.773339,-0.689034,-0.771436,-1.11221,-1.037308,-0.969047,-0.771627,-0.880373,-0.973797,-0.87138,-0.871211,-0.828308,-1.046946,-0.933405,-0.678501,-0.989838,-1.084636,-1.003111,-0.864569,-0.967157,-0.903172,-1.090426,-0.674215,-0.974705,-0.86538,-0.600423,-0.735978,-0.198017,-1.41951
AK6 (102157402),-1.006928,-0.829255,-0.716226,-0.829954,-0.746732,-1.292847,-0.879031,-0.687188,-0.947238,-1.012099,-0.672657,-0.755478,-0.644715,-0.450418,-0.956215,-0.974492,-1.128432,-0.993401,-0.990854,-0.571925,-0.960557,-1.008593,-0.965133,-0.828809,-1.246804,-0.872562,-0.952158,-0.84337,-1.038388,-0.548524,-0.959348,-1.107972,-0.576941,-0.839874,-0.993875,-1.005916,-1.105394,-1.294597,-0.441693,-0.945691,...,-1.17535,-1.120884,-0.919339,-0.731832,-1.13996,-1.051012,-1.080726,-0.892561,-0.756721,-0.996568,-0.970758,-1.194729,-1.212621,-0.399563,-0.62167,-0.818474,-1.075434,-0.975254,-1.074053,-0.983984,-1.183396,-0.842399,-1.07808,-0.85708,-0.987674,-1.311001,-0.743561,-1.106353,-0.847307,-0.778986,-0.918014,-1.173851,-1.049298,-1.051324,-0.927989,-1.12468,-1.063751,-0.270748,-0.107843,-1.708815
ALG14 (199857),-0.822464,-0.625944,-0.764853,-0.787261,-1.218951,-1.038727,-1.145557,-1.154127,-1.065253,-1.193853,-0.659692,-1.226278,-0.976005,-0.956551,-0.956146,-1.269799,-0.989948,-1.139571,-1.216405,-1.095245,-0.973847,-1.021274,-1.126449,-0.999529,-0.79982,-0.958353,-0.688239,-1.162623,-1.158192,-1.003008,-1.11516,-1.202756,-0.837274,-0.695345,-0.518698,-1.054107,-1.052671,-0.83447,-0.716294,-0.964986,...,-1.207953,-0.959223,-1.17199,-0.901144,-1.080804,-0.830272,-1.085166,-1.223514,-1.156841,-0.699708,-1.280068,-0.982987,-0.904069,-1.274129,-1.063502,-1.286233,-0.9278,-1.044648,-0.947871,-0.758213,-1.185222,-0.929121,-1.111981,-1.080035,-1.13228,-1.189384,-0.913642,-1.037292,-1.095676,-0.909284,-0.725776,-0.994552,-0.934265,-0.834702,-0.614738,-0.830153,-0.873968,-0.87095,-0.042222,-1.803991


In [94]:
# Standardise the features, then project onto the first two principal components.
# Reference: https://towardsdatascience.com/pca-using-python-scikit-learn-e653f8989e60
# NOTE(review): the pca25_50 preview lists 'max' and 'min' among its 992 columns,
# so they appear to be scaled and fed to PCA as features too - confirm this is intended.

X_pca25_50 = StandardScaler().fit_transform(pca25_50)

pca = PCA(n_components=2, random_state=0).fit_transform(X_pca25_50)

print(X_pca25_50.shape)

# Re-attach the row labels of pca25_50 to the two projected components.
pcaDf = pd.DataFrame(pca, index=pca25_50.index, columns=['pc 1', 'pc 2'])

pcaDf.head()

(139, 992)


Unnamed: 0,pc 1,pc 2
ACTR1A (10121),6.735635,17.665292
ADSL (158),8.950649,-30.654938
AHCTF1 (25909),-0.792845,-0.374585
AK6 (102157402),-2.660893,4.158015
ALG14 (199857),-7.842208,0.383535


In [95]:
# Scatter plot of the two principal components, one circle per row of pcaDf.
# reset_index() exposes the row labels as a column (referenced as 'index' in
# the tooltip).
chart25_50 = (
    alt.Chart(pcaDf.reset_index())
    .mark_circle()
    .encode(
        alt.X('pc 1'),
        alt.Y('pc 2'),
        tooltip=['index', 'pc 1', 'pc 2'],
    )
    .properties(title='25 to 50% dataset')
)

chart25_50

Repeat with 25% dataset (list_25_percent) for more data points

In [96]:
# Subset CRISPR_transpose to the rows listed in list_25_percent and drop any
# row that contains a NaN.
pca25 = (
    CRISPR_transpose
    .loc[list_25_percent]
    .dropna()
)

# Report the subset's shape before and after the NaN filter.
print(
    'Removing nan caused shape to change from',
    CRISPR_transpose.loc[list_25_percent].shape,
    'to',
    pca25.shape,
)

# Standardise the features, then reduce to two principal components.
X_pca25 = StandardScaler().fit_transform(pca25)

pca25data = PCA(n_components=2, random_state=0).fit_transform(X_pca25)
print(X_pca25.shape)

# Re-attach the row labels of pca25 to the two projected components.
pca25Df = pd.DataFrame(pca25data, index=pca25.index, columns=['pc 1', 'pc 2'])
print(pca25Df.shape)

# Scatter plot of the projection; reset_index() exposes the row labels as a
# column (referenced as 'index' in the tooltip).
chart25 = (
    alt.Chart(pca25Df.reset_index())
    .mark_circle()
    .encode(
        alt.X('pc 1'),
        alt.Y('pc 2'),
        tooltip=['index', 'pc 1', 'pc 2'],
    )
    .properties(title='25% dataset')
)

chart25

Removing nan caused shape to change from (2610, 992) to (2224, 992)
(2224, 992)
(2224, 2)


Repeat with 5 to 25% dataset (list_5_25_percent)

In [97]:
# Subset CRISPR_transpose to the rows listed in list_5_25_percent and drop any
# row that contains a NaN.
pca5_25 = (
    CRISPR_transpose
    .loc[list_5_25_percent]
    .dropna()
)

# Report the subset's shape before and after the NaN filter.
print(
    'Removing nan caused shape to change from',
    CRISPR_transpose.loc[list_5_25_percent].shape,
    'to',
    pca5_25.shape,
)

# Standardise the features, then reduce to two principal components.
X_pca5_25 = StandardScaler().fit_transform(pca5_25)

pca5_25data = PCA(n_components=2, random_state=0).fit_transform(X_pca5_25)
print(X_pca5_25.shape)

# Re-attach the row labels of pca5_25 to the two projected components.
pca5_25Df = pd.DataFrame(pca5_25data, index=pca5_25.index, columns=['pc 1', 'pc 2'])
print(pca5_25Df.shape)

# Scatter plot of the projection; reset_index() exposes the row labels as a
# column (referenced as 'index' in the tooltip).
chart5_25 = (
    alt.Chart(pca5_25Df.reset_index())
    .mark_circle()
    .encode(
        alt.X('pc 1'),
        alt.Y('pc 2'),
        tooltip=['index', 'pc 1', 'pc 2'],
    )
    .properties(title='5 to 25% dataset')
)

chart5_25

Removing nan caused shape to change from (433, 992) to (348, 992)
(348, 992)
(348, 2)


Repeat with 75% dataset (list_75_percent)

In [98]:
# Subset CRISPR_transpose to the rows listed in list_75_percent and drop any
# row that contains a NaN.
pca75 = (
    CRISPR_transpose
    .loc[list_75_percent]
    .dropna()
)

# Report the subset's shape before and after the NaN filter.
print(
    'Removing nan caused shape to change from',
    CRISPR_transpose.loc[list_75_percent].shape,
    'to',
    pca75.shape,
)

# Standardise the features, then reduce to two principal components.
X_pca75 = StandardScaler().fit_transform(pca75)

pca75data = PCA(n_components=2, random_state=0).fit_transform(X_pca75)
print(X_pca75.shape)

# Re-attach the row labels of pca75 to the two projected components.
pca75Df = pd.DataFrame(pca75data, index=pca75.index, columns=['pc 1', 'pc 2'])
print(pca75Df.shape)

# Scatter plot of the projection; reset_index() exposes the row labels as a
# column (referenced as 'index' in the tooltip).
chart75 = (
    alt.Chart(pca75Df.reset_index())
    .mark_circle()
    .encode(
        alt.X('pc 1'),
        alt.Y('pc 2'),
        tooltip=['index', 'pc 1', 'pc 2'],
    )
    .properties(title='75% dataset')
)

chart75

Removing nan caused shape to change from (607, 992) to (479, 992)
(479, 992)
(479, 2)


Repeat with 50% dataset (list_50_percent)

In [99]:
# Subset CRISPR_transpose to the rows listed in list_50_percent and drop any
# row that contains a NaN.
pca50 = (
    CRISPR_transpose
    .loc[list_50_percent]
    .dropna()
)

# Report the subset's shape before and after the NaN filter.
print(
    'Removing nan caused shape to change from',
    CRISPR_transpose.loc[list_50_percent].shape,
    'to',
    pca50.shape,
)

# Standardise the features, then reduce to two principal components.
X_pca50 = StandardScaler().fit_transform(pca50)

pca50data = PCA(n_components=2, random_state=0).fit_transform(X_pca50)
print(X_pca50.shape)

# Re-attach the row labels of pca50 to the two projected components.
pca50Df = pd.DataFrame(pca50data, index=pca50.index, columns=['pc 1', 'pc 2'])
print(pca50Df.shape)

# Scatter plot of the projection; reset_index() exposes the row labels as a
# column (referenced as 'index' in the tooltip).
chart50 = (
    alt.Chart(pca50Df.reset_index())
    .mark_circle()
    .encode(
        alt.X('pc 1'),
        alt.Y('pc 2'),
        tooltip=['index', 'pc 1', 'pc 2'],
    )
    .properties(title='50% dataset')
)

chart50

Removing nan caused shape to change from (788, 992) to (616, 992)
(616, 992)
(616, 2)


Combined view of all five PCA charts

In [100]:
# Lay the five PCA charts out in two columns: '&' stacks charts vertically and
# '|' places the two stacks side by side. The parentheses only spell out the
# grouping Python's operator precedence already applies ('&' binds tighter than '|').
(chart5_25 & chart25_50) | (chart25 & chart50 & chart75)