In [298]:
# Import libraries that are required to run your project
# You are allowed to add more libraries as you need

import pandas as pd
import numpy as np
from scipy.stats import spearmanr
import pyBigWig as pbw
from tqdm import tqdm


# Everthing for Pipeline
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel

# Models for feature selection
from sklearn.svm import LinearSVC


# Models for regression
from sklearn.linear_model import ElasticNet
from sklearn.neural_network import MLPRegressor


from sklearn.feature_selection import SequentialFeatureSelector

## Work Package 1.1 - Modeling Choices & Data Pre-processing

In [299]:
# TODO: 
# Load your feature (bed and/or bigwig and/or fasta) and target files (tsv) here.
# Decide which features to use for training. Feel free to process them however you need.

# NOTE: 
# bed and bigwig files contain signals of all chromosomes (including sex chromosomes).
# Training and validation spl£it based on chromosomes has been done for you. 
# However, you can resplit the data in any way you want.

#path_data = "/path/to/your/data/files"  # TODO
path_data = "/Users/sidhu/Documents/GENOMICS/GENOMICS/Data/"
path_test = "/Users/sidhu/Documents/GENOMICS/GENOMICS/Data/CAGE-train/"   # X3_test_info.tsv ; TODO
histone = ["DNase-bigwig/", "H3K4me1-bigwig/", "H3K4me3-bigwig/", "H3K9me3-bigwig/", "H3K27ac-bigwig/", "H3K27me3-bigwig/", "H3K36me3-bigwig/"]
#test_genes = pd.read_csv(path_test, sep='\t')
# ---------------------------INSERT CODE HERE---------------------------


### X1

# Training and validation set (with labels)
X1_train_info = pd.read_csv(path_test + "X1_train_info.tsv", sep= '\t')
X1_train_y = pd.read_csv(path_test + "X1_train_y.tsv", sep= '\t')
X1_val_info = pd.read_csv(path_test + "X1_val_info.tsv", sep= '\t')
X1_val_y = pd.read_csv(path_test + "X1_val_y.tsv", sep= '\t')


## Dataset

# DNase
Dnase = pbw.open(path_data + str(histone[0]) + str("X1.bw"))

# Histones
H1 = pbw.open(path_data + str(histone[1]) + str("X1.bw"))
H2 = pbw.open(path_data + str(histone[2]) + str("X1.bw"))
H3 = pbw.open(path_data + str(histone[3]) + str("X1.bw"))
H4 = pbw.open(path_data + str(histone[4]) + str("X1.bw"))
H5 = pbw.open(path_data + str(histone[5]) + str("X1.bw"))
H6 = pbw.open(path_data + str(histone[6]) + str("X1.bw"))

# ---------------------------------------------------------------------- 

In [300]:
Dnase.stats("chr1", 0, 500)

[0.02410176210105419]

In [301]:
len(Dnase.intervals("chr1", 0, 500))

22

In [302]:
len(Dnase.values("chr1", 0, 500))

500

In [337]:
X1_train_info.loc[16][2:7]

gene_start    90609832
gene_end      90642862
TSS_start     90643054
TSS_end       90643104
strand               -
Name: 16, dtype: object

In [318]:
X1_train_info.shape

(14310, 7)

In [304]:
Dnase.stats(chrom, 132257692 - 40000, 132257692 + 40000)

[0.04071614332400184]

In [305]:
Dnase.stats(chrom, 132257692 - 40000, 132257692 + 40000, type="max")

[0.2114025205373764]

In [306]:
Dnase.stats(chrom, 132257692 - 40000, 132257692 + 40000, type="std")

[0.024007196001341442]

In [307]:
#It's often the case that we would instead like to compute values 
#of some number of evenly spaced bins in a given interval, which is also simple:

bins = Dnase.stats("chr1", 99, 200, type="max", nBins=2)
nbins = np.array(bins)


In [316]:
input_file = X1_train_info
chrom = str(input_file.loc[index][1])
gene_start = int(input_file.loc[index][2])
gene_end = int(input_file.loc[index][3])
TSS_start = int(input_file.loc[index][4])
TSS_end = int(input_file.loc[index][5])
strand = input_file.loc[index][6]

TSS_mid = int(TSS_start + (TSS_end-TSS_start)/2)
print(TSS_start, TSS_end, TSS_mid)
print(gene_start, gene_end)


186069154 186069204 186069179
186069155 186088073


14309

In [353]:
# Preprocess the data

# Extract more important features
highest_right = 0 
lowest_left = 1000000
# input_file is a tsv file like [X1_train_info, X1_val_info, ..]
def preprocess_z(input_file, zoom_depth = 16):
    
    Data = []
    
    for index in tqdm(input_file.index):
        
        # Extract all information from the input file
        chrom = str(input_file.loc[index][1])
        gene_start = int(input_file.loc[index][2])
        gene_end = int(input_file.loc[index][3])
        TSS_start = int(input_file.loc[index][4])
        TSS_end = int(input_file.loc[index][5])
        strand = input_file.loc[index][6]
        
        TSS_mid = int(TSS_start + (TSS_end-TSS_start)/2)
        
        if strand == '-':
            #  We have to switch the TSS_start and TSS_end values
            temp = TSS_end
            TSS_end = TSS_start
            TSS_start = temp
            
        
        # DNase
        # Extract window of values around TSS_mid and additionally for every window size also extract [mean, max, min , std, coverage]
        Dnase_stats = []
        Dnase_pic = []

        for zoom in range(0, zoom_depth):
            
            window_left = (TSS_mid - 2**zoom)
            window_right = (TSS_mid + 2**zoom)
            
            """
            if (TSS_mid - 2**zoom) < gene_start:
                window_left = gene_start
            else:
                window_left = (TSS_mid - 2**zoom)
                
            if (TSS_mid + 2**zoom) > gene_end:
                window_right = gene_end
            else:
                window_right = (TSS_mid + 2**zoom)
            """    
                
            #print("index: ", index)
            #print(window_left, window_right, window_left < window_right)
            #print(window_left, window_right)
            assert(window_left < window_right)
            assert(window_left > 0 or window_right > 0)
            
            
            if highest_right < window_right:
                highest_right = window_right
            if lowest_left > window_left:
                lowest_left = window_left
            
            # First we extract the picture 
            Dnase_pic.append(Dnase.values(chrom, window_left, window_right))
            
            # then for every picture we also extract statistics [mean, max, min, std, coverage] 
            Dnase_stats.append(Dnase.stats(chrom, window_left, window_right))
            Dnase_stats.append(Dnase.stats(chrom, window_left, window_right, type="max"))
            Dnase_stats.append(Dnase.stats(chrom, window_left, window_right, type="min"))
            Dnase_stats.append(Dnase.stats(chrom, window_left, window_right, type="std"))
            Dnase_stats.append(Dnase.stats(chrom, window_left, window_right, type="coverage"))
        
        DNase_data = [*Dnase_stats, *Dnase_pic]
        
        Data.append(Dnase_data)
    
    
    Data_np = np.array(Data)
    
    return Data_np
        
        
    

In [354]:
X1_train = preprocess_z(X1_train_info)

  0%|                                        | 20/14310 [00:00<02:23, 99.91it/s]

112658386 112658388
112658385 112658389
112658383 112658391
112658379 112658395
112658371 112658403
112658355 112658419
112658323 112658451
112658259 112658515
112658131 112658643
112657875 112658899
112657363 112659411
112656339 112660435
112654291 112662483
112650195 112666579
112642003 112674771
112625619 112691155
112691155 100000
16738667 16738669
16738666 16738670
16738664 16738672
16738660 16738676
16738652 16738684
16738636 16738700
16738604 16738732
16738540 16738796
16738412 16738924
16738156 16739180
16737644 16739692
16736620 16740716
16734572 16742764
16730476 16746860
16722284 16755052
16705900 16771436
16771436 100000
28225287 28225289
28225286 28225290
28225284 28225292
28225280 28225296
28225272 28225304
28225256 28225320
28225224 28225352
28225160 28225416
28225032 28225544
28224776 28225800
28224264 28226312
28223240 28227336
28221192 28229384
28217096 28233480
28208904 28241672
28192520 28258056
28258056 100000
28931980 28931982
28931979 28931983
28931977 28931985
2

  0%|                                        | 30/14310 [00:00<02:36, 91.39it/s]

 143692678
143668102 143700870
143651718 143717254
143717254 100000
100199145 100199147
100199144 100199148
100199142 100199150
100199138 100199154
100199130 100199162
100199114 100199178
100199082 100199210
100199018 100199274
100198890 100199402
100198634 100199658
100198122 100200170
100197098 100201194
100195050 100203242
100190954 100207338
100182762 100215530
100166378 100231914
100231914 100000
52116 52118
52115 52119
52113 52121
52109 52125
52101 52133
52085 52149
52053 52181
51989 52245
51861 52373
51605 52629
51093 53141
50069 54165
48021 56213
43925 60309
35733 68501
19349 84885
84885 19349
37953686 37953688
37953685 37953689
37953683 37953691
37953679 37953695
37953671 37953703
37953655 37953719
37953623 37953751
37953559 37953815
37953431 37953943
37953175 37954199
37952663 37954711
37951639 37955735
37949591 37957783
37945495 37961879
37937303 37970071
37920919 37986455
37986455 100000
41572040 41572042
41572039 41572043
41572037 41572045
41572033 41572049
41572025 415720

  0%|▏                                       | 51/14310 [00:00<02:41, 88.42it/s]

 92134761
92133993 92135017
92133481 92135529
92132457 92136553
92130409 92138601
92126313 92142697
92118121 92150889
92101737 92167273
92167273 100000
29717674 29717676
29717673 29717677
29717671 29717679
29717667 29717683
29717659 29717691
29717643 29717707
29717611 29717739
29717547 29717803
29717419 29717931
29717163 29718187
29716651 29718699
29715627 29719723
29713579 29721771
29709483 29725867
29701291 29734059
29684907 29750443
29750443 100000
67452442 67452444
67452441 67452445
67452439 67452447
67452435 67452451
67452427 67452459
67452411 67452475
67452379 67452507
67452315 67452571
67452187 67452699
67451931 67452955
67451419 67453467
67450395 67454491
67448347 67456539
67444251 67460635
67436059 67468827
67419675 67485211
67485211 100000
80716935 80716937
80716934 80716938
80716932 80716940
80716928 80716944
80716920 80716952
80716904 80716968
80716872 80717000
80716808 80717064
80716680 80717192
80716424 80717448
80715912 80717960
80714888 80718984
80712840 80721032
807087

  0%|▏                                       | 70/14310 [00:00<02:54, 81.60it/s]

 100000
17247146 17247148
17247145 17247149
17247143 17247151
17247139 17247155
17247131 17247163
17247115 17247179
17247083 17247211
17247019 17247275
17246891 17247403
17246635 17247659
17246123 17248171
17245099 17249195
17243051 17251243
17238955 17255339
17230763 17263531
17214379 17279915
17279915 100000
88990875 88990877
88990874 88990878
88990872 88990880
88990868 88990884
88990860 88990892
88990844 88990908
88990812 88990940
88990748 88991004
88990620 88991132
88990364 88991388
88989852 88991900
88988828 88992924
88986780 88994972
88982684 88999068
88974492 89007260
88958108 89023644
89023644 100000
27337282 27337284
27337281 27337285
27337279 27337287
27337275 27337291
27337267 27337299
27337251 27337315
27337219 27337347
27337155 27337411
27337027 27337539
27336771 27337795
27336259 27338307
27335235 27339331
27333187 27341379
27329091 27345475
27320899 27353667
27304515 27370051
27370051 100000
73096824 73096826
73096823 73096827
73096821 73096829
73096817 73096833
73096809

  1%|▏                                       | 89/14310 [00:01<02:50, 83.36it/s]

 100000
49093585 49093587
49093584 49093588
49093582 49093590
49093578 49093594
49093570 49093602
49093554 49093618
49093522 49093650
49093458 49093714
49093330 49093842
49093074 49094098
49092562 49094610
49091538 49095634
49089490 49097682
49085394 49101778
49077202 49109970
49060818 49126354
49126354 100000
115895110 115895112
115895109 115895113
115895107 115895115
115895103 115895119
115895095 115895127
115895079 115895143
115895047 115895175
115894983 115895239
115894855 115895367
115894599 115895623
115894087 115896135
115893063 115897159
115891015 115899207
115886919 115903303
115878727 115911495
115862343 115927879
115927879 100000
122981694 122981696
122981693 122981697
122981691 122981699
122981687 122981703
122981679 122981711
122981663 122981727
122981631 122981759
122981567 122981823
122981439 122981951
122981183 122982207
122980671 122982719
122979647 122983743
122977599 122985791
122973503 122989887
122965311 122998079
122948927 123014463
123014463 100000
41499304 41499

  1%|▎                                      | 109/14310 [00:01<02:47, 84.63it/s]

 136856487
136844199 136860583
136836007 136868775
136819623 136885159
136885159 100000
69234864 69234866
69234863 69234867
69234861 69234869
69234857 69234873
69234849 69234881
69234833 69234897
69234801 69234929
69234737 69234993
69234609 69235121
69234353 69235377
69233841 69235889
69232817 69236913
69230769 69238961
69226673 69243057
69218481 69251249
69202097 69267633
69267633 100000
21202179 21202181
21202178 21202182
21202176 21202184
21202172 21202188
21202164 21202196
21202148 21202212
21202116 21202244
21202052 21202308
21201924 21202436
21201668 21202692
21201156 21203204
21200132 21204228
21198084 21206276
21193988 21210372
21185796 21218564
21169412 21234948
21234948 100000
102995334 102995336
102995333 102995337
102995331 102995339
102995327 102995343
102995319 102995351
102995303 102995367
102995271 102995399
102995207 102995463
102995079 102995591
102994823 102995847
102994311 102996359
102993287 102997383
102991239 102999431
102987143 103003527
102978951 103011719
1029

  1%|▎                                      | 128/14310 [00:01<02:45, 85.66it/s]

 6851523
6850755 6851779
6850243 6852291
6849219 6853315
6847171 6855363
6843075 6859459
6834883 6867651
6818499 6884035
6884035 100000
45413593 45413595
45413592 45413596
45413590 45413598
45413586 45413602
45413578 45413610
45413562 45413626
45413530 45413658
45413466 45413722
45413338 45413850
45413082 45414106
45412570 45414618
45411546 45415642
45409498 45417690
45405402 45421786
45397210 45429978
45380826 45446362
45446362 100000
114416309 114416311
114416308 114416312
114416306 114416314
114416302 114416318
114416294 114416326
114416278 114416342
114416246 114416374
114416182 114416438
114416054 114416566
114415798 114416822
114415286 114417334
114414262 114418358
114412214 114420406
114408118 114424502
114399926 114432694
114383542 114449078
114449078 100000
48684899 48684901
48684898 48684902
48684896 48684904
48684892 48684908
48684884 48684916
48684868 48684932
48684836 48684964
48684772 48685028
48684644 48685156
48684388 48685412
48683876 48685924
48682852 48686948
4868080

  1%|▍                                      | 146/14310 [00:01<02:54, 81.12it/s]

 100000
89701679 89701681
89701678 89701682
89701676 89701684
89701672 89701688
89701664 89701696
89701648 89701712
89701616 89701744
89701552 89701808
89701424 89701936
89701168 89702192
89700656 89702704
89699632 89703728
89697584 89705776
89693488 89709872
89685296 89718064
89668912 89734448
89734448 100000
121894463 121894465
121894462 121894466
121894460 121894468
121894456 121894472
121894448 121894480
121894432 121894496
121894400 121894528
121894336 121894592
121894208 121894720
121893952 121894976
121893440 121895488
121892416 121896512
121890368 121898560
121886272 121902656
121878080 121910848
121861696 121927232
121927232 100000
62804880 62804882
62804879 62804883
62804877 62804885
62804873 62804889
62804865 62804897
62804849 62804913
62804817 62804945
62804753 62805009
62804625 62805137
62804369 62805393
62803857 62805905
62802833 62806929
62800785 62808977
62796689 62813073
62788497 62821265
62772113 62837649
62837649 100000
62372989 62372991
62372988 62372992
62372986 62

  1%|▍                                      | 165/14310 [00:01<02:55, 80.82it/s]

 100000
130366304 130366306
130366303 130366307
130366301 130366309
130366297 130366313
130366289 130366321
130366273 130366337
130366241 130366369
130366177 130366433
130366049 130366561
130365793 130366817
130365281 130367329
130364257 130368353
130362209 130370401
130358113 130374497
130349921 130382689
130333537 130399073
130399073 100000
48898878 48898880
48898877 48898881
48898875 48898883
48898871 48898887
48898863 48898895
48898847 48898911
48898815 48898943
48898751 48899007
48898623 48899135
48898367 48899391
48897855 48899903
48896831 48900927
48894783 48902975
48890687 48907071
48882495 48915263
48866111 48931647
48931647 100000
36568048 36568050
36568047 36568051
36568045 36568053
36568041 36568057
36568033 36568065
36568017 36568081
36567985 36568113
36567921 36568177
36567793 36568305
36567537 36568561
36567025 36569073
36566001 36570097
36563953 36572145
36559857 36576241
36551665 36584433
36535281 36600817
36600817 100000
45307812 45307814
45307811 45307815
45307809 45

  1%|▌                                      | 184/14310 [00:02<02:51, 82.55it/s]


40943179 40959563
40934987 40967755
40918603 40984139
40984139 100000
37225545 37225547
37225544 37225548
37225542 37225550
37225538 37225554
37225530 37225562
37225514 37225578
37225482 37225610
37225418 37225674
37225290 37225802
37225034 37226058
37224522 37226570
37223498 37227594
37221450 37229642
37217354 37233738
37209162 37241930
37192778 37258314
37258314 100000
72997000 72997002
72996999 72997003
72996997 72997005
72996993 72997009
72996985 72997017
72996969 72997033
72996937 72997065
72996873 72997129
72996745 72997257
72996489 72997513
72995977 72998025
72994953 72999049
72992905 73001097
72988809 73005193
72980617 73013385
72964233 73029769
73029769 100000
139703030 139703032
139703029 139703033
139703027 139703035
139703023 139703039
139703015 139703047
139702999 139703063
139702967 139703095
139702903 139703159
139702775 139703287
139702519 139703543
139702007 139704055
139700983 139705079
139698935 139707127
139694839 139711223
139686647 139719415
139670263 139735799
1

  1%|▌                                      | 193/14310 [00:02<02:57, 79.45it/s]

 115537158
115537158 100000
48617861 48617863
48617860 48617864
48617858 48617866
48617854 48617870
48617846 48617878
48617830 48617894
48617798 48617926
48617734 48617990
48617606 48618118
48617350 48618374
48616838 48618886
48615814 48619910
48613766 48621958
48609670 48626054
48601478 48634246
48585094 48650630
48650630 100000
75641581 75641583
75641580 75641584
75641578 75641586
75641574 75641590
75641566 75641598
75641550 75641614
75641518 75641646
75641454 75641710
75641326 75641838
75641070 75642094
75640558 75642606
75639534 75643630
75637486 75645678
75633390 75649774
75625198 75657966
75608814 75674350
75674350 100000
128371342 128371344
128371341 128371345
128371339 128371347
128371335 128371351
128371327 128371359
128371311 128371375
128371279 128371407
128371215 128371471
128371087 128371599
128370831 128371855
128370319 128372367
128369295 128373391
128367247 128375439
128363151 128379535
128354959 128387727
128338575 128404111
128404111 100000
7726820 7726822
7726819 772

  1%|▌                                      | 211/14310 [00:02<02:56, 79.79it/s]

83335124 83335132
83335120 83335136
83335112 83335144
83335096 83335160
83335064 83335192
83335000 83335256
83334872 83335384
83334616 83335640
83334104 83336152
83333080 83337176
83331032 83339224
83326936 83343320
83318744 83351512
83302360 83367896
83367896 100000
151020627 151020629
151020626 151020630
151020624 151020632
151020620 151020636
151020612 151020644
151020596 151020660
151020564 151020692
151020500 151020756
151020372 151020884
151020116 151021140
151019604 151021652
151018580 151022676
151016532 151024724
151012436 151028820
151004244 151037012
150987860 151053396
151053396 100000
183099524 183099526
183099523 183099527
183099521 183099529
183099517 183099533
183099509 183099541
183099493 183099557
183099461 183099589
183099397 183099653
183099269 183099781
183099013 183100037
183098501 183100549
183097477 183101573
183095429 183103621
183091333 183107717
183083141 183115909
183066757 183132293
183132293 100000
113488804 113488806
113488803 113488807
113488801 11348880

  2%|▋                                      | 230/14310 [00:02<02:52, 81.57it/s]

 69995008
69995008 100000
23710349 23710351
23710348 23710352
23710346 23710354
23710342 23710358
23710334 23710366
23710318 23710382
23710286 23710414
23710222 23710478
23710094 23710606
23709838 23710862
23709326 23711374
23708302 23712398
23706254 23714446
23702158 23718542
23693966 23726734
23677582 23743118
23743118 100000
33603424 33603426
33603423 33603427
33603421 33603429
33603417 33603433
33603409 33603441
33603393 33603457
33603361 33603489
33603297 33603553
33603169 33603681
33602913 33603937
33602401 33604449
33601377 33605473
33599329 33607521
33595233 33611617
33587041 33619809
33570657 33636193
33636193 100000
61429962 61429964
61429961 61429965
61429959 61429967
61429955 61429971
61429947 61429979
61429931 61429995
61429899 61430027
61429835 61430091
61429707 61430219
61429451 61430475
61428939 61430987
61427915 61432011
61425867 61434059
61421771 61438155
61413579 61446347
61397195 61462731
61462731 100000
42054980 42054982
42054979 42054983
42054977 42054985
42054973

  2%|▋                                      | 247/14310 [00:03<03:02, 77.16it/s]

 5617274
5617274 100000
43774062 43774064
43774061 43774065
43774059 43774067
43774055 43774071
43774047 43774079
43774031 43774095
43773999 43774127
43773935 43774191
43773807 43774319
43773551 43774575
43773039 43775087
43772015 43776111
43769967 43778159
43765871 43782255
43757679 43790447
43741295 43806831
43806831 100000
117675525 117675527
117675524 117675528
117675522 117675530
117675518 117675534
117675510 117675542
117675494 117675558
117675462 117675590
117675398 117675654
117675270 117675782
117675014 117676038
117674502 117676550
117673478 117677574
117671430 117679622
117667334 117683718
117659142 117691910
117642758 117708294
117708294 100000
186555541 186555543
186555540 186555544
186555538 186555546
186555534 186555550
186555526 186555558
186555510 186555574
186555478 186555606
186555414 186555670
186555286 186555798
186555030 186556054
186554518 186556566
186553494 186557590
186551446 186559638
186547350 186563734
186539158 186571926
186522774 186588310
186588310 10000

  2%|▋                                      | 263/14310 [00:03<03:12, 72.95it/s]

 617751
611607 619799
607511 623895
599319 632087
582935 648471
648471 100000
96266269 96266271
96266268 96266272
96266266 96266274
96266262 96266278
96266254 96266286
96266238 96266302
96266206 96266334
96266142 96266398
96266014 96266526
96265758 96266782
96265246 96267294
96264222 96268318
96262174 96270366
96258078 96274462
96249886 96282654
96233502 96299038
96299038 100000
26725987 26725989
26725986 26725990
26725984 26725992
26725980 26725996
26725972 26726004
26725956 26726020
26725924 26726052
26725860 26726116
26725732 26726244
26725476 26726500
26724964 26727012
26723940 26728036
26721892 26730084
26717796 26734180
26709604 26742372
26693220 26758756
26758756 100000
68539236 68539238
68539235 68539239
68539233 68539241
68539229 68539245
68539221 68539253
68539205 68539269
68539173 68539301
68539109 68539365
68538981 68539493
68538725 68539749
68538213 68540261
68537189 68541285
68535141 68543333
68531045 68547429
68522853 68555621
68506469 68572005
68572005 100000
122564450 

  2%|▊                                      | 279/14310 [00:03<03:14, 72.05it/s]

57579169 57644705
57644705 100000
113410077 113410079
113410076 113410080
113410074 113410082
113410070 113410086
113410062 113410094
113410046 113410110
113410014 113410142
113409950 113410206
113409822 113410334
113409566 113410590
113409054 113411102
113408030 113412126
113405982 113414174
113401886 113418270
113393694 113426462
113377310 113442846
113442846 100000
129974711 129974713
129974710 129974714
129974708 129974716
129974704 129974720
129974696 129974728
129974680 129974744
129974648 129974776
129974584 129974840
129974456 129974968
129974200 129975224
129973688 129975736
129972664 129976760
129970616 129978808
129966520 129982904
129958328 129991096
129941944 130007480
130007480 100000
32298873 32298875
32298872 32298876
32298870 32298878
32298866 32298882
32298858 32298890
32298842 32298906
32298810 32298938
32298746 32299002
32298618 32299130
32298362 32299386
32297850 32299898
32296826 32300922
32294778 32302970
32290682 32307066
32282490 32315258
32266106 32331642
3233

  2%|▊                                      | 288/14310 [00:03<03:05, 75.68it/s]

 46735604
46686452 46751988
46751988 100000
38062769 38062771
38062768 38062772
38062766 38062774
38062762 38062778
38062754 38062786
38062738 38062802
38062706 38062834
38062642 38062898
38062514 38063026
38062258 38063282
38061746 38063794
38060722 38064818
38058674 38066866
38054578 38070962
38046386 38079154
38030002 38095538
38095538 100000
149090713 149090715
149090712 149090716
149090710 149090718
149090706 149090722
149090698 149090730
149090682 149090746
149090650 149090778
149090586 149090842
149090458 149090970
149090202 149091226
149089690 149091738
149088666 149092762
149086618 149094810
149082522 149098906
149074330 149107098
149057946 149123482
149123482 100000
89710322 89710324
89710321 89710325
89710319 89710327
89710315 89710331
89710307 89710339
89710291 89710355
89710259 89710387
89710195 89710451
89710067 89710579
89709811 89710835
89709299 89711347
89708275 89712371
89706227 89714419
89702131 89718515
89693939 89726707
89677555 89743091
89743091 100000
194672451 1

  2%|▊                                      | 304/14310 [00:03<03:08, 74.12it/s]

 100000
73557628 73557630
73557627 73557631
73557625 73557633
73557621 73557637
73557613 73557645
73557597 73557661
73557565 73557693
73557501 73557757
73557373 73557885
73557117 73558141
73556605 73558653
73555581 73559677
73553533 73561725
73549437 73565821
73541245 73574013
73524861 73590397
73590397 100000
234569 234571
234568 234572
234566 234574
234562 234578
234554 234586
234538 234602
234506 234634
234442 234698
234314 234826
234058 235082
233546 235594
232522 236618
230474 238666
226378 242762
218186 250954
201802 267338
267338 100000
186191543 186191545
186191542 186191546
186191540 186191548
186191536 186191552
186191528 186191560
186191512 186191576
186191480 186191608
186191416 186191672
186191288 186191800
186191032 186192056
186190520 186192568
186189496 186193592
186187448 186195640
186183352 186199736
186175160 186207928
186158776 186224312
186224312 100000
27119089 27119091
27119088 27119092
27119086 27119094
27119082 27119098
27119074 27119106
27119058 27119122
27119

  2%|▊                                      | 321/14310 [00:04<02:59, 77.84it/s]

 231215051
231165899 231231435
231231435 100000
78666070 78666072
78666069 78666073
78666067 78666075
78666063 78666079
78666055 78666087
78666039 78666103
78666007 78666135
78665943 78666199
78665815 78666327
78665559 78666583
78665047 78667095
78664023 78668119
78661975 78670167
78657879 78674263
78649687 78682455
78633303 78698839
78698839 100000
77431607 77431609
77431606 77431610
77431604 77431612
77431600 77431616
77431592 77431624
77431576 77431640
77431544 77431672
77431480 77431736
77431352 77431864
77431096 77432120
77430584 77432632
77429560 77433656
77427512 77435704
77423416 77439800
77415224 77447992
77398840 77464376
77464376 100000
7495869 7495871
7495868 7495872
7495866 7495874
7495862 7495878
7495854 7495886
7495838 7495902
7495806 7495934
7495742 7495998
7495614 7496126
7495358 7496382
7494846 7496894
7493822 7497918
7491774 7499966
7487678 7504062
7479486 7512254
7463102 7528638
7528638 100000
158644728 158644730
158644727 158644731
158644725 158644733
158644721 158

  2%|▉                                      | 339/14310 [00:04<02:55, 79.78it/s]

 77360369
77348081 77364465
77339889 77372657
77323505 77389041
77389041 100000
41228362 41228364
41228361 41228365
41228359 41228367
41228355 41228371
41228347 41228379
41228331 41228395
41228299 41228427
41228235 41228491
41228107 41228619
41227851 41228875
41227339 41229387
41226315 41230411
41224267 41232459
41220171 41236555
41211979 41244747
41195595 41261131
41261131 100000
49942056 49942058
49942055 49942059
49942053 49942061
49942049 49942065
49942041 49942073
49942025 49942089
49941993 49942121
49941929 49942185
49941801 49942313
49941545 49942569
49941033 49943081
49940009 49944105
49937961 49946153
49933865 49950249
49925673 49958441
49909289 49974825
49974825 100000
134648812 134648814
134648811 134648815
134648809 134648817
134648805 134648821
134648797 134648829
134648781 134648845
134648749 134648877
134648685 134648941
134648557 134649069
134648301 134649325
134647789 134649837
134646765 134650861
134644717 134652909
134640621 134657005
134632429 134665197
134616045 13

  2%|▉                                      | 357/14310 [00:04<02:48, 83.01it/s]

 100000
27739354 27739356
27739353 27739357
27739351 27739359
27739347 27739363
27739339 27739371
27739323 27739387
27739291 27739419
27739227 27739483
27739099 27739611
27738843 27739867
27738331 27740379
27737307 27741403
27735259 27743451
27731163 27747547
27722971 27755739
27706587 27772123
27772123 100000
200306182 200306184
200306181 200306185
200306179 200306187
200306175 200306191
200306167 200306199
200306151 200306215
200306119 200306247
200306055 200306311
200305927 200306439
200305671 200306695
200305159 200307207
200304135 200308231
200302087 200310279
200297991 200314375
200289799 200322567
200273415 200338951
200338951 100000
109013503 109013505
109013502 109013506
109013500 109013508
109013496 109013512
109013488 109013520
109013472 109013536
109013440 109013568
109013376 109013632
109013248 109013760
109012992 109014016
109012480 109014528
109011456 109015552
109009408 109017600
109005312 109021696
108997120 109029888
108980736 109046272
109046272 100000
112838608 1128

  3%|█                                      | 375/14310 [00:04<02:43, 85.01it/s]

 100000
138541660 138541662
138541659 138541663
138541657 138541665
138541653 138541669
138541645 138541677
138541629 138541693
138541597 138541725
138541533 138541789
138541405 138541917
138541149 138542173
138540637 138542685
138539613 138543709
138537565 138545757
138533469 138549853
138525277 138558045
138508893 138574429
138574429 100000
40960671 40960673
40960670 40960674
40960668 40960676
40960664 40960680
40960656 40960688
40960640 40960704
40960608 40960736
40960544 40960800
40960416 40960928
40960160 40961184
40959648 40961696
40958624 40962720
40956576 40964768
40952480 40968864
40944288 40977056
40927904 40993440
40993440 100000
500696 500698
500695 500699
500693 500701
500689 500705
500681 500713
500665 500729
500633 500761
500569 500825
500441 500953
500185 501209
499673 501721
498649 502745
496601 504793
492505 508889
484313 517081
467929 533465
533465 100000
46895787 46895789
46895786 46895790
46895784 46895792
46895780 46895796
46895772 46895804
46895756 46895820
46895

  3%|█                                      | 393/14310 [00:04<02:53, 80.35it/s]

 143440329
143434185 143442377
143430089 143446473
143421897 143454665
143405513 143471049
143471049 100000
135639514 135639516
135639513 135639517
135639511 135639519
135639507 135639523
135639499 135639531
135639483 135639547
135639451 135639579
135639387 135639643
135639259 135639771
135639003 135640027
135638491 135640539
135637467 135641563
135635419 135643611
135631323 135647707
135623131 135655899
135606747 135672283
135672283 100000
79908010 79908012
79908009 79908013
79908007 79908015
79908003 79908019
79907995 79908027
79907979 79908043
79907947 79908075
79907883 79908139
79907755 79908267
79907499 79908523
79906987 79909035
79905963 79910059
79903915 79912107
79899819 79916203
79891627 79924395
79875243 79940779
79940779 100000
123233212 123233214
123233211 123233215
123233209 123233217
123233205 123233221
123233197 123233229
123233181 123233245
123233149 123233277
123233085 123233341
123232957 123233469
123232701 123233725
123232189 123234237
123231165 123235261
123229117 1

  3%|█                                      | 411/14310 [00:05<03:02, 76.10it/s]

 100000
40770103 40770105
40770102 40770106
40770100 40770108
40770096 40770112
40770088 40770120
40770072 40770136
40770040 40770168
40769976 40770232
40769848 40770360
40769592 40770616
40769080 40771128
40768056 40772152
40766008 40774200
40761912 40778296
40753720 40786488
40737336 40802872
40802872 100000
9577808 9577810
9577807 9577811
9577805 9577813
9577801 9577817
9577793 9577825
9577777 9577841
9577745 9577873
9577681 9577937
9577553 9578065
9577297 9578321
9576785 9578833
9575761 9579857
9573713 9581905
9569617 9586001
9561425 9594193
9545041 9610577
9610577 100000
66466947 66466949
66466946 66466950
66466944 66466952
66466940 66466956
66466932 66466964
66466916 66466980
66466884 66467012
66466820 66467076
66466692 66467204
66466436 66467460
66465924 66467972
66464900 66468996
66462852 66471044
66458756 66475140
66450564 66483332
66434180 66499716
66499716 100000
30066732 30066734
30066731 30066735
30066729 30066737
30066725 30066741
30066717 30066749
30066701 30066765
30066

  3%|█▏                                     | 428/14310 [00:05<02:55, 79.17it/s]

 150518610
150518610 100000
84982091 84982093
84982090 84982094
84982088 84982096
84982084 84982100
84982076 84982108
84982060 84982124
84982028 84982156
84981964 84982220
84981836 84982348
84981580 84982604
84981068 84983116
84980044 84984140
84977996 84986188
84973900 84990284
84965708 84998476
84949324 85014860
85014860 100000
6785040 6785042
6785039 6785043
6785037 6785045
6785033 6785049
6785025 6785057
6785009 6785073
6784977 6785105
6784913 6785169
6784785 6785297
6784529 6785553
6784017 6786065
6782993 6787089
6780945 6789137
6776849 6793233
6768657 6801425
6752273 6817809
6817809 100000
166168674 166168676
166168673 166168677
166168671 166168679
166168667 166168683
166168659 166168691
166168643 166168707
166168611 166168739
166168547 166168803
166168419 166168931
166168163 166169187
166167651 166169699
166166627 166170723
166164579 166172771
166160483 166176867
166152291 166185059
166135907 166201443
166201443 100000
121380653 121380655
121380652 121380656
121380650 121380658


  3%|█▏                                     | 444/14310 [00:05<03:01, 76.41it/s]

 100000
40100699 40100701
40100698 40100702
40100696 40100704
40100692 40100708
40100684 40100716
40100668 40100732
40100636 40100764
40100572 40100828
40100444 40100956
40100188 40101212
40099676 40101724
40098652 40102748
40096604 40104796
40092508 40108892
40084316 40117084
40067932 40133468
40133468 100000
165281996 165281998
165281995 165281999
165281993 165282001
165281989 165282005
165281981 165282013
165281965 165282029
165281933 165282061
165281869 165282125
165281741 165282253
165281485 165282509
165280973 165283021
165279949 165284045
165277901 165286093
165273805 165290189
165265613 165298381
165249229 165314765
165314765 100000
175961387 175961389
175961386 175961390
175961384 175961392
175961380 175961396
175961372 175961404
175961356 175961420
175961324 175961452
175961260 175961516
175961132 175961644
175960876 175961900
175960364 175962412
175959340 175963436
175957292 175965484
175953196 175969580
175945004 175977772
175928620 175994156
175994156 100000
327511 327513


  3%|█▎                                     | 461/14310 [00:05<02:57, 78.22it/s]

 167396153
167347001 167412537
167412537 100000
63911952 63911954
63911951 63911955
63911949 63911957
63911945 63911961
63911937 63911969
63911921 63911985
63911889 63912017
63911825 63912081
63911697 63912209
63911441 63912465
63910929 63912977
63909905 63914001
63907857 63916049
63903761 63920145
63895569 63928337
63879185 63944721
63944721 100000
43112652 43112654
43112651 43112655
43112649 43112657
43112645 43112661
43112637 43112669
43112621 43112685
43112589 43112717
43112525 43112781
43112397 43112909
43112141 43113165
43111629 43113677
43110605 43114701
43108557 43116749
43104461 43120845
43096269 43129037
43079885 43145421
43145421 100000
74009059 74009061
74009058 74009062
74009056 74009064
74009052 74009068
74009044 74009076
74009028 74009092
74008996 74009124
74008932 74009188
74008804 74009316
74008548 74009572
74008036 74010084
74007012 74011108
74004964 74013156
74000868 74017252
73992676 74025444
73976292 74041828
74041828 100000
30489490 30489492
30489489 30489493
3048

  3%|█▎                                     | 479/14310 [00:05<02:50, 81.08it/s]

 100000
64064337 64064339
64064336 64064340
64064334 64064342
64064330 64064346
64064322 64064354
64064306 64064370
64064274 64064402
64064210 64064466
64064082 64064594
64063826 64064850
64063314 64065362
64062290 64066386
64060242 64068434
64056146 64072530
64047954 64080722
64031570 64097106
64097106 100000
200510042 200510044
200510041 200510045
200510039 200510047
200510035 200510051
200510027 200510059
200510011 200510075
200509979 200510107
200509915 200510171
200509787 200510299
200509531 200510555
200509019 200511067
200507995 200512091
200505947 200514139
200501851 200518235
200493659 200526427
200477275 200542811
200542811 100000
123641098 123641100
123641097 123641101
123641095 123641103
123641091 123641107
123641083 123641115
123641067 123641131
123641035 123641163
123640971 123641227
123640843 123641355
123640587 123641611
123640075 123642123
123639051 123643147
123637003 123645195
123632907 123649291
123624715 123657483
123608331 123673867
123673867 100000
139315774 1393

  3%|█▎                                     | 497/14310 [00:06<02:47, 82.49it/s]

 151760679
151754535 151762727
151750439 151766823
151742247 151775015
151725863 151791399
151791399 100000
49090786 49090788
49090785 49090789
49090783 49090791
49090779 49090795
49090771 49090803
49090755 49090819
49090723 49090851
49090659 49090915
49090531 49091043
49090275 49091299
49089763 49091811
49088739 49092835
49086691 49094883
49082595 49098979
49074403 49107171
49058019 49123555
49123555 100000
86800544 86800546
86800543 86800547
86800541 86800549
86800537 86800553
86800529 86800561
86800513 86800577
86800481 86800609
86800417 86800673
86800289 86800801
86800033 86801057
86799521 86801569
86798497 86802593
86796449 86804641
86792353 86808737
86784161 86816929
86767777 86833313
86833313 100000
30474024 30474026
30474023 30474027
30474021 30474029
30474017 30474033
30474009 30474041
30473993 30474057
30473961 30474089
30473897 30474153
30473769 30474281
30473513 30474537
30473001 30475049
30471977 30476073
30469929 30478121
30465833 30482217
30457641 30490409
30441257 30506

  4%|█▍                                     | 515/14310 [00:06<02:51, 80.53it/s]

 127877615
127865327 127881711
127857135 127889903
127840751 127906287
127906287 100000
57284661 57284663
57284660 57284664
57284658 57284666
57284654 57284670
57284646 57284678
57284630 57284694
57284598 57284726
57284534 57284790
57284406 57284918
57284150 57285174
57283638 57285686
57282614 57286710
57280566 57288758
57276470 57292854
57268278 57301046
57251894 57317430
57317430 100000
4686373 4686375
4686372 4686376
4686370 4686378
4686366 4686382
4686358 4686390
4686342 4686406
4686310 4686438
4686246 4686502
4686118 4686630
4685862 4686886
4685350 4687398
4684326 4688422
4682278 4690470
4678182 4694566
4669990 4702758
4653606 4719142
4719142 100000
77786988 77786990
77786987 77786991
77786985 77786993
77786981 77786997
77786973 77787005
77786957 77787021
77786925 77787053
77786861 77787117
77786733 77787245
77786477 77787501
77785965 77788013
77784941 77789037
77782893 77791085
77778797 77795181
77770605 77803373
77754221 77819757
77819757 100000
122399988 122399990
122399987 122

  4%|█▍                                     | 533/14310 [00:06<02:55, 78.35it/s]

 13788957
13776669 13793053
13768477 13801245
13752093 13817629
13817629 100000
52127456 52127458
52127455 52127459
52127453 52127461
52127449 52127465
52127441 52127473
52127425 52127489
52127393 52127521
52127329 52127585
52127201 52127713
52126945 52127969
52126433 52128481
52125409 52129505
52123361 52131553
52119265 52135649
52111073 52143841
52094689 52160225
52160225 100000
34179028 34179030
34179027 34179031
34179025 34179033
34179021 34179037
34179013 34179045
34178997 34179061
34178965 34179093
34178901 34179157
34178773 34179285
34178517 34179541
34178005 34180053
34176981 34181077
34174933 34183125
34170837 34187221
34162645 34195413
34146261 34211797
34211797 100000
29850658 29850660
29850657 29850661
29850655 29850663
29850651 29850667
29850643 29850675
29850627 29850691
29850595 29850723
29850531 29850787
29850403 29850915
29850147 29851171
29849635 29851683
29848611 29852707
29846563 29854755
29842467 29858851
29834275 29867043
29817891 29883427
29883427 100000
62387341

  4%|█▍                                     | 549/14310 [00:06<03:07, 73.53it/s]

57045528
57033240 57049624
57025048 57057816
57008664 57074200
57074200 100000
152710 152712
152709 152713
152707 152715
152703 152719
152695 152727
152679 152743
152647 152775
152583 152839
152455 152967
152199 153223
151687 153735
150663 154759
148615 156807
144519 160903
136327 169095
119943 185479
185479 100000
42660283 42660285
42660282 42660286
42660280 42660288
42660276 42660292
42660268 42660300
42660252 42660316
42660220 42660348
42660156 42660412
42660028 42660540
42659772 42660796
42659260 42661308
42658236 42662332
42656188 42664380
42652092 42668476
42643900 42676668
42627516 42693052
42693052 100000
26244918 26244920
26244917 26244921
26244915 26244923
26244911 26244927
26244903 26244935
26244887 26244951
26244855 26244983
26244791 26245047
26244663 26245175
26244407 26245431
26243895 26245943
26242871 26246967
26240823 26249015
26236727 26253111
26228535 26261303
26212151 26277687
26277687 100000
4839792 4839794
4839791 4839795
4839789 4839797
4839785 4839801
4839777 483

  4%|█▌                                     | 565/14310 [00:07<03:11, 71.70it/s]

 89368764
89319612 89385148
89385148 100000
183825568 183825570
183825567 183825571
183825565 183825573
183825561 183825577
183825553 183825585
183825537 183825601
183825505 183825633
183825441 183825697
183825313 183825825
183825057 183826081
183824545 183826593
183823521 183827617
183821473 183829665
183817377 183833761
183809185 183841953
183792801 183858337
183858337 100000
55221048 55221050
55221047 55221051
55221045 55221053
55221041 55221057
55221033 55221065
55221017 55221081
55220985 55221113
55220921 55221177
55220793 55221305
55220537 55221561
55220025 55222073
55219001 55223097
55216953 55225145
55212857 55229241
55204665 55237433
55188281 55253817
55253817 100000
32483471 32483473
32483470 32483474
32483468 32483476
32483464 32483480
32483456 32483488
32483440 32483504
32483408 32483536
32483344 32483600
32483216 32483728
32482960 32483984
32482448 32484496
32481424 32485520
32479376 32487568
32475280 32491664
32467088 32499856
32450704 32516240
32516240 100000
87990719 87

  4%|█▌                                     | 581/14310 [00:07<03:31, 64.98it/s]

 57021664
57021664 100000
33266381 33266383
33266380 33266384
33266378 33266386
33266374 33266390
33266366 33266398
33266350 33266414
33266318 33266446
33266254 33266510
33266126 33266638
33265870 33266894
33265358 33267406
33264334 33268430
33262286 33270478
33258190 33274574
33249998 33282766
33233614 33299150
33299150 100000
43637323 43637325
43637322 43637326
43637320 43637328
43637316 43637332
43637308 43637340
43637292 43637356
43637260 43637388
43637196 43637452
43637068 43637580
43636812 43637836
43636300 43638348
43635276 43639372
43633228 43641420
43629132 43645516
43620940 43653708
43604556 43670092
43670092 100000
130716255 130716257
130716254 130716258
130716252 130716260
130716248 130716264
130716240 130716272
130716224 130716288
130716192 130716320
130716128 130716384
130716000 130716512
130715744 130716768
130715232 130717280
130714208 130718304
130712160 130720352
130708064 130724448
130699872 130732640
130683488 130749024
130749024 100000
186242611 186242613
186242610

  4%|█▋                                     | 597/14310 [00:07<03:24, 67.13it/s]

 36845383
36796231 36861767
36861767 100000
123530539 123530541
123530538 123530542
123530536 123530544
123530532 123530548
123530524 123530556
123530508 123530572
123530476 123530604
123530412 123530668
123530284 123530796
123530028 123531052
123529516 123531564
123528492 123532588
123526444 123534636
123522348 123538732
123514156 123546924
123497772 123563308
123563308 100000
36574576 36574578
36574575 36574579
36574573 36574581
36574569 36574585
36574561 36574593
36574545 36574609
36574513 36574641
36574449 36574705
36574321 36574833
36574065 36575089
36573553 36575601
36572529 36576625
36570481 36578673
36566385 36582769
36558193 36590961
36541809 36607345
36607345 100000
46021713 46021715
46021712 46021716
46021710 46021718
46021706 46021722
46021698 46021730
46021682 46021746
46021650 46021778
46021586 46021842
46021458 46021970
46021202 46022226
46020690 46022738
46019666 46023762
46017618 46025810
46013522 46029906
46005330 46038098
45988946 46054482
46054482 100000
10382156 10

  4%|█▋                                     | 612/14310 [00:07<03:18, 68.92it/s]

 154866972
154817820 154883356
154883356 100000
75209705 75209707
75209704 75209708
75209702 75209710
75209698 75209714
75209690 75209722
75209674 75209738
75209642 75209770
75209578 75209834
75209450 75209962
75209194 75210218
75208682 75210730
75207658 75211754
75205610 75213802
75201514 75217898
75193322 75226090
75176938 75242474
75242474 100000
83193402 83193404
83193401 83193405
83193399 83193407
83193395 83193411
83193387 83193419
83193371 83193435
83193339 83193467
83193275 83193531
83193147 83193659
83192891 83193915
83192379 83194427
83191355 83195451
83189307 83197499
83185211 83201595
83177019 83209787
83160635 83226171
83226171 100000
168901316 168901318
168901315 168901319
168901313 168901321
168901309 168901325
168901301 168901333
168901285 168901349
168901253 168901381
168901189 168901445
168901061 168901573
168900805 168901829
168900293 168902341
168899269 168903365
168897221 168905413
168893125 168909509
168884933 168917701
168868549 168934085
168934085 100000
1374647

  4%|█▋                                     | 628/14310 [00:08<03:10, 71.70it/s]

 100000
92424435 92424437
92424434 92424438
92424432 92424440
92424428 92424444
92424420 92424452
92424404 92424468
92424372 92424500
92424308 92424564
92424180 92424692
92423924 92424948
92423412 92425460
92422388 92426484
92420340 92428532
92416244 92432628
92408052 92440820
92391668 92457204
92457204 100000
32189916 32189918
32189915 32189919
32189913 32189921
32189909 32189925
32189901 32189933
32189885 32189949
32189853 32189981
32189789 32190045
32189661 32190173
32189405 32190429
32188893 32190941
32187869 32191965
32185821 32194013
32181725 32198109
32173533 32206301
32157149 32222685
32222685 100000
103367990 103367992
103367989 103367993
103367987 103367995
103367983 103367999
103367975 103368007
103367959 103368023
103367927 103368055
103367863 103368119
103367735 103368247
103367479 103368503
103366967 103369015
103365943 103370039
103363895 103372087
103359799 103376183
103351607 103384375
103335223 103400759
103400759 100000
2470793 2470795
2470792 2470796
2470790 2470798

  5%|█▊                                     | 648/14310 [00:08<02:43, 83.71it/s]

 100000
12049887 12049889
12049886 12049890
12049884 12049892
12049880 12049896
12049872 12049904
12049856 12049920
12049824 12049952
12049760 12050016
12049632 12050144
12049376 12050400
12048864 12050912
12047840 12051936
12045792 12053984
12041696 12058080
12033504 12066272
12017120 12082656
12082656 100000
1712318 1712320
1712317 1712321
1712315 1712323
1712311 1712327
1712303 1712335
1712287 1712351
1712255 1712383
1712191 1712447
1712063 1712575
1711807 1712831
1711295 1713343
1710271 1714367
1708223 1716415
1704127 1720511
1695935 1728703
1679551 1745087
1745087 100000
88586785 88586787
88586784 88586788
88586782 88586790
88586778 88586794
88586770 88586802
88586754 88586818
88586722 88586850
88586658 88586914
88586530 88587042
88586274 88587298
88585762 88587810
88584738 88588834
88582690 88590882
88578594 88594978
88570402 88603170
88554018 88619554
88619554 100000
118956339 118956341
118956338 118956342
118956336 118956344
118956332 118956348
118956324 118956356
118956308 118

  5%|█▊                                     | 667/14310 [00:08<02:41, 84.51it/s]

 175658310
175657542 175658566
175657030 175659078
175656006 175660102
175653958 175662150
175649862 175666246
175641670 175674438
175625286 175690822
175690822 100000
35563873 35563875
35563872 35563876
35563870 35563878
35563866 35563882
35563858 35563890
35563842 35563906
35563810 35563938
35563746 35564002
35563618 35564130
35563362 35564386
35562850 35564898
35561826 35565922
35559778 35567970
35555682 35572066
35547490 35580258
35531106 35596642
35596642 100000
39229424 39229426
39229423 39229427
39229421 39229429
39229417 39229433
39229409 39229441
39229393 39229457
39229361 39229489
39229297 39229553
39229169 39229681
39228913 39229937
39228401 39230449
39227377 39231473
39225329 39233521
39221233 39237617
39213041 39245809
39196657 39262193
39262193 100000
69369516 69369518
69369515 69369519
69369513 69369521
69369509 69369525
69369501 69369533
69369485 69369549
69369453 69369581
69369389 69369645
69369261 69369773
69369005 69370029
69368493 69370541
69367469 69371565
69365421

  5%|█▊                                     | 684/14310 [00:08<03:01, 75.16it/s]

 100000
166999340 166999342
166999339 166999343
166999337 166999345
166999333 166999349
166999325 166999357
166999309 166999373
166999277 166999405
166999213 166999469
166999085 166999597
166998829 166999853
166998317 167000365
166997293 167001389
166995245 167003437
166991149 167007533
166982957 167015725
166966573 167032109
167032109 100000
67648801 67648803
67648800 67648804
67648798 67648806
67648794 67648810
67648786 67648818
67648770 67648834
67648738 67648866
67648674 67648930
67648546 67649058
67648290 67649314
67647778 67649826
67646754 67650850
67644706 67652898
67640610 67656994
67632418 67665186
67616034 67681570
67681570 100000
129093351 129093353
129093350 129093354
129093348 129093356
129093344 129093360
129093336 129093368
129093320 129093384
129093288 129093416
129093224 129093480
129093096 129093608
129092840 129093864
129092328 129094376
129091304 129095400
129089256 129097448
129085160 129101544
129076968 129109736
129060584 129126120
129126120 100000
167754959 1677

  5%|█▊                                     | 686/14310 [00:08<02:53, 78.45it/s]

 36329928
36329928 100000
73264528 73264530
73264527 73264531
73264525 73264533
73264521 73264537
73264513 73264545
73264497 73264561
73264465 73264593
73264401 73264657
73264273 73264785
73264017 73265041
73263505 73265553
73262481 73266577
73260433 73268625
73256337 73272721
73248145 73280913





KeyboardInterrupt: 

In [212]:
# Preprocess the data

# Extract simple features with 100 bins from gene_end till gene_start
def preprocess(input_file, nbins = 100):

    Data = np.zeros((len(input_file.index), 7*nbins))

    for index in tqdm(input_file.index):
        # Extract all information from the input file
        chrom = str(input_file.loc[index][1])
        gene_start = int(input_file.loc[index][2])
        gene_end = int(input_file.loc[index][3])
        TSS_start = int(input_file.loc[index][4])
        TSS_end = int(input_file.loc[index][5])
        strand = input_file.loc[index][6]

        # Extract features from Dnase and histones, return a list with #nbins features
        dnase_data_point = Dnase.stats(chrom, gene_start, gene_end, nBins=nbins)  #type="",
        
        
        h1_data_point = H1.stats(chrom, gene_start, gene_end, nBins=nbins)
        h2_data_point = H1.stats(chrom, gene_start, gene_end, nBins=nbins)
        h3_data_point = H1.stats(chrom, gene_start, gene_end, nBins=nbins)
        h4_data_point = H1.stats(chrom, gene_start, gene_end, nBins=nbins)
        h5_data_point = H1.stats(chrom, gene_start, gene_end, nBins=nbins)
        h6_data_point = H1.stats(chrom, gene_start, gene_end, nBins=nbins)
        
        # Concatenate all the lists into one
        joined_list = [*dnase_data_point, *h1_data_point, *h2_data_point, *h3_data_point, *h4_data_point, *h5_data_point, *h6_data_point]
        
        # Transform list of features into numpy array
        joined_array = np.array(joined_list)
        
        #if np.isnan(dnase_data_point).any():
        #    print(dnase_data_point)
        #print(dnase_data_point)
        #assert(np.isnan(dnase_data_point).any())

        # Append the data
        Data[index] = joined_array
        
        # Set ever NaN point to zero
        Data[np.isnan(Data)] = 0.0
        
    return Data


In [213]:
train_X = preprocess(X1_train_info) 

100%|█████████████████████████████████████| 14310/14310 [08:13<00:00, 29.02it/s]


In [214]:
val_X = preprocess(X1_val_info)

100%|███████████████████████████████████████| 1974/1974 [00:56<00:00, 34.83it/s]


In [215]:
train_Y = X1_train_y["gex"].to_numpy()

In [216]:
val_Y = X1_val_y["gex"].to_numpy()

In [218]:
train_X.shape

(14310, 700)

# 2.MODEL SELECTION

## 2.1 ElasticNet

In [241]:
ENet = ElasticNet(random_state=0, max_iter=10000)
ENet.fit(train_X, train_Y)

ElasticNet(max_iter=10000, random_state=0)

In [243]:
ENet.score(val_X, val_Y)

0.05504081908760661

In [244]:
enet_pred = ENet.predict(train_X)

In [245]:
print(enet_pred[:10])
print(train_Y[:10])

[ 99.41529372  26.50359834  36.76269642 101.40419732  74.75437682
  -4.1998286   89.87217841  62.89044687  58.18193103  22.07669517]
[0.00000000e+00 2.23910333e+03 1.97980636e+01 4.11530623e+02
 3.42141293e+01 4.80535524e-01 0.00000000e+00 2.11435631e+00
 3.75778780e+01 0.00000000e+00]


## 2.2 MLP

In [226]:
NN = MLPRegressor(
    random_state=1, 
    max_iter=500,
    hidden_layer_sizes= (100,700),
    activation='relu',  # {‘identity’, ‘logistic’, ‘tanh’, ‘relu’}
    solver='adam',  # {‘lbfgs’, ‘sgd’, ‘adam’}
    alpha=0.0001,
    learning_rate='adaptive',  # {‘constant’, ‘invscaling’, ‘adaptive’}
    learning_rate_init=0.001,
    verbose=True,) 

NN.fit(train_X, train_Y)

Iteration 1, loss = 51956.94517390
Iteration 2, loss = 51549.73248231
Iteration 3, loss = 50887.19115628
Iteration 4, loss = 50555.51333918
Iteration 5, loss = 50298.01308233
Iteration 6, loss = 49352.20875738
Iteration 7, loss = 49488.12535131
Iteration 8, loss = 47868.82233166
Iteration 9, loss = 46883.08121425
Iteration 10, loss = 45550.93756837
Iteration 11, loss = 44299.52960970
Iteration 12, loss = 42438.07745437
Iteration 13, loss = 39965.19986611
Iteration 14, loss = 38430.52042221
Iteration 15, loss = 36200.23163826
Iteration 16, loss = 34799.38763003
Iteration 17, loss = 31945.23664408
Iteration 18, loss = 31574.11948458
Iteration 19, loss = 31147.16599362
Iteration 20, loss = 27084.39827712
Iteration 21, loss = 25512.06343993
Iteration 22, loss = 23151.59481040
Iteration 23, loss = 22100.25632971
Iteration 24, loss = 21811.26171909
Iteration 25, loss = 21856.59424119
Iteration 26, loss = 20470.93911895
Iteration 27, loss = 21154.86214555
Iteration 28, loss = 20058.78443135
I

MLPRegressor(hidden_layer_sizes=(100, 700), learning_rate='adaptive',
             max_iter=500, random_state=1, verbose=True)

In [234]:
pred_train = NN.predict(train_X)

In [235]:
pred_train[:10]

array([ 90.98290579, 415.91997644,  66.41045297, 315.4844431 ,
        65.46387629,  36.75675653, 111.68555718,  23.6712158 ,
        62.94834656,   5.96492753])

In [236]:
train_Y[:10]

array([0.00000000e+00, 2.23910333e+03, 1.97980636e+01, 4.11530623e+02,
       3.42141293e+01, 4.80535524e-01, 0.00000000e+00, 2.11435631e+00,
       3.75778780e+01, 0.00000000e+00])

In [227]:
NN.score(val_X, val_Y)

-0.07027035153093064

In [229]:
predictions = NN.predict(val_X)

In [232]:
predictions[:10]

array([ 55.02965072,  22.787415  ,   3.52358356,  45.3461477 ,
       132.00682871,  24.67981342,  91.60057475,  16.56779431,
        36.55935429, -16.02811707])

In [233]:
val_Y[:10]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.48053552,
       13.8394231 ,  0.        ,  0.        ,  0.        , 97.83703272])

In [205]:
NN.n_outputs_

1

In [206]:
NN.out_activation_

'identity'

## 2.3 Pipeline

In [248]:
# Our regression model consists of a feature_seletion part and regression part

regr = Pipeline([
  ('feature_selection', SelectFromModel(estimator=LinearSVC(penalty="l1"))),
  ('regression', ElasticNet(random_state=0, max_iter=10000))
])
regr.fit(train_X, train_Y)

ValueError: Unknown label type: 'continuous'

In [250]:
knn = ElasticNet(random_state=0, max_iter=10000)
sfs = SequentialFeatureSelector(knn, n_features_to_select=3)
sfs.fit(train_X, train_Y)

SequentialFeatureSelector(estimator=ElasticNet(max_iter=10000, random_state=0),
                          n_features_to_select=3)

In [251]:
X = sfs.transform(train_X)

In [252]:
Enet1 = ElasticNet(random_state=0, max_iter=10000)
Enet1.fit(X, train_Y)

ElasticNet(max_iter=10000, random_state=0)

In [254]:
VAL = sfs.transform(val_X)
Enet1.score(VAL, val_Y)

0.048266931243684974