In [1]:
import os
import numpy as np
#-------------------
import poseigen_seaside.basics as se
import poseigen_chisel as chis

In [None]:
#!! CHANGE TO WHATEVER YOU WANT: folder that receives the hg38 download and the full-genome pickles.
hg38_root = '../../../../Essentials/hg38'
hg38folder = se.NewFolder(hg38_root)

## Downloads

#### Human Genome (.fasta)

In [None]:
# Reference genome (GRCh38 "no alt" analysis set), as listed on
# https://www.encodeproject.org/data-standards/reference-sequences/
genome_url = 'https://www.encodeproject.org/files/GRCh38_no_alt_analysis_set_GCA_000001405.15/@@download/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.gz'

# The compressed FASTA is stored inside hg38folder.
genome_archive = hg38folder + 'hg38.fasta.gz'
se.download_url(genome_url, genome_archive)

In [None]:
!gzip -d ./hg38.fasta.gz #unzip file 

#### Blacklist Regions (.bed)

Blacklist regions are taken from https://github.com/Boyle-Lab/Blacklist

In [None]:
# raw.githubusercontent.com paths have the form <owner>/<repo>/<ref>/<path>. The '/blob/'
# segment only belongs to github.com web URLs; with it the raw link does not resolve,
# which is why this download previously had to be done by hand.
BLR_url = 'https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/hg38-blacklist.v2.bed.gz'

# Saved to the working directory, which is where the following gzip cell expects the archive.
se.download_url(BLR_url, 'hg38-blacklist.v2.bed.gz')

In [None]:
# Decompress the blacklist archive in the working directory; the result, hg38-blacklist.v2.bed,
# is the file the Bed2Markers cell reads further down.
!gzip -d hg38-blacklist.v2.bed.gz #unzip file 

## Currents for Human Genome

In [None]:
# Read the decompressed FASTA. With idents = True the call returns two objects:
# the sequences and (presumably) one header string per sequence -- TODO confirm against chisel.
hg38, fasta_headers = chis.Fasta2Seqs('hg38.fasta', idents = True)

# Simplified ids: keep only the first whitespace-delimited token of each header.
hg38_id = [header.split()[0] for header in fasta_headers]

# Length of every sequence, in the same order as hg38.
hg38_sizes = list(map(len, hg38))

# Persist the sequences and their [ids, sizes] pair inside hg38folder.
se.PickleDump(hg38, hg38folder +'hg38')
se.PickleDump([hg38_id, hg38_sizes], hg38folder + 'hg38_id_sizes')

In [None]:
# Optional: reload the full genome from pickles instead of re-parsing the FASTA.
# NOTE(review): the genome cell writes these pickles under hg38folder, so the paths
# below probably need that prefix before being uncommented -- confirm.
# hg38 = se.PickleLoad('hg38')
# hg38_id, hg38_sizes = se.PickleLoad('hg38_id_sizes')

# "basic" subset = the first 25 entries of each list
# (presumably chr1-22, X, Y and M in the no-alt analysis set -- TODO confirm ordering).
N_BASIC = 25
hg38_basic = hg38[:N_BASIC]
hg38_basic_id = hg38_id[:N_BASIC]
hg38_basic_sizes = hg38_sizes[:N_BASIC]

# Written to the working directory (no hg38folder prefix), as the later cells expect.
se.PickleDump(hg38_basic, 'hg38_basic')
se.PickleDump([hg38_basic_id, hg38_basic_sizes], 'hg38_basic_id_sizes')

In [None]:
# "mini" subset = the first 3 entries of each list; a small slice of the genome.
N_MINI = 3
hg38_mini = hg38[:N_MINI]
hg38_id_mini = hg38_id[:N_MINI]
hg38_sizes_mini = hg38_sizes[:N_MINI]

# Written to the working directory, alongside the "basic" pickles.
se.PickleDump(hg38_mini, 'hg38_mini')
se.PickleDump([hg38_id_mini, hg38_sizes_mini], 'hg38_mini_id_sizes')

### Blacklist Regions

In [None]:
# Reload the [ids, sizes] pair of the "basic" subset from the working directory.
basic_id_sizes = se.PickleLoad('hg38_basic_id_sizes')
hg38_basic_id, hg38_basic_sizes = basic_id_sizes

In [None]:
# Import the blacklist-regions BED file as Markers and convert it to a BinCurrent.
# *See First_steps for pre-work*
# NOTE: this cell also needs hg38_sizes and hg38_id from the genome-loading cell;
# they are not reloaded here.

hg38_BLR = chis.Bed2Markers('hg38-blacklist.v2.bed')

# Column 3 (0-based) of the marker table is what gets passed as Msizes.
blacklist_Msizes = hg38_BLR.iloc[:, 3]

# Inverse BinCurrent built from the blacklist Markers (0s are bad regions),
# restricted to the "basic" ids and stored as int8.
hg38_basic_BLC = chis.Markers2BinCurrent(
    hg38_BLR, hg38_sizes, hg38_id,
    select_BS_ids = hg38_basic_id,
    Msizes = blacklist_Msizes,
    reso = 1,
    inverse = True,
    dtype = np.int8,
)

se.PickleDump(hg38_basic_BLC, 'hg38_basic_BLC')

In [None]:
#Lowering resolution and taking the minimum (0s are bad regions)

hg38_basic_BLC = se.PickleLoad('hg38_basic_BLC')

# One compute-and-save pass per target resolution (in nucleotides), replacing three
# copy-pasted call pairs that differed only in that number. The call arguments and
# pickle names are the same as before: 'hg38_basic_BLC_LR10', '..._LR20', '..._LR50'.
# np.min is passed as resomode so that, per the note above, 0s (bad regions) win.
BLC_RESOS = (10, 20, 50)
hg38_basic_BLC_LR = {}
for reso in BLC_RESOS:
    hg38_basic_BLC_LR[reso] = chis.LowerResCurrent(hg38_basic_BLC, 1, reso,
                                                   resomode = np.min, dtype = np.int8)
    se.PickleDump(hg38_basic_BLC_LR[reso], 'hg38_basic_BLC_LR{}'.format(reso))

# Keep the original per-resolution variable names available to later cells.
hg38_basic_BLC_LR10 = hg38_basic_BLC_LR[10]
hg38_basic_BLC_LR20 = hg38_basic_BLC_LR[20]
hg38_basic_BLC_LR50 = hg38_basic_BLC_LR[50]