In [2]:
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch, pica
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import random
import twobitreader
import os

## Initialize Venter books

In [3]:
# Open the 2bit genome file; sequences are read later as venter[name][start:end].
venter = twobitreader.TwoBitFile("venter1.2bit")

In [4]:
# Per-sequence lengths, looked up by sequence name (e.g. venter_sizes["chr1"]).
venter_sizes = venter.sequence_sizes()

In [5]:
# Total length of the genome: the sum of every sequence length in the file.
venter_total = sum(venter_sizes.values())

In [6]:
# Show the total length summed over every sequence (rendered as the cell output).
venter_total

2809547336L

## Initialize the fonts

In [7]:
# Register the TrueType fonts referenced by the layouts below.
for font_name, font_path in [
    ('VeraMO',          'fonts/VeraMono.ttf'),
    ('InputMonoThin',   'fonts/InputMono/InputMono/InputMono-Thin.ttf'),
    ('InputMonoNarrow', 'fonts/InputMono/InputMonoNarrow/InputMonoNarrow-Thin.ttf'),
]:
    pdfmetrics.registerFont(TTFont(font_name, font_path))

# One entry per page layout, as a positional list:
#   [font, size, n_lines, n_letters, line_space, top_shift, left_shift]
# font/size are handed to canvas.setFont(); n_lines is the number of text lines
# per page and n_letters the number of letters per line; line_space, top_shift
# and left_shift are multiplied by `inch` wherever the page printers use them.
versions = {
    "vera_8":          ["VeraMO", 8, 101, 114, 0.1, 0.24, -0.11],
    "vera_10":         ["VeraMO", 10, 84, 91, 0.12, 0.26, -0.11],

    "input_thin_8":    ["InputMonoThin", 8, 101, 108, 0.1, 0.24, -0.11],
    "input_thin_10":   ["InputMonoThin", 10, 84, 91, 0.12, 0.26, -0.11],

    "input_narrow_10": ["InputMonoNarrow", 10, 84, 91, 0.12, 0.26, -0.11],
}


In [8]:
# Rough estimate per layout: pages needed if all n_lines of every page held
# sequence, and the number of 800*2-page volumes those pages fill
# (both rounded down).
for vv, layout in versions.items():
    letters_per_page = layout[2]*layout[3]
    tot_pages   = venter_total//letters_per_page
    tot_volumes = tot_pages//(800*2)
    print("Version {}: {} pages in {} volumes".format(vv, tot_pages, tot_volumes))

Version input_thin_8: 257567 pages in 160 volumes
Version input_narrow_10: 367549 pages in 229 volumes
Version vera_10: 367549 pages in 229 volumes
Version input_thin_10: 367549 pages in 229 volumes
Version vera_8: 244011 pages in 152 volumes


## Page printers

In [9]:
def get_page_data():
    """Return the page geometry shared by the page and volume printers.

    Every value is expressed as a multiple of the imported ``inch`` unit.

    Returns:
        tuple: ``(page_width, page_height, bleed, margin, inner_width,
        inner_height)``, where the inner size is the page size with the
        margin and the bleed removed on both sides.
    """
    page_width, page_height = 8.5*inch, 11*inch
    bleed, margin = 0.125*inch, 0.3*inch

    # Usable area: strip margin and bleed from both edges of each dimension.
    inner_width  = page_width-2*margin-2*bleed
    inner_height = page_height-2*margin-2*bleed

    return page_width, page_height, bleed, margin, inner_width, inner_height

In [10]:
def print_standard_page(_canvas, _version, _chr, _pos_start, with_header = False, skip_lines = 2, num_page=0, collection_total_pages = 248956):
    """Draw one page of sequence letters, plus its footer, on ``_canvas``.

    The page is not finished here: the caller is expected to call
    ``showPage()`` on the canvas afterwards.

    Args:
        _canvas: reportlab canvas to draw on.
        _version: key of ``versions`` selecting the font and the layout.
        _chr: sequence name used to index ``venter`` and ``venter_sizes``.
        _pos_start: 0-based offset of the first letter to print.
        with_header: when True, a centred "Chromosome <name>" title is drawn,
            the text starts two line spacings lower and ``skip_lines`` is
            forced to 4.
        skip_lines: number of the layout's lines that carry no sequence.
        num_page: 0-based index of this page in the collection; the footer
            shows ``num_page+1``.
        collection_total_pages: total page count shown in the footer.

    Returns:
        The offset one past the last letter printed, i.e. the start offset
        of the next page.
    """

    # Get the page data (the outer page size is not needed here)
    _, _, p_bleed, p_margin, p_width_int, p_heigth_int = get_page_data()

    # Get version details
    font_name, font_size, n_lines, n_letters, line_space, top_shift, left_shift = versions[_version][:7]

    # Initialize page
    _canvas.setFont(font_name, font_size)
    _canvas.translate(p_bleed+p_margin, p_bleed+p_margin)

    x_left   = left_shift*inch
    x_centre = left_shift*inch+p_width_int/2

    # Print header in case
    if with_header:
        _canvas.drawCentredString(x_centre, p_heigth_int-top_shift*inch, "Chromosome {}".format(_chr))
        skip_lines = 4

    chr_size   = venter_sizes[_chr]
    char_accum = 0

    # Print all the letters, one line of n_letters at a time
    for i in range(n_lines-skip_lines):

        _this_start = _pos_start+i*n_letters
        # Stop only once every letter has been consumed. The previous test
        # (>= size-1) dropped the final letter whenever it started a new line;
        # the returned offset then never reached the sequence size, so callers
        # waiting for "end >= size" never saw the end of the chromosome.
        if _this_start >= chr_size:
            break

        _letters    = venter[_chr][_this_start:_this_start+n_letters].upper()
        char_accum += len(_letters)

        y_line = p_heigth_int-line_space*(i)*inch-top_shift*inch
        if with_header:
            # Leave two line spacings between the title and the text.
            y_line = y_line-line_space*2*inch
        _canvas.drawString(x_left, y_line, _letters)

    # Compute last char printed
    _pos_end = _pos_start+char_accum

    # Print footer on the last layout line,
    # e.g. "CHR1 [18,228,600 / 18,240,000] - Page 11 of 248,956"
    _index = "{} [{:,} / {:,}] - Page {:,} of {:,}".format(_chr.upper(), _pos_start, _pos_end, num_page+1, collection_total_pages)
    _canvas.drawCentredString(x_centre, p_heigth_int-line_space*(n_lines-1)*inch-top_shift*inch, _index)

    return _pos_end

In [11]:
def print_filler_page(_canvas, _version):
    """Draw a placeholder page whose only content is a centred "Blank page" title.

    Args:
        _canvas: reportlab canvas to draw on.
        _version: key of ``versions`` selecting the font and the shifts.
    """
    # Only the bleed, the margin and the inner size are needed.
    _, _, bleed, margin, inner_width, inner_height = get_page_data()

    layout = versions[_version]
    font_name, font_size  = layout[0], layout[1]
    top_shift, left_shift = layout[5], layout[6]

    _canvas.setFont(font_name, font_size)
    _canvas.translate(bleed+margin, bleed+margin)
    _canvas.drawCentredString(left_shift*inch+inner_width/2, inner_height-top_shift*inch, "Blank page")

In [24]:
def _get_base_page(_chr):
    
    mchr = {}
    for i in range(22):
        mchr["CHR{}".format(i+1)] = "0"*(3-len(str(i+1)))+str(i+1)+"-C-.pdf"
    mchr["CHRX"] = "023-C-.pdf"
    mchr["CHRY"] = "024-C-.pdf"
    
    return mchr[_chr.upper()]

def print_cover_page(_version, _chr, _vol_in_chromosome, _to_vols_chromosomes, _vol_in_collection, _to_vols_collection):
    """Stamp the volume numbers on a chromosome's cover template and save it.

    The labels are drawn on an in-memory PDF page, which is merged onto the
    first page of ``special_pages/<template>`` (see ``_get_base_page``). The
    result is written to ``<NNN>-C-<_version>.pdf`` in the current directory,
    where NNN is ``_vol_in_collection+1`` zero-padded to three digits.

    Each label is drawn twice at the same position: once in the Thin face
    with the variable numbers replaced by spaces, once in the Regular face
    with the fixed text replaced by spaces. NOTE(review): presumably this
    relies on the fonts being monospaced so the two layers line up -- verify
    on the rendered cover. Both layers are repeated at the horizontal centre
    and 5.42 inch to its right.

    Args:
        _version: layout name; only used in the output file name.
        _chr: chromosome name, e.g. "chr1" or "chrY".
        _vol_in_chromosome: 0-based volume index inside the chromosome.
        _to_vols_chromosomes: number of volumes of the chromosome.
        _vol_in_collection: 0-based volume index inside the collection.
        _to_vols_collection: number of volumes of the collection.

    Raises:
        KeyError: if ``_chr`` has no cover template.
    """

    from PyPDF2 import PdfFileReader, PdfFileWriter
    import StringIO

    pdfmetrics.registerFont(TTFont('InputMonoCompressedThin', 'fonts/InputMono/InputMonoCompressed/InputMonoCompressed-Thin.ttf'))
    pdfmetrics.registerFont(TTFont('InputMonoCompressedRegular', 'fonts/InputMono/InputMonoCompressed/InputMonoCompressed-Regular.ttf'))

    p_width  = 21*inch
    p_heigth = 12.75*inch

    # Draw the labels on an in-memory page of the same size as the cover
    packet  = StringIO.StringIO()
    _canvas = canvas.Canvas(packet, pagesize=(p_width, p_heigth), verbosity=1)

    def _blank(value):
        # Same width as str(value), but made of spaces only.
        return " "*len(str(value))

    _chrl   = _chr.upper().replace("CHR","")
    vol_chr = _vol_in_chromosome+1
    vol_col = _vol_in_collection+1

    # (font, size, distance below the page centre in inches, text)
    labels = [
        ("InputMonoCompressedThin",    18, 4.8, "CHR{} {}/{}".format(_blank(_chrl), _blank(vol_chr), _to_vols_chromosomes)),
        ("InputMonoCompressedRegular", 18, 4.8, "   {} {} {}".format(_chrl, vol_chr, _blank(_to_vols_chromosomes))),
        ("InputMonoCompressedThin",    22, 5.2, "V{}/{}".format(_blank(vol_col), _to_vols_collection)),
        ("InputMonoCompressedRegular", 22, 5.2, " {} {}".format(vol_col, _blank(_to_vols_collection))),
    ]

    for shift in [0, 5.42*inch]:
        for font_name, font_size, drop, text in labels:
            _canvas.setFont(font_name, font_size)
            _canvas.drawCentredString(p_width/2+shift, p_heigth/2-drop*inch, text)

    _canvas.save()

    # Move to the beginning of the StringIO buffer so it can be read back
    packet.seek(0)

    fname     = "{}-C-{}.pdf".format(str(vol_col).zfill(3), _version)
    base_path = "special_pages/{}".format(_get_base_page(_chr))

    # The template stays open until the merged page has been written, and is
    # then closed even if merging or writing fails.
    with open(base_path, "rb") as base_stream:
        # Add the "watermark" (the page just drawn) on the existing page
        page = PdfFileReader(base_stream).getPage(0)
        page.mergePage(PdfFileReader(packet).getPage(0))

        output = PdfFileWriter()
        output.addPage(page)

        # Finally, write the result to a real file
        with open(fname, "wb") as output_stream:
            output.write(output_stream)

In [25]:
def print_blank_page(_canvas, _version):
    """Leave the current page empty: nothing is drawn on ``_canvas``."""
    return None

## Volume printers

In [26]:
from PyPDF2 import PdfFileMerger, PdfFileReader

# Main document sizes

def print_volume(_version, _chr, _pos_start, 
                 print_border=False, max_pages=1600, add_first_last = True, collection_total_pages=248956,
                 vol_in_chromosome=0, vol_in_collection=0, start_page_in_collection=0):
    """Render one volume of ``_chr`` as a PDF, starting at ``_pos_start``.

    ``max_pages`` body pages are produced: sequence pages until the end of
    the chromosome, then blank pages. With ``add_first_last`` an empty page
    is added before and after the body, and the result is merged between
    ``special_pages/Page-1.pdf`` and ``special_pages/Last-Page.pdf`` into
    ``<NNN>-T-<_version>.pdf``; the intermediate ``TODELETE_*`` file is then
    removed. Without it the body is left in
    ``<_chr>.<vol>_<vol in collection>_<_version>.pdf``.

    Args:
        _version: key of ``versions`` selecting the font and the layout.
        _chr: sequence name.
        _pos_start: 0-based offset of the first letter of the volume.
        print_border: draw the bleed and margin rectangles on every page.
        max_pages: number of body pages (sequence plus blank) in the volume.
        add_first_last: add the empty and special first/last pages.
        collection_total_pages: total page count shown in the footers.
        vol_in_chromosome: 0-based volume index inside the chromosome.
        vol_in_collection: 0-based volume index inside the collection.
        start_page_in_collection: 0-based collection index of the first
            sequence page of this volume.

    Returns:
        tuple: ``(page_end, over_chr, page_count)`` -- the offset following
        the last letter printed, whether the end of the chromosome was
        reached, and the number of sequence pages printed.
    """

    # Get the page data
    p_width, p_heigth, p_bleed, p_margin, p_width_int, p_heigth_int = get_page_data()

    # Generate volume name
    volume_name = "{}.{}_{}_{}.pdf".format(_chr, vol_in_chromosome+1, vol_in_collection+1, _version)
    if add_first_last:
        # Only an intermediate file in this mode; it is deleted after merging.
        volume_name = "TODELETE_"+volume_name

    # Start the chapter
    c = canvas.Canvas(volume_name, pagesize=(p_width, p_heigth), verbosity=1)

    page_count    = 0            # sequence pages printed
    page_count_ww = 0            # sequence + blank pages printed
    page_start    = _pos_start
    page_end      = _pos_start   # defined even before the first page is drawn
    over_chr      = False

    # Leading empty page
    if add_first_last:
        c.showPage()

    while True:

        # Build the borders
        if print_border:
            c.translate(0, 0)
            c.setStrokeColorRGB(0.2,0.5,0.3)
            c.rect(p_bleed,p_bleed, p_width-2*p_bleed, p_heigth-2*p_bleed)

            c.translate(p_bleed, p_bleed)
            c.setStrokeColorRGB(0.2,0.5,0.3)
            c.rect(p_margin,p_margin, p_width_int, p_heigth_int)
        else:
            c.translate(p_bleed, p_bleed)

        if over_chr:
            # Chromosome exhausted: pad the volume with blank pages.
            print_blank_page(c, _version)
        else:
            page_end    = print_standard_page(c, _version, _chr, page_start,
                                              num_page=start_page_in_collection+page_count,
                                              collection_total_pages=collection_total_pages)
            page_start  = page_end
            page_count += 1

        c.showPage()
        page_count_ww += 1

        # Evaluated on every page, including the last one of the volume, so
        # the caller learns when a chromosome ends exactly on a volume boundary.
        if page_end >= venter_sizes[_chr]:
            over_chr = True

        if page_count_ww >= max_pages:
            break

    # Trailing empty page
    if add_first_last:
        c.showPage()

    c.save()

    if add_first_last:
        fname  = "{}-T-{}.pdf".format(str(vol_in_collection+1).zfill(3), _version)
        merger = PdfFileMerger()
        # Every input is closed before the intermediate file is removed:
        # deleting a file that is still open fails on some platforms, and the
        # handles were previously never closed at all.
        with open("special_pages/Page-1.pdf", 'rb') as first_pdf, \
             open(volume_name, 'rb') as body_pdf, \
             open("special_pages/Last-Page.pdf", 'rb') as last_pdf:
            try:
                for stream in (first_pdf, body_pdf, last_pdf):
                    merger.append(PdfFileReader(stream))
                merger.write(fname)
            finally:
                merger.close()
        os.unlink(volume_name)

    return page_end, over_chr, page_count

In [27]:
def print_chromosome(_version, _chr, 
                     print_border = False, 
                     max_pages    = 1596, 
                     start_page_in_collection = 0, 
                     start_vol_in_collection  = 0, 
                     collection_total_pages   = 248956,
                     tot_vols_chromosomes     = 0,
                     tot_vols_collection      = 0):
    """Print every volume of one chromosome, each followed by its cover.

    Volumes are generated with ``print_volume`` until it reports that the end
    of the chromosome was reached; ``print_cover_page`` is called after each.

    Args:
        _version: key of ``versions`` selecting the font and the layout.
        _chr: sequence name.
        print_border: forwarded to ``print_volume``.
        max_pages: body pages per volume, forwarded to ``print_volume``.
        start_page_in_collection: 0-based collection index of the first page.
        start_vol_in_collection: 0-based collection index of the first volume.
        collection_total_pages: total page count shown in the footers.
        tot_vols_chromosomes: volume count of this chromosome (for the cover).
        tot_vols_collection: volume count of the collection (for the cover).

    Returns:
        tuple: ``(volumes generated, sequence pages generated)``.
    """
    volumes_done   = 0
    tot_pages_done = 0
    pos_start      = 0

    while True:
        vol_in_collection = start_vol_in_collection+volumes_done

        pos_start, over_chr, pages_done = print_volume(
            _version, _chr, pos_start,
            print_border             = print_border,
            max_pages                = max_pages,
            collection_total_pages   = collection_total_pages,
            vol_in_collection        = vol_in_collection,
            vol_in_chromosome        = volumes_done,
            start_page_in_collection = start_page_in_collection+tot_pages_done)

        print_cover_page(_version, _chr,
                         volumes_done, tot_vols_chromosomes,
                         vol_in_collection, tot_vols_collection)

        tot_pages_done += pages_done
        volumes_done   += 1
        print("Chromosome {} - volume {}-{} generated".format(_chr, volumes_done, start_vol_in_collection+volumes_done))

        if over_chr:
            break

    return volumes_done, tot_pages_done

In [28]:
def pre_compute_chromosome(_version, _chr, max_pages = 1596, skip_lines=2):
    """Compute how many volumes and pages a chromosome needs, without printing.

    Args:
        _version: key of ``versions``; supplies lines per page and letters
            per line.
        _chr: sequence name, looked up in ``venter_sizes``.
        max_pages: pages per volume.
        skip_lines: lines of every page that carry no sequence.

    Returns:
        tuple: ``(volumes, pages, white_pages)`` where ``white_pages`` is the
        number of pages needed to fill up the last volume (0 when the pages
        fill the volumes exactly).
    """
    layout = versions[_version]
    n_lines, n_letters = layout[2], layout[3]

    letters_per_page = n_letters*(n_lines-skip_lines)

    # Pages: full pages plus one partial page when letters are left over.
    full_pages, leftover_letters = divmod(venter_sizes[_chr], letters_per_page)
    chr_pages = int(full_pages)
    if leftover_letters > 0:
        chr_pages += 1

    # Volumes: same rounding up; the shortfall of the last volume is blank.
    full_volumes, leftover_pages = divmod(chr_pages, max_pages)
    chr_volumes = int(full_volumes)
    white_pages = 0
    if leftover_pages > 0:
        chr_volumes += 1
        white_pages += max_pages-leftover_pages

    return chr_volumes, chr_pages, white_pages
        

In [29]:
def print_collection(_version, dry_run = False, max_pages=1596, chr_list = None):
    """Print (or only size) the whole collection for one layout.

    The expected number of volumes, pages and blank pages is first computed
    and printed per chromosome with ``pre_compute_chromosome``. Unless
    ``dry_run`` is set, every chromosome is then rendered with
    ``print_chromosome`` and the totals actually produced are printed.

    Args:
        _version: key of ``versions`` selecting the font and the layout.
        dry_run: when True only the expected figures are printed; no PDF is
            generated.
        max_pages: pages per volume, used both for the estimate and for the
            actual printing.
        chr_list: sequence names to process, in order; defaults to chr1..chr22,
            chrX, chrY.
    """
    if chr_list is None:
        chr_list = ["chr{}".format(number) for number in range(1, 23)] + ["chrX", "chrY"]

    # Pass 1: expected sizes. They are needed up front because every footer
    # and cover shows the collection totals.
    total_volumes_exp = 0
    total_pages_exp   = 0
    total_pages_exp_w = 0
    chr_volumes_store = {}

    for _chr in chr_list:
        chr_volumes, chr_pages, white_pages = pre_compute_chromosome(_version, _chr, max_pages=max_pages)
        chr_volumes_store[_chr] = chr_volumes
        total_volumes_exp      += chr_volumes
        total_pages_exp        += chr_pages
        total_pages_exp_w      += white_pages
        print("{}\tExpecting\t{}\tvolumes with a total of\t{}\tpages [{}\twhite]".format(_chr, chr_volumes, chr_pages, white_pages))

    print("Expected: {} {} [{}]".format(total_volumes_exp, total_pages_exp, total_pages_exp_w))

    if dry_run:
        return

    # Pass 2: actual printing.
    total_volumes = 0
    total_pages   = 0

    for _chr in chr_list:
        chr_volumes, chr_pages = print_chromosome(_version, _chr,
                                                  # Same volume size as the estimate above; it used to be
                                                  # left at print_chromosome's default, so a non-default
                                                  # max_pages made the totals disagree with the output.
                                                  max_pages                = max_pages,
                                                  start_vol_in_collection  = total_volumes,
                                                  start_page_in_collection = total_pages,
                                                  collection_total_pages   = total_pages_exp,
                                                  tot_vols_chromosomes     = chr_volumes_store[_chr],
                                                  tot_vols_collection      = total_volumes_exp
                                                 )
        total_volumes += chr_volumes
        total_pages   += chr_pages

    print("Executed: {} {}".format(total_volumes, total_pages))

In [30]:
#print_volume("vera_8", "chr1", 0, print_border=False)
#chr_volumes, chr_pages = print_chromosome("input_thin_8", "chr22", start_vol_in_collection=123, start_page_in_collection=20000000)

In [31]:
# Dry run: only the expected volume / page / white-page counts are printed; no PDF is generated.
print_collection("input_thin_8", dry_run=True)

chr1	Expecting	13	volumes with a total of	20528	pages [220	white]
chr2	Expecting	14	volumes with a total of	21962	pages [382	white]
chr3	Expecting	12	volumes with a total of	18255	pages [897	white]
chr4	Expecting	11	volumes with a total of	17463	pages [93	white]
chr5	Expecting	11	volumes with a total of	16409	pages [1147	white]
chr6	Expecting	10	volumes with a total of	15754	pages [206	white]
chr7	Expecting	9	volumes with a total of	14298	pages [66	white]
chr8	Expecting	9	volumes with a total of	13227	pages [1137	white]
chr9	Expecting	7	volumes with a total of	10341	pages [831	white]
chr10	Expecting	8	volumes with a total of	12064	pages [704	white]
chr11	Expecting	8	volumes with a total of	12243	pages [525	white]
chr12	Expecting	8	volumes with a total of	12217	pages [551	white]
chr13	Expecting	6	volumes with a total of	8921	pages [655	white]
chr14	Expecting	6	volumes with a total of	8179	pages [1397	white]
chr15	Expecting	5	volumes with a total of	7345	pages [635	white]
chr16	Expecting

In [32]:
# Same dry run for the "vera_8" layout, to compare its expected sizes.
print_collection("vera_8", dry_run=True)

chr1	Expecting	13	volumes with a total of	19447	pages [1301	white]
chr2	Expecting	14	volumes with a total of	20806	pages [1538	white]
chr3	Expecting	11	volumes with a total of	17294	pages [262	white]
chr4	Expecting	11	volumes with a total of	16544	pages [1012	white]
chr5	Expecting	10	volumes with a total of	15546	pages [414	white]
chr6	Expecting	10	volumes with a total of	14925	pages [1035	white]
chr7	Expecting	9	volumes with a total of	13546	pages [818	white]
chr8	Expecting	8	volumes with a total of	12531	pages [237	white]
chr9	Expecting	7	volumes with a total of	9797	pages [1375	white]
chr10	Expecting	8	volumes with a total of	11429	pages [1339	white]
chr11	Expecting	8	volumes with a total of	11599	pages [1169	white]
chr12	Expecting	8	volumes with a total of	11574	pages [1194	white]
chr13	Expecting	6	volumes with a total of	8452	pages [1124	white]
chr14	Expecting	5	volumes with a total of	7749	pages [231	white]
chr15	Expecting	5	volumes with a total of	6959	pages [1021	white]
chr16	E

In [33]:
#print_collection("input_thin_8")

In [36]:
# Cover for chrY volume 1 of 2, collection volume 174 of 175 (indices are 0-based; the cover prints index+1).
print_cover_page("input_thin_8", "chrY", 0, 2, 173, 175)

saved 


In [37]:
# Cover for chrY volume 2 of 2, collection volume 175 of 175 (indices are 0-based; the cover prints index+1).
print_cover_page("input_thin_8", "chrY", 1, 2, 174, 175)

saved 
