In [None]:
# initialization for my classroom
import os
from datetime import datetime as dt

def logfile(user=os.environ.get('JUPYTERHUB_USER') or 'jovyan'):
    """Return the path of today's session log file for ``user``.

    When ``/srv`` exists and is writable the log is placed in a per-user
    directory ``/srv/<user>``, which is created on demand.  Otherwise the
    log is placed in the current working directory.

    Parameters
    ----------
    user : str
        Name of the per-user sub-directory.  The default is read from the
        ``JUPYTERHUB_USER`` environment variable once, when this function
        is defined, and falls back to ``'jovyan'``.

    Returns
    -------
    str
        ``<directory>/<YYYYMMDD>.log`` for the current local date.
    """
    prefix = '/srv'
    if os.path.isdir(prefix) and os.access(prefix, os.W_OK):
        prefix += '/' + user
        # exist_ok=True replaces the former "isdir() then makedirs()" pair:
        # two kernels starting at the same moment could both pass the check
        # and the slower one would then crash with FileExistsError.
        os.makedirs(prefix, exist_ok=True)
    else:
        prefix = '.'
    return prefix + '/' + dt.now().strftime('%Y%m%d') + '.log'

# Resolve today's log path once; the exception log configured further down
# in this cell derives its file name from the same value.
path=logfile()
# NOTE(review): presumably kept so a running logger can be stopped by hand
# before this cell is re-run — confirm.
#%logstop
# Start IPython session logging into `path`:
#   -o      also record cell outputs, not only inputs
#   -t      prefix each logged input with a timestamp
#   -q      do not print the logging-state message
#   append  keep adding to an existing file instead of replacing it
%logstart -otq $path append

# [python - cannot override sys.excepthook - Stack Overflow](https://stackoverflow.com/questions/1261668/cannot-override-sys-excepthook/28758396)
# https://github.com/ipython/ipython/blob/e6432249582e05f438303ce73d082a0351bb383e/IPython/core/interactiveshell.py#L1952

import sys
import traceback
import IPython

# Remember IPython's stock handlers only the first time this cell runs.
# On a re-run the class attributes already point at the logging wrappers
# installed at the end of this cell; capturing those instead would make the
# wrappers call themselves.
if '_showtraceback' not in globals():
    _showtraceback = IPython.core.interactiveshell.InteractiveShell.showtraceback

if '_showsyntaxerror' not in globals():
    _showsyntaxerror = IPython.core.interactiveshell.InteractiveShell.showsyntaxerror

import logging
# The exception log sits next to the session log: <dir>/<YYYYMMDD>-exc.log.
# Only the trailing extension is rewritten; str.replace() would also change
# a '.log' that happens to occur inside the directory part of the path
# (for example in a user name).
# force=True removes handlers left over from an earlier run, so re-running
# this cell re-targets the log file.
logging.basicConfig(filename=os.path.splitext(path)[0]+'-exc.log', format='%(asctime)s %(message)s', level=logging.ERROR, force=True)

import sys
import traceback
import IPython

def showtraceback(self, *args, **kwargs):
    """Write the current traceback to the exception log, then display it.

    Replacement for ``InteractiveShell.showtraceback``.  All arguments are
    forwarded unchanged to the saved original handler ``_showtraceback``,
    so what the user sees on screen is not altered.

    Two log records are written at ERROR level: the user name (from
    ``JUPYTERHUB_USER``, falling back to ``'jovyan'``) and the formatted
    traceback text.
    """
    # IPython's own signature starts (exc_tuple, filename, tb_offset, ...)
    # and some callers pass exc_tuple positionally, so positional values
    # take precedence over keyword look-ups.
    exc_tuple = args[0] if len(args) > 0 else kwargs.get('exc_tuple')
    tb_offset = args[2] if len(args) > 2 else kwargs.get('tb_offset')
    try:
        etype, value, tb = self._get_exc_info(exc_tuple)
        stb = self.InteractiveTB.structured_traceback(
            etype, value, tb, tb_offset=tb_offset)
        logging.error(os.environ.get('JUPYTERHUB_USER') or 'jovyan')
        logging.error(self.InteractiveTB.stb2text(stb))
    except Exception:
        # Logging is best-effort: a failure here must never prevent the
        # normal traceback from being shown below.
        logging.exception('could not log traceback')
    _showtraceback(self, *args, **kwargs)

def showsyntaxerror(self, *args, **kwargs):
    """Write the current syntax error to the exception log, then display it.

    Replacement for ``InteractiveShell.showsyntaxerror``.  All arguments are
    forwarded unchanged to the saved original handler ``_showsyntaxerror``.

    Two log records are written at ERROR level: the user name (from
    ``JUPYTERHUB_USER``, falling back to ``'jovyan'``) and the formatted
    error text.
    """
    # IPython's own signature is (filename, running_compiled_code); accept
    # the flag positionally as well as by keyword.
    running_compiled_code = args[1] if len(args) > 1 else kwargs.get('running_compiled_code')
    try:
        etype, value, last_traceback = self._get_exc_info()
        # A stack is only included when the error came from compiled code.
        elist = traceback.extract_tb(last_traceback) if running_compiled_code else []
        stb = self.SyntaxTB.structured_traceback(etype, value, elist)
        logging.error(os.environ.get('JUPYTERHUB_USER') or 'jovyan')
        logging.error(self.InteractiveTB.stb2text(stb))
    except Exception:
        # Logging is best-effort: a failure here must never prevent the
        # normal error display below.
        logging.exception('could not log syntax error')
    _showsyntaxerror(self, *args, **kwargs)

# Install the logging wrappers on the class so that every shell instance
# routes its error display through them.
setattr(IPython.core.interactiveshell.InteractiveShell, 'showtraceback', showtraceback)
setattr(IPython.core.interactiveshell.InteractiveShell, 'showsyntaxerror', showsyntaxerror)

# Central dogma

* [Central dogma of molecular biology - Wikipedia](https://en.wikipedia.org/wiki/Central_dogma_of_molecular_biology)
  * [DNA replication - Wikipedia](https://en.wikipedia.org/wiki/DNA_replication) (複製)
  * [Transcription (biology) - Wikipedia](https://en.wikipedia.org/wiki/Transcription_%28biology%29) (転写)
  * [Translation (biology) - Wikipedia](https://en.wikipedia.org/wiki/Translation_%28biology%29#:~:text=In%20molecular%20biology%20and%20genetics,process%20is%20called%20gene%20expression.) (翻訳)

![fig-4](https://upload.wikimedia.org/wikipedia/commons/6/68/Central_Dogma_of_Molecular_Biochemistry_with_Enzymes.jpg)

# Replication

* DNA
  - [DNA replication - Wikipedia](https://en.wikipedia.org/wiki/DNA_replication)
    - [PDB-101: Molecule of the Month: DNA Polymerase](https://pdb101.rcsb.org/motm/3#:~:text=DNA%20polymerase%20plays%20the%20central,copy%20to%20each%20daughter%20cell.)
      - [TaKaRa Taq｜タカラバイオ株式会社](https://catalog.takara-bio.co.jp/product/basic_info.php?unitid=U100003181)
    - [DNA polymerase - Wikipedia](https://en.wikipedia.org/wiki/DNA_polymerase)
    - Chain reaction
      - [Polymerase chain reaction - Wikipedia](https://en.wikipedia.org/wiki/Polymerase_chain_reaction)
* Chromosome (Cell nucleus, Cell)
  - [Chromatid - Wikipedia](https://en.wikipedia.org/wiki/Chromatid), [Sister chromatids - Wikipedia](https://en.wikipedia.org/wiki/Sister_chromatids)
  - [Cell division - Wikipedia](https://en.wikipedia.org/wiki/Cell_division)
    - [Mitosis - Wikipedia](https://en.wikipedia.org/wiki/Mitosis) (有糸分裂)
    - [Meiosis - Wikipedia](https://en.wikipedia.org/wiki/Meiosis) (減数分裂)
  - [Chromosomal crossover - Wikipedia](https://en.wikipedia.org/wiki/Chromosomal_crossover)

## DNAポリメラーゼとPCR (Polymerase Chain Reaction)

* [PDB-101: Molecule of the Month: DNA Polymerase](https://pdb101.rcsb.org/motm/3)
* [DNAポリメラーゼ (DNA Polymerase) | 今月の分子 | PDBj 入門](https://numon.pdbj.org/mom/003?lang=ja&l=ja)
  - "A small sample of DNA is multiplied using PCR (the polymerase chain reaction), creating a large sample that may be easily analyzed. "
      - "The tiny sample is placed in a test tube, and DNA polymerase is added to make a copy. "
      - "Then the sample is heated up momentarily, and the two strands of DNA separate. Then DNA polymerase builds a new double helix from each strand."
      - "These two copies are then heated, and duplicated, yielding four copies. After repeating this many times, many identical DNA strands are produced."

![fig-10](https://cdn.rcsb.org/pdb101/motm/3/1tau.gif)

## Origin of replication

* [Origin of replication - Wikipedia](https://en.wikipedia.org/wiki/Origin_of_replication)

### oriC (origin of chromosomal replication)

* [Special Story バクテリアゲノムの複製開始点 | JT生命誌研究館](https://www.brh.co.jp/publication/journal/031/ss_3)

![fig-1](https://upload.wikimedia.org/wikipedia/commons/6/61/Origins_of_DNA_replication_Figure_2.jpg)

* [DNA unwinding element - Wikipedia](https://en.wikipedia.org/wiki/DNA_unwinding_element)
* [DnaA - Wikipedia](https://en.wikipedia.org/wiki/DnaA)
  - "9-mer"
  - "13-mer" in DUE
* [Frontiers | The DnaA Cycle in Escherichia coli: Activation, Function and Inactivation of the Initiator Protein](https://www.frontiersin.org/articles/10.3389/fmicb.2017.02496/full)

* EcoCyc
  - [Escherichia coli K-12 substr. MG1655 oriC](https://ecocyc.org/ECOLI/NEW-IMAGE?type=EXTRAGENIC-SITE&object=G0-10506)
  - [Escherichia coli K-12 substr. MG1655 DnaA-ATP DNA-binding transcriptional dual regulator](https://ecocyc.org/gene?orgid=ECOLI&id=MONOMER0-160#)

In [1]:
import gzip
from urllib.request import urlopen
from io import StringIO

# Download the gzip-compressed genomic FASTA (NCBI assembly
# GCF_000005845.2 / ASM584v2) and decompress it in memory.
# The `with` block closes the HTTP response as soon as the payload has been
# read instead of leaving the connection open for the kernel's lifetime.
with urlopen("https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz") as res:
    sio = StringIO(gzip.decompress(res.read()).decode("utf-8"))

from Bio import SeqIO
from Bio.Seq import Seq
# Lazy parser over the FASTA records held in `sio`.
seq = SeqIO.parse(sio, "fasta")

In [2]:
# The download is expected to contain only one sequence record; take it.
# NOTE: next() advances the `seq` iterator, so re-running this cell without
# first re-running the loading cell raises StopIteration once the records
# are used up.
seq0 = next(seq)

In [3]:
seq0

SeqRecord(seq=Seq('AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAG...TTC'), id='NC_000913.3', name='NC_000913.3', description='NC_000913.3 Escherichia coli str. K-12 substr. MG1655, complete genome', dbxrefs=[])

In [5]:
# 0-based, end-exclusive slice, i.e. 1-based positions 3,925,744..3,925,975
# (232 nt).  The Seq is sliced first so only that fragment is turned into a
# plain string.
# NOTE(review): presumably the oriC coordinates from the EcoCyc entry linked
# above — confirm.
oriC = str(seq0.seq[3925743:3925975])
oriC

'GATCTATTTATTTAGAGATCTGTTCTATTGTGATCTCTTATTAGGATCGCACTGCCCTGTGGATAACAAGGATCCGGCTTTTAAGATCAACAACCTGGAAAGGATCATTAACTGTGAATGATCGGTGATCCTGGACCGTATAAGCTGGGATCAGAATGAGGGGTTATACACAACTCAAAAACTGAACAACAGTTGTTCTTTGGATAACTACCGGTTGATCCAAGCTTCCTGA'

In [6]:
len(oriC)

232

## DnaA-box (9-mers)

In [8]:
import difflib

In [16]:
# Slide a window the length of the DnaA box along oriC and score each window
# with difflib's similarity ratio in four orientations:
#   first pair of columns  : box            vs. reversed window | window
#   second pair of columns : complement(box) vs. reversed window | window
# Each ratio is also drawn as a bar of '=' (one per 0.1) on either side of '|'.
box = "TTATCCACA"
box_c = Seq(box).complement()  # loop-invariant, computed once
# "+ 1" so that the final window, ending on the last base, is scored too.
for i in range(len(oriC) - len(box) + 1):
    frg = Seq(oriC[i:i+len(box)])
    rtp = difflib.SequenceMatcher(None, box, frg).ratio()
    rtm = difflib.SequenceMatcher(None, box, frg[::-1]).ratio()
    print(frg, "{:.2f} {:.2f}".format(rtm,rtp), end="")
    print("{:>10s}|{:<10s}".format('='*int(rtm*10),'='*int(rtp*10)), end="")
    rtp = difflib.SequenceMatcher(None, box_c, frg).ratio()
    rtm = difflib.SequenceMatcher(None, box_c, frg[::-1]).ratio()
    print("{:.2f} {:.2f}".format(rtm,rtp), end="")
    print("{:>10s}|{:<10s}".format('='*int(rtm*10),'='*int(rtp*10)))

TCTATTTAT 0.56 0.44     =====|====      0.56 0.44     =====|====      
CTATTTATT 0.56 0.44     =====|====      0.44 0.56      ====|=====     
TATTTATTT 0.56 0.44     =====|====      0.44 0.56      ====|=====     
ATTTATTTA 0.56 0.56     =====|=====     0.56 0.56     =====|=====     
TTTATTTAG 0.44 0.56      ====|=====     0.56 0.44     =====|====      
TATTTAGAG 0.44 0.44      ====|====      0.56 0.56     =====|=====     
ATTTAGAGA 0.33 0.56       ===|=====     0.44 0.56      ====|=====     
AGAGATCTG 0.56 0.33     =====|===       0.44 0.56      ====|=====     
GATCTGTTC 0.56 0.44     =====|====      0.33 0.56       ===|=====     
ATCTGTTCT 0.56 0.44     =====|====      0.44 0.56      ====|=====     
TCTGTTCTA 0.44 0.33      ====|===       0.56 0.44     =====|====      
CTGTTCTAT 0.56 0.44     =====|====      0.56 0.33     =====|===       
TGTTCTATT 0.56 0.44     =====|====      0.56 0.33     =====|===       
GTTCTATTG 0.56 0.44     =====|====      0.44 0.44      ====|====      
TTCTAT

### DUE-L,M,R (13-mers)

In [51]:
# Candidate 13-mers; enable exactly one at a time.
# NOTE(review): presumably DUE-L, DUE-M and DUE-R in that order — confirm.
#box = "GATCTATTTATTT"
#box = "GATCTGTTCTATT"
box =  "GATCTCTTATTAG"
# Score every window of oriC against `box`: the left column/bar is the
# reversed window, the right column/bar is the window as read.
# "+ 1" so that the final window, ending on the last base, is scored too.
for i in range(len(oriC) - len(box) + 1):
    frg = Seq(oriC[i:i+len(box)])
    rtp = difflib.SequenceMatcher(None, box, frg).ratio()
    rtm = difflib.SequenceMatcher(None, box, frg[::-1]).ratio()
    print(frg, "{:.2f} {:.2f}".format(rtm,rtp), end="")
    print("{:>10s}|{:<10s}".format('='*int(rtm*10),'='*int(rtp*10)), end="")
    print()

TATTTAGAGATCT 0.54 0.38     =====|===       
ATTTAGAGATCTG 0.46 0.46      ====|====      
TTTAGAGATCTGT 0.38 0.46       ===|====      
TTAGAGATCTGTT 0.31 0.54       ===|=====     
TAGAGATCTGTTC 0.23 0.54        ==|=====     
TTCTATTGTGATC 0.46 0.31      ====|===       
TCTATTGTGATCT 0.38 0.38       ===|===       
CTATTGTGATCTC 0.54 0.46     =====|====      
TTATTAGGATCGC 0.54 0.54     =====|=====     
TATTAGGATCGCA 0.46 0.46      ====|====      
ATTAGGATCGCAC 0.54 0.38     =====|===       
TTAGGATCGCACT 0.31 0.46       ===|====      
TAGGATCGCACTG 0.23 0.54        ==|=====     
AGGATCGCACTGC 0.54 0.54     =====|=====     
GGATCGCACTGCC 0.54 0.54     =====|=====     
GATCGCACTGCCC 0.54 0.54     =====|=====     
ATCGCACTGCCCT 0.46 0.46      ====|====      
TCGCACTGCCCTG 0.46 0.46      ====|====      
CGCACTGCCCTGT 0.54 0.54     =====|=====     
CACTGCCCTGTGG 0.46 0.54      ====|=====     
ACTGCCCTGTGGA 0.46 0.15      ====|=         
CTGCCCTGTGGAT 0.23 0.23        ==|==        
TGCCCTGTGG