In [1]:
# initialization for my classroom
import os
from datetime import datetime as dt

def logfile(user=os.environ.get('JUPYTERHUB_USER') or 'jovyan'):
    """Return the path of today's session log file for ``user``.

    If ``/srv`` is an existing, writable directory, the log is placed in the
    per-user subdirectory ``/srv/<user>``, which is created when missing.
    Otherwise the current directory (``.``) is used.

    Parameters
    ----------
    user : str
        Name of the per-user subdirectory. The default is evaluated once, when
        this function is defined: the ``JUPYTERHUB_USER`` environment variable,
        or ``'jovyan'`` when that variable is unset or empty.

    Returns
    -------
    str
        ``<prefix>/<YYYYMMDD>.log``, using the local date at call time.
    """
    prefix = '/srv'
    if os.path.isdir(prefix) and os.access(prefix, os.W_OK):
        prefix += '/' + user
        # exist_ok=True makes creation safe when another process creates the
        # same directory between a separate existence check and makedirs().
        os.makedirs(prefix, exist_ok=True)
    else:
        prefix = '.'
    return prefix + '/' + dt.now().strftime('%Y%m%d') + '.log'

# Resolve today's log file once; the same `path` is reused further down to
# derive the file name of the exception log.
path=logfile()
# NOTE(review): presumably kept so an already-running log session can be stopped
# by hand before %logstart is issued again — confirm before removing.
#%logstop
# Start IPython's session logging into `path`:
#   -o also log cell output, -t prefix entries with timestamps, -q no banner;
#   "append" adds to an existing file instead of replacing it.
%logstart -otq $path append

# [python - cannot override sys.excepthook - Stack Overflow](https://stackoverflow.com/questions/1261668/cannot-override-sys-excepthook/28758396)
# https://github.com/ipython/ipython/blob/e6432249582e05f438303ce73d082a0351bb383e/IPython/core/interactiveshell.py#L1952

import sys
import traceback
import IPython

# Save IPython's original showtraceback exactly once. On the first run the bare
# name lookup raises NameError and the original method is stored; when this cell
# is executed again the name already exists, so it is NOT re-read from the class
# (which by then holds the logging wrapper assigned at the end of this cell).
try:
    _showtraceback
except NameError:
    _showtraceback=IPython.core.interactiveshell.InteractiveShell.showtraceback

# Same one-time capture for the original showsyntaxerror.
try:
    _showsyntaxerror
except NameError:
    _showsyntaxerror=IPython.core.interactiveshell.InteractiveShell.showsyntaxerror

import logging
# Exceptions are written to "<YYYYMMDD>-exc.log", next to the session log.
# Only the file extension is swapped: str.replace('.log', ...) would also rewrite
# a ".log" that occurs earlier in the path (e.g. a user directory "john.logan"),
# pointing the file handler at a directory that does not exist.
# force=True drops handlers already attached to the root logger, so re-running
# this cell reconfigures logging instead of being silently ignored.
logging.basicConfig(filename=os.path.splitext(path)[0]+'-exc.log', format='%(asctime)s %(message)s', level=logging.ERROR, force=True)

import sys
import traceback
import IPython

def showtraceback(self, *args, **kwargs):
    """Log the current exception, then display it with the original method.

    Drop-in replacement for ``InteractiveShell.showtraceback``. Two records
    are written with ``logging.error``: the user name (``JUPYTERHUB_USER`` or
    ``'jovyan'``) and the traceback text produced by ``self.InteractiveTB``.
    All arguments are then forwarded unchanged to the saved original
    ``_showtraceback``.

    Parameters
    ----------
    *args, **kwargs
        Whatever the caller passed to ``showtraceback``. IPython's parameter
        order is ``(exc_tuple, filename, tb_offset, exception_only,
        running_compiled_code)``, so ``exc_tuple`` and ``tb_offset`` are
        honoured both positionally and by keyword.
    """
    # Callers such as IPython's excepthook pass exc_tuple positionally; reading
    # it only from kwargs would log whatever sys.exc_info() happens to hold.
    exc_tuple = args[0] if args else kwargs.get('exc_tuple')
    tb_offset = args[2] if len(args) > 2 else kwargs.get('tb_offset')
    try:
        etype, value, tb = self._get_exc_info(exc_tuple)
    except ValueError:
        # _get_exc_info raises ValueError when there is no exception to report.
        # Nothing to log then; the original method below still runs and
        # reports the situation in its own way.
        pass
    else:
        stb = self.InteractiveTB.structured_traceback(
            etype, value, tb, tb_offset=tb_offset)
        logging.error(os.environ.get('JUPYTERHUB_USER') or 'jovyan')
        logging.error(self.InteractiveTB.stb2text(stb))
    _showtraceback(self, *args, **kwargs)

def showsyntaxerror(self, *args, **kwargs):
    """Log the current syntax error, then display it with the original method.

    Drop-in replacement for ``InteractiveShell.showsyntaxerror``. Two records
    are written with ``logging.error``: the user name (``JUPYTERHUB_USER`` or
    ``'jovyan'``) and the traceback text built by ``self.SyntaxTB``. All
    arguments are then forwarded unchanged to the saved original
    ``_showsyntaxerror``.

    Parameters
    ----------
    *args, **kwargs
        Whatever the caller passed to ``showsyntaxerror``. IPython's parameter
        order is ``(filename, running_compiled_code)``, so
        ``running_compiled_code`` is honoured both positionally and by keyword.
    """
    running_compiled_code = (
        args[1] if len(args) > 1 else kwargs.get('running_compiled_code'))
    try:
        etype, value, last_traceback = self._get_exc_info()
    except ValueError:
        # No exception available to log; leave the reporting (or raising) to
        # the original method below instead of failing inside the logger.
        pass
    else:
        # Stack entries are included only for errors raised while running
        # compiled code, mirroring the flag the caller supplied.
        elist = traceback.extract_tb(last_traceback) if running_compiled_code else []
        stb = self.SyntaxTB.structured_traceback(etype, value, elist)
        logging.error(os.environ.get('JUPYTERHUB_USER') or 'jovyan')
        logging.error(self.InteractiveTB.stb2text(stb))
    _showsyntaxerror(self, *args, **kwargs)

# Replace both display methods on the InteractiveShell class with the logging
# wrappers defined above; each wrapper ends by calling the saved original.
IPython.core.interactiveshell.InteractiveShell.showtraceback = showtraceback
IPython.core.interactiveshell.InteractiveShell.showsyntaxerror = showsyntaxerror

# Gene

* [Gene - Wikipedia](https://en.wikipedia.org/wiki/Gene#DNA)
    - "A gene is a region of *DNA* that encodes function. A *chromosome* consists of a long strand of DNA containing many genes. A human chromosome can have up to 500 million base pairs of DNA with thousands of genes."

## Inheritance

* [Heredity - Wikipedia](https://en.wikipedia.org/wiki/Heredity)
* (1865) [Mendelian inheritance - Wikipedia](https://en.wikipedia.org/wiki/Mendelian_inheritance)
  - (1822 &ndash; 1884) [Gregor Mendel - Wikipedia](https://en.wikipedia.org/wiki/Gregor_Mendel)

## Chromosome

* [Chromatin - Wikipedia](https://en.wikipedia.org/wiki/Chromatin)
  - (1843 &ndash; 1905) [Walther Flemming - Wikipedia](https://en.wikipedia.org/wiki/Walther_Flemming)
* (1888) [Chromosome - Wikipedia](https://en.wikipedia.org/wiki/Chromosome)
  - (1836 &ndash; 1921) [Heinrich Wilhelm Gottfried von Waldeyer-Hartz - Wikipedia](https://en.wikipedia.org/wiki/Heinrich_Wilhelm_Gottfried_von_Waldeyer-Hartz)
* [Homologous chromosome - Wikipedia](https://en.wikipedia.org/wiki/Homologous_chromosome)
  * [Chromatid - Wikipedia](https://en.wikipedia.org/wiki/Chromatid)
  * [Centromere - Wikipedia](https://en.wikipedia.org/wiki/Centromere)
    - [Sister chromatids - Wikipedia](https://en.wikipedia.org/wiki/Sister_chromatids)

## DNA

* (1953) [Nucleic acid double helix - Wikipedia](https://en.wikipedia.org/wiki/Nucleic_acid_double_helix), [Base pair - Wikipedia](https://en.wikipedia.org/wiki/Base_pair)
  - (1928 &ndash; ) [James Watson - Wikipedia](https://en.wikipedia.org/wiki/James_Watson#Publishing_The_Double_Helix)
  - (1916 &ndash; 2004) [Francis Crick - Wikipedia](https://en.wikipedia.org/wiki/Francis_Crick)

![fig-1](https://upload.wikimedia.org/wikipedia/commons/4/4b/Chromatin_Structures.png)

* [Histone - Wikipedia](https://en.wikipedia.org/wiki/Histone)

![fig-2](https://upload.wikimedia.org/wikipedia/commons/6/6b/1aoi.jpg)

## Fetching genetic sequence

* [Nucleic acid sequence - Wikipedia](https://en.wikipedia.org/wiki/Nucleic_acid_sequence)
  - [Nucleic acid sequence § Digital representation - Wikipedia](https://en.wikipedia.org/wiki/Nucleic_acid_sequence#Digital_representation)
    - [FASTA format - Wikipedia](https://en.wikipedia.org/wiki/FASTA_format)
  - [Sequence database - Wikipedia](https://en.wikipedia.org/wiki/Sequence_database)
    - [GenBank - Wikipedia](https://en.wikipedia.org/wiki/GenBank)
    - [Protein Data Bank - Wikipedia](https://en.wikipedia.org/wiki/Protein_Data_Bank)
    - [UniProt - Wikipedia](https://en.wikipedia.org/wiki/UniProt#UniProtKB.2FSwiss-Prot)

### method-1: download the FASTA file from the NCBI FTP server

In [6]:
import gzip
from urllib.request import urlopen
from io import StringIO

from Bio import SeqIO
from Bio.Seq import Seq

# Download the gzip-compressed genomic FASTA of assembly GCF_000005845.2
# (ASM584v2) from NCBI, decompress it in memory and expose it as a text stream.
# The `with` block closes the HTTP response as soon as the payload is read.
with urlopen("https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz") as res:
    sio = StringIO(gzip.decompress(res.read()).decode("utf-8"))

# `seq` is an iterator over the records in the FASTA text, not a sequence itself;
# records are pulled from it with next().
seq = SeqIO.parse(sio, "fasta")

In [7]:
# Take the first record from the FASTA iterator `seq`.
# Re-running this cell advances the iterator again instead of returning the same record.
MG1655 = next(seq)

In [8]:
# Length of the record's sequence (number of bases).
len(MG1655.seq)

4641652

### method-2: fetch the GenBank record with pydna

In [6]:
from pydna.genbank import Genbank

In [7]:
# Create the pydna Genbank client; its argument is an e-mail address.
# NOTE(review): this is a hard-coded personal address that every user of the
# notebook will send — presumably it identifies the requester to NCBI; consider
# reading it from configuration instead. TODO confirm.
gb = Genbank("u416528d@ecs.osaka-u.ac.jp")

In [17]:
# Fetch the record with accession "U00096.3".
# This rebinds MG1655, the same name method-1 assigns.
MG1655 = gb.nucleotide("U00096.3")

In [19]:
# Display the fetched record.
MG1655

In [9]:
# Sanity check: the length should equal the one obtained with method-1 (4641652).
len(MG1655.seq)

4641652

### six-frame translation

In [13]:
# Forward strand: the three reading frames start at offsets 0, 1 and 2.
# Each window is 30 bases = 10 codons; '*' in the printed translation marks a stop codon.
for i in range(3):
    sseq = MG1655.seq[i:i+30]
    print(sseq, sseq.translate())

AGCTTTTCATTCTGACTGCAACGGGCAATA SFSF*LQRAI
GCTTTTCATTCTGACTGCAACGGGCAATAT AFHSDCNGQY
CTTTTCATTCTGACTGCAACGGGCAATATG LFILTATGNM


In [15]:
# Reverse strand: the remaining three reading frames are read from the reverse
# complement. The reverse complement of the whole sequence does not depend on
# the frame, so it is computed once rather than on every iteration.
MG1655_rc = MG1655.seq.reverse_complement()
for i in range(3):
    sseq = MG1655_rc[i:i+30]
    print(sseq, sseq.translate())

GAAAAATACTTACTAAGGCGTTTTTTATTT EKYLLRRFLF
AAAAATACTTACTAAGGCGTTTTTTATTTG KNTY*GVFYL
AAAATACTTACTAAGGCGTTTTTTATTTGG KILTKAFFIW
