# url: https://www.youtube.com/watch?v=zzP6hSkRwDw

塩基配列: https://www.ncbi.nlm.nih.gov/nuccore/NM_001300741.2

In [1]:
# DNA分析するのに必要なライブラリー
!pip install biopython



In [2]:
import os

from Bio import Entrez  # used to download records from NCBI
from Bio import SeqIO  # sequence data input/output (parsing)

# NCBI's usage rules require every Entrez client to supply a contact e-mail.
# Set the ENTREZ_EMAIL environment variable to use your own address; the
# original address remains the fallback so the notebook still runs unchanged.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or "xtnphone@gmail.com"

In [3]:
# Download the record from the NCBI nucleotide database and parse it.
#   id      : accession number, pinned to version .2 so that re-running the
#             notebook keeps fetching the same sequence
#   rettype : "gb" requests the GenBank flat-file format
#   retmode : "text" makes the plain-text response explicit
# The `with` block closes the network handle as soon as parsing is done.
with Entrez.efetch(
    db="nucleotide",
    id="NM_001300741.2",
    rettype="gb",
    retmode="text",
) as handle:
    nucdata = SeqIO.read(handle, "genbank")  # read() parses a single-record file
print(nucdata)

ID: NM_001300741.2
Name: NM_001300741
Description: Homo sapiens nudix hydrolase 12 (NUDT12), transcript variant 2, mRNA
Number of features: 14
/molecule_type=mRNA
/topology=linear
/data_file_division=PRI
/date=25-DEC-2022
/accessions=['NM_001300741', 'XM_005272096']
/sequence_version=2
/keywords=['RefSeq']
/source=Homo sapiens (human)
/organism=Homo sapiens
/taxonomy=['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
/references=[Reference(title='Decapping Enzyme NUDT12 Partners with BLMH for Cytoplasmic Surveillance of NAD-Capped RNAs', ...), Reference(title='Extensive disruption of protein interactions by genetic variants across the allele frequency spectrum in human populations', ...), Reference(title='Structural and mechanistic basis of mammalian Nudt12 RNA deNADding', ...), Reference(title='Architecture of the human interactome defines protein commu

## DNAの操作方法

In [4]:
# Show the full nucleotide sequence, then its length, each on its own line.
print(nucdata.seq, len(nucdata.seq), sep="\n")

AAGACTGCATCCGGCTCCAGGAAAAGCGAGTGGGATATCCCAATCTTTGGACTGCATCCTGGTTGCCTCTACTGTGGTCACCTTTGGGAAGAAATGTCTTCTGTAAAAAGAAGTCTGAAGCAAGAAATAGTTACTCAGTTTCACTGTTCAGCTGCTGAAGGAGATATTGCCAAGTTAACAGGAATACTCAGTCATTCTCCATCTCTTCTCAATGAAACTTCTGAAAATGGCTGGACTGCTTTAATGTGTGACAGATCAATTGTCAATAAATCAAGGCAGACTGCACTGGACATTGCTGTATTTTGGGGTTATAAGCATATAGCTAATTTACTAGCTACTGCTAAAGGTGGGAAGAAGCCTTGGTTCCTAACGAATGAAGTGGAAGAATGTGAAAATTATTTTAGCAAAACACTACTGGACCGGAAAAGTGAAAAGAGAAATAATTCTGACTGGCTGCTAGCTAAAGAAAGCCATCCAGCCACAGTTTTTATTCTTTTCTCAGATTTAAATCCCTTGGTTACTCTAGGTGGCAATAAAGAAAGTTTCCAACAGCCAGAAGTTAGGCTTTGTCAGCTGAACTACACAGATATAAAGGATTATTTGGCCCAGCCTGAGAAGATCACCTTGATTTTTCTTGGAGTAGAACTTGAAATAAAAGACAAACTACTTAATTATGCTGGTGAAGTCCCGAGAGAGGAGGAAGATGGATTGGTTGCCTGGTTTGCTCTAGGTATAGATCCTATTGCTGCTGAAGAATTCAAGCAAAGACATGAAAATTGTTACTTTCTTCATCCTCCTATGCCAGCCCTTCTGCAATTGAAAGAAAAAGAAGCTGGGGTTGTAGCTCAAGCAAGATCTGTTCTTGCCTGGCACAGTCGATACAAGTTTTGCCCAACCTGTGGAAATGCAACTAAAATTGAAGAAGGTGGCTATAAGAGATTATGTTTAAAAGAAGACTGTCCTAGTCTCAATGGCGTCCATAATACCTCATACCCAAGAG

In [5]:
# A slice with step -1 returns the same bases in reverse order.
reversed_seq = nucdata.seq[::-1]
print(reversed_seq)

ATCTAAAATTTTTAAATTACATAATAATGTAAGTAAAGGTCATCTTATGTAAGTGCCGTGATAGTGTAGATGATAAGAAAACTTGACCGGTAAGGAAGATAAATGTATCGAAAAGTGGAGAGTCTCCAACCGTTAAATTCTCAACAATAATTTTATTATTAAATGGATCTACGACTAAAAAAGGGTTCATTCCTAATTTATAGGTTTTGTTTTACATATTGATGTGAACAACTATGAAATATAAGATGAAAGGATAAAATTGTAATACTTTTGGACCTTAAAATGTTACATTAGAATCTTAGACGTTCATAATTTTCATGTAAATCTCATTTTGACTTAAAGTTCTACGAGATAAAGTATTATTAGTTCCATGTAATAAAAGGTATATGAATACATCGAGCTTAAAATAATACATAGATTTACAGGAGACAGTCAAAATTAAACCAAATTTAAGAACTATGGTTATTCGGAAGTAGTTAAGGATTCTTTTAGTGACGAGTAATCATTTTATTTCAAAATAGACTGTCAATAGAAAATAAATTCAATTTATTTACAATTCATAGATTTTAAAATTAATTGTTATAAACCATTTAATTTAAATAAGAATAGTAAGGAAGAAAATTAAAACAGATATATATAAAAATCATTATCGAAGATCAAGGAAAATGTCCCTCTTAGCAGGAGACGTAATATATCATGTCTGACAACATTAATCAATACATTGAAAGTGATTAATCCATTTAAGTGGAAAGTGTTAAAGAAAAAGGAATAGATATTTTATTCCTTTCGATTAAAGTATATAATAGGAATTCCAGAGAACATCGAGATCGTATGTAATCATTTCTACGTTTAATATGTTTAAAACTATAAGTATTTGATATTATCGATAATTTAGTTAAAGTTTTATCGGAAAGTTTTACACTGAAAGATACAACGTTTTAACACTTGATGATACAAAACGAAAGAGATCTTAGAATGACTAAGTCGAGATTTTATATGA

In [6]:
# Complementary strand (the partner strand of the double helix):
# every base is replaced by its pairing base, order unchanged.
complement_seq = nucdata.seq.complement()
print(complement_seq)

TTCTGACGTAGGCCGAGGTCCTTTTCGCTCACCCTATAGGGTTAGAAACCTGACGTAGGACCAACGGAGATGACACCAGTGGAAACCCTTCTTTACAGAAGACATTTTTCTTCAGACTTCGTTCTTTATCAATGAGTCAAAGTGACAAGTCGACGACTTCCTCTATAACGGTTCAATTGTCCTTATGAGTCAGTAAGAGGTAGAGAAGAGTTACTTTGAAGACTTTTACCGACCTGACGAAATTACACACTGTCTAGTTAACAGTTATTTAGTTCCGTCTGACGTGACCTGTAACGACATAAAACCCCAATATTCGTATATCGATTAAATGATCGATGACGATTTCCACCCTTCTTCGGAACCAAGGATTGCTTACTTCACCTTCTTACACTTTTAATAAAATCGTTTTGTGATGACCTGGCCTTTTCACTTTTCTCTTTATTAAGACTGACCGACGATCGATTTCTTTCGGTAGGTCGGTGTCAAAAATAAGAAAAGAGTCTAAATTTAGGGAACCAATGAGATCCACCGTTATTTCTTTCAAAGGTTGTCGGTCTTCAATCCGAAACAGTCGACTTGATGTGTCTATATTTCCTAATAAACCGGGTCGGACTCTTCTAGTGGAACTAAAAAGAACCTCATCTTGAACTTTATTTTCTGTTTGATGAATTAATACGACCACTTCAGGGCTCTCTCCTCCTTCTACCTAACCAACGGACCAAACGAGATCCATATCTAGGATAACGACGACTTCTTAAGTTCGTTTCTGTACTTTTAACAATGAAAGAAGTAGGAGGATACGGTCGGGAAGACGTTAACTTTCTTTTTCTTCGACCCCAACATCGAGTTCGTTCTAGACAAGAACGGACCGTGTCAGCTATGTTCAAAACGGGTTGGACACCTTTACGTTGATTTTAACTTCTTCCACCGATATTCTCTAATACAAATTTTCTTCTGACAGGATCAGAGTTACCGCAGGTATTATGGAGTATGGGTTCTC

In [7]:
# Reverse complement: the complementary strand read in the opposite direction.
reverse_complement_seq = nucdata.seq.reverse_complement()
print(reverse_complement_seq)

TAGATTTTAAAAATTTAATGTATTATTACATTCATTTCCAGTAGAATACATTCACGGCACTATCACATCTACTATTCTTTTGAACTGGCCATTCCTTCTATTTACATAGCTTTTCACCTCTCAGAGGTTGGCAATTTAAGAGTTGTTATTAAAATAATAATTTACCTAGATGCTGATTTTTTCCCAAGTAAGGATTAAATATCCAAAACAAAATGTATAACTACACTTGTTGATACTTTATATTCTACTTTCCTATTTTAACATTATGAAAACCTGGAATTTTACAATGTAATCTTAGAATCTGCAAGTATTAAAAGTACATTTAGAGTAAAACTGAATTTCAAGATGCTCTATTTCATAATAATCAAGGTACATTATTTTCCATATACTTATGTAGCTCGAATTTTATTATGTATCTAAATGTCCTCTGTCAGTTTTAATTTGGTTTAAATTCTTGATACCAATAAGCCTTCATCAATTCCTAAGAAAATCACTGCTCATTAGTAAAATAAAGTTTTATCTGACAGTTATCTTTTATTTAAGTTAAATAAATGTTAAGTATCTAAAATTTTAATTAACAATATTTGGTAAATTAAATTTATTCTTATCATTCCTTCTTTTAATTTTGTCTATATATATTTTTAGTAATAGCTTCTAGTTCCTTTTACAGGGAGAATCGTCCTCTGCATTATATAGTACAGACTGTTGTAATTAGTTATGTAACTTTCACTAATTAGGTAAATTCACCTTTCACAATTTCTTTTTCCTTATCTATAAAATAAGGAAAGCTAATTTCATATATTATCCTTAAGGTCTCTTGTAGCTCTAGCATACATTAGTAAAGATGCAAATTATACAAATTTTGATATTCATAAACTATAATAGCTATTAAATCAATTTCAAAATAGCCTTTCAAAATGTGACTTTCTATGTTGCAAAATTGTGAACTACTATGTTTTGCTTTCTCTAGAATCTTACTGATTCAGCTCTAAAATATACT

In [8]:
# Transcribe the DNA sequence into RNA (T is written as U in the result).
rna_seq = nucdata.seq.transcribe()
print(rna_seq)

AAGACUGCAUCCGGCUCCAGGAAAAGCGAGUGGGAUAUCCCAAUCUUUGGACUGCAUCCUGGUUGCCUCUACUGUGGUCACCUUUGGGAAGAAAUGUCUUCUGUAAAAAGAAGUCUGAAGCAAGAAAUAGUUACUCAGUUUCACUGUUCAGCUGCUGAAGGAGAUAUUGCCAAGUUAACAGGAAUACUCAGUCAUUCUCCAUCUCUUCUCAAUGAAACUUCUGAAAAUGGCUGGACUGCUUUAAUGUGUGACAGAUCAAUUGUCAAUAAAUCAAGGCAGACUGCACUGGACAUUGCUGUAUUUUGGGGUUAUAAGCAUAUAGCUAAUUUACUAGCUACUGCUAAAGGUGGGAAGAAGCCUUGGUUCCUAACGAAUGAAGUGGAAGAAUGUGAAAAUUAUUUUAGCAAAACACUACUGGACCGGAAAAGUGAAAAGAGAAAUAAUUCUGACUGGCUGCUAGCUAAAGAAAGCCAUCCAGCCACAGUUUUUAUUCUUUUCUCAGAUUUAAAUCCCUUGGUUACUCUAGGUGGCAAUAAAGAAAGUUUCCAACAGCCAGAAGUUAGGCUUUGUCAGCUGAACUACACAGAUAUAAAGGAUUAUUUGGCCCAGCCUGAGAAGAUCACCUUGAUUUUUCUUGGAGUAGAACUUGAAAUAAAAGACAAACUACUUAAUUAUGCUGGUGAAGUCCCGAGAGAGGAGGAAGAUGGAUUGGUUGCCUGGUUUGCUCUAGGUAUAGAUCCUAUUGCUGCUGAAGAAUUCAAGCAAAGACAUGAAAAUUGUUACUUUCUUCAUCCUCCUAUGCCAGCCCUUCUGCAAUUGAAAGAAAAAGAAGCUGGGGUUGUAGCUCAAGCAAGAUCUGUUCUUGCCUGGCACAGUCGAUACAAGUUUUGCCCAACCUGUGGAAAUGCAACUAAAAUUGAAGAAGGUGGCUAUAAGAGAUUAUGUUUAAAAGAAGACUGUCCUAGUCUCAAUGGCGUCCAUAAUACCUCAUACCCAAGAG

In [9]:
# Translate the coding region (CDS) into its amino-acid sequence.
# The CDS boundaries come from the record's own feature annotation instead of
# hard-coded slice indices, so they stay correct if the record ever changes.
cds_features = [feature for feature in nucdata.features if feature.type == "CDS"]
assert len(cds_features) == 1, "expected exactly one CDS feature in the record"
cds_seq = cds_features[0].extract(nucdata.seq)
print(cds_seq.translate())  # the trailing '*' is the stop codon

MSSVKRSLKQEIVTQFHCSAAEGDIAKLTGILSHSPSLLNETSENGWTALMCDRSIVNKSRQTALDIAVFWGYKHIANLLATAKGGKKPWFLTNEVEECENYFSKTLLDRKSEKRNNSDWLLAKESHPATVFILFSDLNPLVTLGGNKESFQQPEVRLCQLNYTDIKDYLAQPEKITLIFLGVELEIKDKLLNYAGEVPREEEDGLVAWFALGIDPIAAEEFKQRHENCYFLHPPMPALLQLKEKEAGVVAQARSVLAWHSRYKFCPTCGNATKIEEGGYKRLCLKEDCPSLNGVHNTSYPRVDPVVIMQVIHPDGTKCLLGRQKRFPPGMFTCLAGFIEPGETIEDAVRREVEEESGVKVGHVQYVACQPWPMPSSLMIGCLALAVSTEIKVDKNEIEDARWFTREQVLDVLTKGKQQAFFVPPSRAIAHQLIKHWIRINPNL*


In [10]:
# Count how often each character occurs.
from collections import Counter

# Nucleotide composition of the whole transcript.
display(Counter(nucdata.seq))

# Amino-acid composition of the translated coding region. The CDS is located
# through the record's feature annotation rather than fixed slice indices.
cds_features = [feature for feature in nucdata.features if feature.type == "CDS"]
assert len(cds_features) == 1, "expected exactly one CDS feature in the record"
protein_seq = cds_features[0].extract(nucdata.seq).translate()
display(Counter(protein_seq))  # includes one '*' for the stop codon

Counter({'A': 1156, 'G': 620, 'C': 543, 'T': 1115})

Counter({'M': 7,
         'S': 25,
         'V': 30,
         'K': 32,
         'R': 20,
         'L': 46,
         'Q': 18,
         'E': 38,
         'I': 25,
         'T': 21,
         'F': 18,
         'H': 12,
         'C': 13,
         'A': 32,
         'G': 26,
         'D': 18,
         'P': 25,
         'N': 19,
         'W': 9,
         'Y': 10,
         '*': 1})

In [11]:
# GC content of the transcript, as a percentage.
# `Bio.SeqUtils.GC` was deprecated and then removed from Biopython;
# `gc_fraction` is its replacement and returns a fraction between 0 and 1,
# hence the multiplication by 100.
from Bio.SeqUtils import gc_fraction

gc_fraction(nucdata.seq) * 100