In [9]:
# オリジナルモジュールのインポート
from lib.introngap import PileUp
from lib.gbkparse import Seq_count

# モジュールのインポート
import itertools
import logomaker
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [10]:
# Location of the GenBank file (human_ttn.gb), relative to this notebook.
gbk_path = '../data/gbk/human_ttn.gb'

# Instantiate the Seq_count class and read the GenBank file into it.
gbk = Seq_count()
gbk.read_gbk(gbk_path)

# Visualize the transcript variants.
gbk.transcript_variants()

デフォルト値として、最もエクソンの多いNM_001267550.2を設定
デフォルト値として、最もエクソンの多いNM_001267550.2を設定


In [11]:
# Build a dictionary whose values are all exons found in any mRNA variant.
# Exon numbers follow one unified numbering, independent of the variant.
var = gbk.get_mrna_ids()

# Gather each distinct exon exactly once, in first-seen order.
all_exon_list = []
for mrna_id in var:
    gbk.set_mrna_id(mrna_id)
    for exon in gbk.exon_list():
        if exon in all_exon_list:
            continue
        all_exon_list.append(exon)

# Order the exons by their first element (the sort is stable, so ties keep
# their first-seen order).
all_exon_list.sort(key=lambda exon: exon[0])

# Unified exon number (starting at 1) -> exon record.
exon_dic = dict(enumerate(all_exon_list, start=1))

In [12]:
# Helper used to express each variant's exons as unified exon numbers.
def get_key(value):
    """Return the first key of ``exon_dic`` whose value equals ``value``.

    The module-level ``exon_dic`` is scanned linearly in insertion order.
    Returns ``None`` when no entry matches.
    """
    for exon_no, exon in exon_dic.items():
        if exon == value:
            return exon_no
    return None

# For every mRNA variant, record its exons as a list of unified exon numbers
# (looked up with get_key; an exon missing from exon_dic yields None).
variant_dic = {}
for mrna_id in var:
    gbk.set_mrna_id(mrna_id)
    variant_dic[mrna_id] = [get_key(exon) for exon in gbk.exon_list()]


In [13]:
# Collect every exon junction that occurs in at least one variant, i.e. each
# pair of exon numbers that are adjacent in a variant's exon list.
# The pair elements have their own names so that the variant id bound by the
# outer iteration is never overwritten by an inner index.
exon_comb = {
    (upstream, downstream)
    for mrna_id in var
    # zip(ls, ls[1:]) pairs each element with its successor; lists shorter
    # than two elements contribute nothing.
    for upstream, downstream in zip(variant_dic[mrna_id], variant_dic[mrna_id][1:])
}
exon_comb

{(1, 2),
 (2, 3),
 (3, 4),
 (4, 5),
 (5, 6),
 (6, 7),
 (7, 8),
 (8, 9),
 (8, 10),
 (9, 11),
 (10, 11),
 (11, 12),
 (11, 13),
 (12, 13),
 (13, 14),
 (14, 15),
 (15, 16),
 (16, 17),
 (17, 18),
 (18, 19),
 (19, 20),
 (20, 21),
 (21, 22),
 (22, 23),
 (23, 24),
 (24, 25),
 (25, 26),
 (26, 27),
 (27, 28),
 (28, 29),
 (29, 30),
 (30, 31),
 (31, 32),
 (32, 33),
 (33, 34),
 (34, 35),
 (35, 36),
 (36, 37),
 (37, 38),
 (38, 39),
 (39, 40),
 (40, 41),
 (41, 42),
 (42, 43),
 (43, 44),
 (44, 45),
 (45, 46),
 (45, 47),
 (45, 48),
 (46, 47),
 (46, 48),
 (47, 48),
 (48, 49),
 (48, 50),
 (48, 51),
 (50, 51),
 (51, 52),
 (51, 223),
 (52, 53),
 (53, 54),
 (54, 55),
 (55, 56),
 (56, 57),
 (57, 58),
 (58, 59),
 (59, 60),
 (60, 61),
 (61, 62),
 (62, 63),
 (63, 64),
 (64, 65),
 (65, 66),
 (66, 67),
 (67, 68),
 (68, 69),
 (69, 70),
 (70, 71),
 (71, 72),
 (72, 73),
 (73, 74),
 (74, 75),
 (75, 76),
 (76, 77),
 (77, 78),
 (78, 79),
 (79, 80),
 (80, 81),
 (81, 82),
 (82, 83),
 (83, 84),
 (84, 85),
 (85, 86),
 (86,

In [14]:
# Restrict the analysis to junctions within the range exon 40 to exon 230:
# the first exon must be >= 40 and the second exon must be <= 230.
exon_comb_ltd = sorted(
    (upstream, downstream)
    for upstream, downstream in exon_comb
    if upstream >= 40 and downstream <= 230
)
print("exon_comb_ltdの要素数:", len(exon_comb_ltd))
exon_comb_ltd

exon_comb_ltdの要素数: 225


[(40, 41),
 (41, 42),
 (42, 43),
 (43, 44),
 (44, 45),
 (45, 46),
 (45, 47),
 (45, 48),
 (46, 47),
 (46, 48),
 (47, 48),
 (48, 49),
 (48, 50),
 (48, 51),
 (50, 51),
 (51, 52),
 (51, 223),
 (52, 53),
 (53, 54),
 (54, 55),
 (55, 56),
 (56, 57),
 (57, 58),
 (58, 59),
 (59, 60),
 (60, 61),
 (61, 62),
 (62, 63),
 (63, 64),
 (64, 65),
 (65, 66),
 (66, 67),
 (67, 68),
 (68, 69),
 (69, 70),
 (70, 71),
 (71, 72),
 (72, 73),
 (73, 74),
 (74, 75),
 (75, 76),
 (76, 77),
 (77, 78),
 (78, 79),
 (79, 80),
 (80, 81),
 (81, 82),
 (82, 83),
 (83, 84),
 (84, 85),
 (85, 86),
 (86, 87),
 (87, 88),
 (88, 89),
 (89, 90),
 (90, 91),
 (91, 92),
 (92, 93),
 (93, 94),
 (94, 95),
 (95, 96),
 (96, 97),
 (97, 98),
 (98, 99),
 (99, 100),
 (100, 101),
 (101, 102),
 (102, 103),
 (103, 104),
 (104, 105),
 (105, 106),
 (106, 107),
 (107, 108),
 (108, 109),
 (109, 110),
 (110, 111),
 (111, 112),
 (112, 113),
 (113, 114),
 (114, 115),
 (115, 116),
 (116, 117),
 (117, 118),
 (117, 120),
 (118, 119),
 (119, 120),
 (120, 121

In [15]:
# Exons that undergo unusual splicing: print every unified exon number whose
# element [1] is greater than element [0] of the next-numbered exon.
# NOTE(review): presumably the records are (start, end), so this flags exons
# that overlap their successor -- confirm against gbk.exon_list().
#
# Every adjacent pair is compared, including the last one. The previous bound
# `i < len(exon_dic.keys()) - 1` stopped one key early (keys start at 1), so
# the pair formed by the final two exons was never checked.
for exon_no in exon_dic:
    if exon_no + 1 not in exon_dic:
        # No next-numbered exon (e.g. the last one): nothing to compare with.
        continue
    if exon_dic[exon_no][1] > exon_dic[exon_no + 1][0]:
        print(exon_no)

9
173
184


In [16]:
# Keep only the junctions that skip one or more exons, i.e. whose two exon
# numbers differ by more than 1.
exon_distant_comb = [
    (upstream, downstream)
    for upstream, downstream in exon_comb_ltd
    if downstream - upstream > 1
]

In [17]:
# For each exon-skipping junction (start, end), generate every pair
# (start, m) with start < m <= end. Collecting into a set removes the
# duplicates produced by junctions that share a start exon.
space_filling_comb = {
    (start, middle)
    for start, end in exon_distant_comb
    for middle in range(start + 1, end + 1)
}
space_filling_comb

{(45, 46),
 (45, 47),
 (45, 48),
 (46, 47),
 (46, 48),
 (48, 49),
 (48, 50),
 (48, 51),
 (51, 52),
 (51, 53),
 (51, 54),
 (51, 55),
 (51, 56),
 (51, 57),
 (51, 58),
 (51, 59),
 (51, 60),
 (51, 61),
 (51, 62),
 (51, 63),
 (51, 64),
 (51, 65),
 (51, 66),
 (51, 67),
 (51, 68),
 (51, 69),
 (51, 70),
 (51, 71),
 (51, 72),
 (51, 73),
 (51, 74),
 (51, 75),
 (51, 76),
 (51, 77),
 (51, 78),
 (51, 79),
 (51, 80),
 (51, 81),
 (51, 82),
 (51, 83),
 (51, 84),
 (51, 85),
 (51, 86),
 (51, 87),
 (51, 88),
 (51, 89),
 (51, 90),
 (51, 91),
 (51, 92),
 (51, 93),
 (51, 94),
 (51, 95),
 (51, 96),
 (51, 97),
 (51, 98),
 (51, 99),
 (51, 100),
 (51, 101),
 (51, 102),
 (51, 103),
 (51, 104),
 (51, 105),
 (51, 106),
 (51, 107),
 (51, 108),
 (51, 109),
 (51, 110),
 (51, 111),
 (51, 112),
 (51, 113),
 (51, 114),
 (51, 115),
 (51, 116),
 (51, 117),
 (51, 118),
 (51, 119),
 (51, 120),
 (51, 121),
 (51, 122),
 (51, 123),
 (51, 124),
 (51, 125),
 (51, 126),
 (51, 127),
 (51, 128),
 (51, 129),
 (51, 130),
 (51, 131),
