# 各イントロンの両端3塩基の配列パターンの分布を可視化

In [10]:
# オリジナルモジュールのインポート
from lib.introngap import PileUp
from lib.gbkparse import Seq_count

# モジュールのインポート
import itertools
import logomaker
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go

from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [11]:
# Instantiate the Seq_count class (imported from lib.gbkparse)
gbk = Seq_count()

# Load the gbk file
gbk.read_gbk('../data/gbk/human_ttn.gb')

デフォルト値として、最もエクソンの多いNM_001267550.2を設定


In [16]:
# Distribution of the 5'-side three-base pattern (GTA / GTT / GTC / GTG)
# across introns. One subplot row per motif; a bar of height 1 marks an
# intron whose first three bases equal that motif.
motifs_5p = ["GT" + base for base in "ATCG"]
edge_len = len(motifs_5p[0])

# Fetch each intron's 5' edge once instead of once per motif.
# NOTE(review): intron_seq is called with 0-based indices (unchanged from the
# original cell) while the x axis is labelled from 1 -- confirm that this
# matches the indexing convention of Seq_count.intron_seq.
edges_5p = [gbk.intron_seq(i)[:edge_len] for i in range(gbk.intron_num())]
intron_numbers = list(range(1, len(edges_5p) + 1))

# Hover template: show only the intron number.
hovertemplate = 'Intron: %{x}<extra></extra>'

fig = make_subplots(rows=len(motifs_5p), cols=1, shared_xaxes=True, vertical_spacing=0.02)
for row, motif in enumerate(motifs_5p, start=1):
    # 1 where the intron starts with this motif, 0 otherwise.
    flags = [int(edge == motif) for edge in edges_5p]
    fig.add_trace(go.Bar(x=intron_numbers, y=flags, name=motif, hovertemplate=hovertemplate), row=row, col=1)
fig.update_layout(width=800, height=400, title_text="5' intron edge (NM_001267550.2)")
fig.show()

In [17]:
# Distribution of the 3'-side three-base pattern (AAG / TAG / CAG / GAG)
# across introns. One subplot row per motif; a bar of height 1 marks an
# intron whose last three bases equal that motif.
motifs_3p = [base + "AG" for base in "ATCG"]
edge_len = len(motifs_3p[0])

# Fetch each intron's 3' edge once instead of once per motif.
# NOTE(review): intron_seq is called with 0-based indices (unchanged from the
# original cell) while the x axis is labelled from 1 -- confirm that this
# matches the indexing convention of Seq_count.intron_seq.
edges_3p = [gbk.intron_seq(i)[-edge_len:] for i in range(gbk.intron_num())]
intron_numbers = list(range(1, len(edges_3p) + 1))

# Hover template: show only the intron number.
hovertemplate = 'Intron: %{x}<extra></extra>'

fig = make_subplots(rows=len(motifs_3p), cols=1, shared_xaxes=True, vertical_spacing=0.02)
for row, motif in enumerate(motifs_3p, start=1):
    # 1 where the intron ends with this motif, 0 otherwise.
    flags = [int(edge == motif) for edge in edges_3p]
    fig.add_trace(go.Bar(x=intron_numbers, y=flags, name=motif, hovertemplate=hovertemplate), row=row, col=1)
fig.update_layout(width=800, height=400, title_text="3' intron edge (NM_001267550.2)")
fig.show()

In [14]:
# Presence of one chosen motif at each intron edge, drawn as two stacked
# bar rows (5' edge on top, 3' edge below).

# 5' side: flag introns whose leading bases equal motif5.
motif5 = "GTA"
introns5 = [
    1 if gbk.intron_seq(idx)[:len(motif5)] == motif5 else 0
    for idx in range(gbk.intron_num())
]
numbers5 = list(range(1, len(introns5) + 1))
df5 = pd.DataFrame({
    "intron": numbers5,
    "existence": introns5,
    "description": [f"intron: {num}" for num in numbers5],
})

# 3' side: flag introns whose trailing bases equal motif3.
motif3 = "AAG"
introns3 = [
    1 if gbk.intron_seq(idx)[-len(motif3):] == motif3 else 0
    for idx in range(gbk.intron_num())
]
numbers3 = list(range(1, len(introns3) + 1))
df3 = pd.DataFrame({
    "intron": numbers3,
    "existence": introns3,
    "description": [f"intron: {num}" for num in numbers3],
})

# Both rows use the same hover text: the intron number only.
edge_hover = 'Intron: %{x}<extra></extra>'
panels = [
    (df5, f"5' edge: {motif5}"),
    (df3, f"3' edge: {motif3}"),
]

fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.02)
for row_no, (frame, label) in enumerate(panels, start=1):
    fig.add_trace(
        go.Bar(x=frame['intron'], y=frame['existence'], name=label, hovertemplate=edge_hover),
        row=row_no,
        col=1,
    )
fig.update_layout(width=800, height=300, title_text=f"motif in Intron edges" )
fig.show()

![PSI](../data/pic/Figure5A.png)

In [60]:
# Show which exons have a length that is a multiple of 3.
# Every exon gets a bar of height 1; the bar colour encodes the 0/1 flag.

# Exon numbers are 1-based, matching how exon_seq was called originally.
exon_numbers = list(range(1, gbk.exon_num() + 1))

# Compute each exon length once; it feeds both the flag and the hover text.
exon_lengths = [len(gbk.exon_seq(number)) for number in exon_numbers]

# 1 when the exon length is divisible by 3, 0 otherwise.
exon_3x = [int(length % 3 == 0) for length in exon_lengths]

df = pd.DataFrame({"exon": exon_numbers,
                   "existence": exon_3x,
                   "length": exon_lengths,
                   "bar": [1] * len(exon_numbers)})

fig = px.bar(df, x="exon", y="bar", color='existence', hover_data={'exon': True, 'existence': False, 'length': True, 'bar':False},
             title="Exons with a base number that is a multiple of 3 (NM_001267550.2)", color_continuous_scale='YlGn')
fig.update_layout(width=800, height=250)
# The colour bar adds nothing for a 0/1 flag, so hide it.
fig.update_coloraxes(showscale=False)
fig.show()

In [41]:
import plotly.express as px
import pandas as pd

# Scratch example: a four-row sample table with one value per category.
df = pd.DataFrame(
    {
        'Category': list("ABCD"),
        'Values': [10, 20, 30, 40],
    }
)

# Colouring by the category column gives each bar its own colour.
fig = px.bar(
    df,
    x='Category',
    y='Values',
    color='Category',
)

# Render the chart.
fig.show()
