# Q2. ダンゴムシの性決定遺伝子（候補）は何か

In [29]:
# Input file names
RES_FILE = "DE_result2.txt"  # gene expression levels and comparison results
BST_FILE = "DEGs_BLAST.txt"  # detailed annotation list (BLAST results)
GO_FILE = "DEGs_GO.txt"  # GO ID list


In [30]:
# パッケージやツールをインポート
import pandas as pd
%matplotlib inline

# Load the tab-separated input tables; the first column of each file is the index.
RES = pd.read_csv(RES_FILE, sep='\t', header=0, index_col=0)
BST = pd.read_csv(BST_FILE, sep='\t', header=0, index_col=0)
# Drop rows whose column values exactly repeat an earlier row.
BST = BST.drop_duplicates()
# The GO file is read without a header row and its data column is named 'GO'.
# `header=None` is the supported way to request that; a negative `header`
# value such as -1 is rejected by current pandas releases.
GO = pd.read_csv(GO_FILE, sep='\t', header=None, index_col=0, names=['GO'])

# GO search関数
def search_go(GO_ID):
    """Look up a term in the EBI QuickGO ontology search and print the hits.

    Sends one GET request to the QuickGO ``ontology/go/search`` endpoint with
    ``GO_ID`` as the URL-quoted query (``limit=1``, ``page=1``) and prints the
    ``id``, ``name`` and ``aspect`` fields of every entry in the ``results``
    list of the JSON response.

    Parameters
    ----------
    GO_ID : str
        Query string, e.g. ``'GO:0048071'``.

    Returns
    -------
    None
        The information is printed, not returned.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status code.
    requests.Timeout
        If the service does not answer within the timeout.
    """
    # Kept as function-local imports, as in the original cell.
    import json
    import urllib.parse

    import requests

    requestURL = (
        "https://www.ebi.ac.uk/QuickGO/services/ontology/go/search?query="
        + urllib.parse.quote(GO_ID)
        + "&limit=1&page=1"
    )

    # Without a timeout an unresponsive server would block the notebook forever.
    r = requests.get(
        requestURL,
        headers={"Accept": "application/json"},
        timeout=30,
    )

    # raise_for_status() raises for every non-OK response, so no separate
    # `r.ok` check or sys.exit() fallback is needed.
    r.raise_for_status()

    d = json.loads(r.text)

    for res in d["results"]:
        print(res['id'])
        print(res['name'])
        print(res['aspect'])

In [170]:
# Keep the rows with FDR below 0.05, left-join the BLAST table and then the GO
# table on the index, fill missing values with '.', and drop duplicate rows.
sub = (
    RES.loc[RES['FDR'] < 0.05]
    .merge(BST, left_index=True, right_index=True, how='left')
    .merge(GO, left_index=True, right_index=True, how='left')
    .fillna('.')
    .drop_duplicates()
)

## AGHを確認

In [171]:
# Search term matched against the 'sprot_Top_BLASTX_hit' column
KEYWORD = 'AGH'
sub.loc[lambda df: df['sprot_Top_BLASTX_hit'].str.contains(KEYWORD)]

Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO
TRINITY_DN58978_c3_g9,1,0,609,1060,female,male,-10.585539,6.138261,1.0986279999999999e-38,8.767409e-36,.,.,.,.,AGH_ARMVU; Androgenic gland hormone,"AGH_ARMVU^AGH_ARMVU^Q:904-473,H:1-144^100%ID^E...","GO:0003006,GO:0003674,GO:0005102,GO:0005179,GO..."


## 性決定遺伝子候補を探す

### ① メス特異的な遺伝子、オス特異的な遺伝子を抽出
＊注意＊　リードカウント数が3未満の場合は「発現なし」とみなしている（例：メス特異的 = male_1・male_2 がともに3未満）。

In [172]:
# Female-only rows: both male columns are below 3
female = sub.loc[sub[['male_1', 'male_2']].lt(3).all(axis=1)]

print('遺伝子数:', len(female))

female

遺伝子数: 1108


Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO
TRINITY_DN102587_c0_g1,18,8,0,0,female,male,6.574112,0.332206,2.233308e-04,4.304572e-03,.,.,.,.,PAFA_CHICK; Platelet-activating factor acetylh...,"PAFA_CHICK^PAFA_CHICK^Q:1325-414,H:129-396^32....","GO:0003674,GO:0003824,GO:0003847,GO:0005575,GO..."
TRINITY_DN103128_c0_g1,8,11,0,0,female,male,6.180121,0.040300,2.953034e-03,3.741636e-02,.,.,.,.,RGP1_HUMAN; RAB6A-GEF complex partner protein ...,"RGP1_HUMAN^RGP1_HUMAN^Q:14-469,H:34-183^50%ID^...","GO:0000139,GO:0003674,GO:0005085,GO:0005088,GO..."
TRINITY_DN103827_c0_g1,17,16,0,0,female,male,6.949162,0.618646,9.687615e-06,2.624335e-04,.,.,.,.,SYV_NITOC; Valine--tRNA ligase {ECO:0000255|HA...,"SYV_NITOC^SYV_NITOC^Q:3-398,H:790-920^36.364%I...","GO:0000166,GO:0001882,GO:0001883,GO:0002161,GO..."
TRINITY_DN10439_c0_g1,12,8,0,0,female,male,6.217877,0.071968,2.321531e-03,3.098811e-02,.,.,.,.,NADK_COXBU; NAD kinase {ECO:0000255|HAMAP-Rule...,"NADK_COXBU^NADK_COXBU^Q:717-1,H:10-245^57.322%...","GO:0000166,GO:0001882,GO:0001883,GO:0003674,GO..."
TRINITY_DN104414_c0_g1,24,12,0,0,female,male,7.044366,0.696013,8.208978e-06,2.253560e-04,.,.,.,.,.,.,.
TRINITY_DN10449_c0_g1,17,9,1,0,female,male,4.155755,0.383987,9.296425e-04,1.442620e-02,.,.,.,.,.,.,.
TRINITY_DN105972_c0_g1,110,0,0,0,female,male,8.555446,2.016215,1.445466e-04,2.930666e-03,.,.,.,.,.,.,.
TRINITY_DN107557_c0_g1,36,24,0,0,female,male,7.789822,1.319140,5.071913e-09,2.331452e-07,.,.,.,.,.,.,.
TRINITY_DN108109_c0_g1,11,12,0,0,female,male,6.440483,0.229015,4.167236e-04,7.281445e-03,.,.,.,.,DUSA_VIBCH; tRNA-dihydrouridine(20/20a) syntha...,"DUSA_VIBCH^DUSA_VIBCH^Q:1-363,H:123-244^63.934...","GO:0000049,GO:0000166,GO:0003674,GO:0003676,GO..."
TRINITY_DN10912_c0_g1,12,10,0,0,female,male,6.364371,0.174930,8.074742e-04,1.282056e-02,.,.,.,.,.,.,.


In [173]:
# Male-only rows: both female columns are below 3
male = sub.loc[sub[['female_1', 'female_2']].lt(3).all(axis=1)]

print('遺伝子数:', len(male))

male

遺伝子数: 1410


Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO
TRINITY_DN10342_c0_g1,1,2,6,23,female,male,-3.354399,0.685689,2.942449e-03,3.735031e-02,.,.,.,.,.,.,.
TRINITY_DN10490_c0_g1,0,0,9,9,female,male,-6.311904,0.073820,1.761585e-03,2.472721e-02,.,.,.,.,.,.,.
TRINITY_DN105138_c0_g1,0,0,5,23,female,male,-6.911949,0.536775,1.727470e-04,3.438527e-03,.,.,.,.,.,.,.
TRINITY_DN10748_c0_g1,0,0,13,12,female,male,-6.782332,0.429271,4.754559e-05,1.094084e-03,.,.,.,.,.,.,.
TRINITY_DN112291_c0_g1,0,0,15,7,female,male,-6.614399,0.296706,3.537212e-04,6.330570e-03,.,.,.,.,.,.,.
TRINITY_DN118029_c0_g1,0,0,9,26,female,male,-7.239649,0.803323,8.066334e-06,2.221491e-04,.,.,.,.,.,.,.
TRINITY_DN118307_c0_g1,0,0,12,5,female,male,-6.249779,0.025389,3.677817e-03,4.453635e-02,.,.,.,.,.,.,.
TRINITY_DN118349_c0_g1,0,0,9,10,female,male,-6.386605,0.128593,1.033869e-03,1.576884e-02,.,.,.,.,.,.,.
TRINITY_DN119663_c0_g1,1,2,15,15,female,male,-3.434465,0.740392,5.130576e-04,8.727153e-03,.,.,.,.,.,.,.
TRINITY_DN12085_c0_g1,1,0,12,7,female,male,-4.182851,0.182932,4.258862e-03,4.993290e-02,.,.,.,.,.,.,.


### ② 動物で知られている性決定関連の遺伝子は含まれているか調べる。
`sex_related_genes` 列に情報がある（値が「.」でない）遺伝子を抽出する。

In [81]:
# Female-specific rows whose 'sex_related_genes' value is not the '.' placeholder
female.loc[female['sex_related_genes'].ne('.')]

Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO
TRINITY_DN54243_c6_g3,21,5,1,0,female,male,4.120197,0.364765,0.003078,0.038676,.,.,.,XM_022249400.1 PREDICTED: Acanthaster planci h...,H2B_CHAGB; Histone H2B,"H2B_CHAGB^H2B_CHAGB^Q:299-3,H:34-132^98.99%ID^...",.


In [82]:
# Male-specific rows whose 'sex_related_genes' value is not the '.' placeholder
male.loc[male['sex_related_genes'].ne('.')]

Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO
TRINITY_DN57145_c0_g4,0,0,20,7,female,male,-6.913066,0.530322,0.0001,0.002104,.,.,.,KP076286.1 Oreochromis mossambicus elongation ...,EF1A_CAEEL; Elongation factor 1-alpha,"EF1A_CAEEL^EF1A_CAEEL^Q:679-2,H:194-419^97.788...","GO:0000166,GO:0001882,GO:0001883,GO:0003674,GO..."


### ③ 性決定関連のGO_IDをもつ遺伝子は？

In [161]:
# Reference list of GO IDs to try as the search term below.
#
# Sex chromosome
#   GO:0000803  sex chromosome
#
# Sex determination
#   GO:0007530  sex determination
#   GO:0030237  female sex determination
#   GO:0030238  male   sex determination
#   GO:0018993  somatic sex determination
#   GO:0007538  primary sex determination
#   GO:0007545  processes downstream of sex determination signal
#
# Sex differentiation
#   GO:0007548  sex differentiation
#   GO:0046660  female sex differentiation
#   GO:0046661  male   sex differentiation
#   GO:0048071  sex-specific pigmentation

# Search term: print its QuickGO id / name / aspect first
GO_ID = 'GO:0048071'
search_go(GO_ID)

# Female-specific rows whose 'GO' value contains the search term
female.loc[lambda df: df['GO'].str.contains(GO_ID)]

GO:0048071
sex-specific pigmentation
biological_process


Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO


In [162]:
# Male-specific rows whose 'GO' value contains the search term
male.loc[lambda df: df['GO'].str.contains(GO_ID)]

Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO


### ④ sprot_BLASTXのキーワード検索

In [149]:
# Search term matched against the 'sprot_BLASTX' column
KEYWORD = 'Ankyrin'

# Female-specific rows that contain the search term
female.loc[lambda df: df['sprot_BLASTX'].str.contains(KEYWORD)]

Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO
TRINITY_DN58432_c6_g2,25,9,0,0,female,male,6.947922,0.62098,3.054307e-05,0.0007397821,.,.,.,.,"SECG_DICDI; Ankyrin repeat, PH and SEC7 domain...","SECG_DICDI^SECG_DICDI^Q:145-648,H:331-498^33.7...","GO:0000323,GO:0003006,GO:0003674,GO:0005085,GO..."
TRINITY_DN59511_c2_g1,18,13,1,1,female,male,3.568575,0.61805,0.0004154417,0.007269483,.,.,.,.,ANK3_HUMAN; Ankyrin-3 {ECO:0000303|PubMed:7836...,"ANK3_HUMAN^ANK3_HUMAN^Q:3-623,H:507-713^68.116...","GO:0000281,GO:0000323,GO:0000910,GO:0001508,GO..."
TRINITY_DN60259_c0_g1,40,59,0,0,female,male,8.550802,2.000132,3.312079e-12,2.226962e-10,.,.,.,.,ANK3_MOUSE; Ankyrin-3,"ANK3_MOUSE^ANK3_MOUSE^Q:538-146,H:37-154^31.29...","GO:0000281,GO:0000323,GO:0000910,GO:0001508,GO..."


In [150]:
# Male-specific rows that contain the search term
male.loc[lambda df: df['sprot_BLASTX'].str.contains(KEYWORD)]

Unnamed: 0,female_1,female_2,male_1,male_2,sampleA,sampleB,logFC,logCPM,PValue,FDR,Wolbachia_wVulC,A_vulgare_f_element,A_vulgare_mitochondrion,sex_related_genes,sprot_BLASTX,sprot_Top_BLASTX_hit,GO
