In [43]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

In [33]:
# 预处理数据 
def process_raw_data(data):
    data['Protein IDs'] = data['Protein IDs'].str.split(';').str[0]
    data = data[~data['Protein IDs'].str.contains('REV|CON')]
    return data

In [34]:
index_map = pd.read_csv('map_result/index_map.csv',index_col = 0)
index_map.drop_duplicates(subset = 'Protein IDs' , inplace = True)
index_map

Unnamed: 0,Protein IDs,Processed Protein IDs
0,1433B_HUMAN,P31946|1433B_HUMAN
1,1433B_MOUSE,Q9CQV8|1433B_MOUSE
2,1433E_HUMAN,P62258|1433E_HUMAN
3,1433E_MOUSE,P62259|1433E_MOUSE
4,1433F_HUMAN,Q04917|1433F_HUMAN
...,...,...
116415,sp|Q9Y6Y1-3|CMTA1_HUMANIsoform3ofCalmodulin-bi...,Q9Y6Y1-3|CMTA1_HUMAN
116417,sp|Q9Y6Y8-2|S23IP_HUMANIsoform2ofSEC23-interac...,Q9Y6Y8-2|S23IP_HUMAN
116420,sp|Q9Y6Z4|KIAS1_HUMANPutativeuncharacterizedpr...,Q9Y6Z4|KIAS1_HUMAN
116421,sp|Q9Y6Z5-2|MLAS1_HUMANIsoform2ofPutativeuncha...,Q9Y6Z5-2|MLAS1_HUMAN


### SCP99043

In [35]:
# 对照组和脂多糖处理后的单个巨噬细胞 CON LPS24 LPS48
exp_99043 = pd.read_csv('MS-based-SCP/Peripheral blood/SCP99043/all_intensity_included_proteinGroups_RAW_LPS.txt.csv',index_col = 0)
exp_99043 = process_raw_data(exp_99043)
exp_99043_Intensity = exp_99043[[col for col in exp_99043.columns if 'LFQ' not in col]]
exp_99043_Intensity
# exp_99043_LFQ = exp_99043[['Protein IDs'] + [col for col in exp_99043.columns if 'LFQ' in col]]
# exp_99043_LFQ

Unnamed: 0,Protein IDs,Intensity,Intensity CON_001,Intensity CON_002,Intensity CON_003,Intensity CON_004,Intensity CON_005,Intensity CON_006,Intensity CON_007,Intensity CON_008,...,Intensity LPS48_041,Intensity LPS48_042,Intensity LPS48_043,Intensity LPS48_044,Intensity LPS48_045,Intensity LPS48_046,Intensity LPS48_047,Intensity LPS48_048,Intensity LPS48_049,Intensity LPS48_Lib
0,A0JP43,121290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,A2A4P0,20693,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,A2A884,797940,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28452
3,A2ABU4,3674100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,A2AIV2,687470,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,379950
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1952,Q9Z2U0,1510400,0.0,0.0,11847.0,0.0,0.0,0.0,0.0,0.0,...,2007.6,0.0,0.0,0.0,17137.0,0.0,4869.5,0.0,0.0,420660
1953,Q9Z2U1,1815900,0.0,8497.0,14601.0,3199.9,0.0,0.0,4020.5,0.0,...,23856.0,4642.2,0.0,0.0,22085.0,0.0,6124.7,3092.9,2694.4,557870
1954,Q9Z2X1,19150000,12395.0,40745.0,0.0,9411.8,5312.5,12871.0,0.0,10442.0,...,128480.0,60798.0,108460.0,0.0,57911.0,215930.0,87253.0,61347.0,103800.0,7156200
1955,Q9Z2X2,82505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76737


In [36]:
exp_99043_Intensity_map = exp_99043_Intensity.merge(
    index_map[['Protein IDs', 'Processed Protein IDs']],  # 只选择 index_map 中的 Protein IDs 和 Processed Protein IDs 列
    on='Protein IDs',  # 合并的依据是 Protein IDs 列
    how='left'  # 使用 left join 保留 exp_99043_Intensity 中所有的行
)
exp_99043_Intensity_map.drop('Protein IDs', axis = 1, inplace = True)
exp_99043_Intensity_map.set_index('Processed Protein IDs', inplace = True)
exp_99043_Intensity_map

Unnamed: 0_level_0,Intensity,Intensity CON_001,Intensity CON_002,Intensity CON_003,Intensity CON_004,Intensity CON_005,Intensity CON_006,Intensity CON_007,Intensity CON_008,Intensity CON_009,...,Intensity LPS48_041,Intensity LPS48_042,Intensity LPS48_043,Intensity LPS48_044,Intensity LPS48_045,Intensity LPS48_046,Intensity LPS48_047,Intensity LPS48_048,Intensity LPS48_049,Intensity LPS48_Lib
Processed Protein IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0JP43|EFCB5_MOUSE,121290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
A2A4P0|DHX8_MOUSE,20693,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
A2A884|ZEP3_MOUSE,797940,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28452
A2ABU4|MYOM3_MOUSE,3674100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
A2AIV2|VIR_MOUSE,687470,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,379950
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q9Z2U0|PSA7_MOUSE,1510400,0.0,0.0,11847.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2007.6,0.0,0.0,0.0,17137.0,0.0,4869.5,0.0,0.0,420660
Q9Z2U1|PSA5_MOUSE,1815900,0.0,8497.0,14601.0,3199.9,0.0,0.0,4020.5,0.0,3621.8,...,23856.0,4642.2,0.0,0.0,22085.0,0.0,6124.7,3092.9,2694.4,557870
Q9Z2X1|HNRPF_MOUSE,19150000,12395.0,40745.0,0.0,9411.8,5312.5,12871.0,0.0,10442.0,25769.0,...,128480.0,60798.0,108460.0,0.0,57911.0,215930.0,87253.0,61347.0,103800.0,7156200
Q9Z2X2|PSD10_MOUSE,82505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,76737


### SCP77481

In [37]:
exp_77481_1 = pd.read_csv('MS-based-SCP/Peripheral blood/SCP77481/all_intensity_included_proteinGroups_boost.txt.csv',index_col = 0)
exp_77481_2 = pd.read_csv('MS-based-SCP/Peripheral blood/SCP77481/all_intensity_included_proteinGroups_single_cell.txt.csv',index_col = 0)
exp_77481_1 = process_raw_data(exp_77481_1)
exp_77481_2 = process_raw_data(exp_77481_2)
exp_77481_2 = exp_77481_2[[col for col in exp_77481_2.columns if ('corrected' not in col and 'count' not in col and 'Reporter' not in col)]]
exp_77481_1 = exp_77481_1[[col for col in exp_77481_1.columns if ('corrected' not in col and 'count' not in col and 'Reporter' not in col)]]
exp_77481_1

Unnamed: 0,Protein IDs,Intensity,Intensity 0ng_run_1,Intensity 0ng_run_2,Intensity 50ng_run_1,Intensity 50ng_run_2,Intensity 5ng_run_1,Intensity 5ng_run_2
0,1433B_MOUSE,160980000.0,0,0,94550000.0,34037000.0,15672000.0,16717000.0
1,1433E_MOUSE,885630000.0,1121000,845920,523990000.0,259300000.0,48642000.0,51731000.0
2,1433F_MOUSE,181440000.0,0,0,100680000.0,70059000.0,5074200.0,5630200.0
3,1433G_MOUSE,208220000.0,0,593300,157070000.0,18264000.0,19430000.0,12861000.0
4,1433S_MOUSE,117730000.0,0,0,76338000.0,23848000.0,10600000.0,6942200.0
...,...,...,...,...,...,...,...,...
1592,ZCH18_MOUSE,1945800.0,0,0,1945800.0,0.0,0.0,0.0
1593,ZN326_MOUSE,3821400.0,0,0,3821400.0,0.0,0.0,0.0
1594,ZO1_MOUSE,5663000.0,0,0,3015900.0,2647200.0,0.0,0.0
1595,ZW10_MOUSE,20178000.0,0,0,20178000.0,0.0,0.0,0.0


In [38]:
exp_77481_1_map = exp_77481_1.merge(index_map[['Protein IDs','Processed Protein IDs']], on = 'Protein IDs', how = 'left')
exp_77481_1_map.drop('Protein IDs', axis = 1, inplace = True)
exp_77481_1_map.set_index('Processed Protein IDs',inplace = True)
exp_77481_1_map

exp_77481_2_map = exp_77481_2.merge(index_map[['Protein IDs','Processed Protein IDs']], on = 'Protein IDs', how = 'left')
exp_77481_2_map.drop('Protein IDs',axis = 1, inplace = True)
exp_77481_2_map.set_index('Processed Protein IDs',inplace =True)
exp_77481_1_map

Unnamed: 0_level_0,Intensity,Intensity 0ng_run_1,Intensity 0ng_run_2,Intensity 50ng_run_1,Intensity 50ng_run_2,Intensity 5ng_run_1,Intensity 5ng_run_2
Processed Protein IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Q9CQV8|1433B_MOUSE,160980000.0,0,0,94550000.0,34037000.0,15672000.0,16717000.0
P62259|1433E_MOUSE,885630000.0,1121000,845920,523990000.0,259300000.0,48642000.0,51731000.0
P68510|1433F_MOUSE,181440000.0,0,0,100680000.0,70059000.0,5074200.0,5630200.0
P61982|1433G_MOUSE,208220000.0,0,593300,157070000.0,18264000.0,19430000.0,12861000.0
O70456|1433S_MOUSE,117730000.0,0,0,76338000.0,23848000.0,10600000.0,6942200.0
...,...,...,...,...,...,...,...
Q0P678|ZCH18_MOUSE,1945800.0,0,0,1945800.0,0.0,0.0,0.0
O88291|ZN326_MOUSE,3821400.0,0,0,3821400.0,0.0,0.0,0.0
P39447|ZO1_MOUSE,5663000.0,0,0,3015900.0,2647200.0,0.0,0.0
O54692|ZW10_MOUSE,20178000.0,0,0,20178000.0,0.0,0.0,0.0


In [16]:
# exp_77481_2_map   # 和SCP31766一样的数据

Unnamed: 0_level_0,Intensity,Intensity 1_A,Intensity 1_B,Intensity 1_C,Intensity 2_A,Intensity 2_B,Intensity 2_C,Intensity 3_A,Intensity 3_B,Intensity 3_C,Intensity 4_A,Intensity 4_B,Intensity 4_C
Processed Protein IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Q9CQV8|1433B_MOUSE,1.093000e+08,14397000.0,8391500.0,9274000.0,8518700.0,6540300.0,12894000.0,8626200.0,6430300.0,6724800.0,8524500.0,4192200.0,14786000.0
P62259|1433E_MOUSE,1.346400e+09,78246000.0,140610000.0,105740000.0,91697000.0,96269000.0,117590000.0,82407000.0,123030000.0,125700000.0,112110000.0,94343000.0,178650000.0
P68510|1433F_MOUSE,1.070900e+08,7183300.0,9017700.0,7865000.0,6333500.0,10112000.0,11121000.0,12829000.0,7562400.0,7994300.0,9298600.0,7840100.0,9935500.0
P61982|1433G_MOUSE,1.856400e+08,16097000.0,23328000.0,13831000.0,12897000.0,14084000.0,14379000.0,15996000.0,13682000.0,12532000.0,17480000.0,11616000.0,19713000.0
O70456|1433S_MOUSE,1.117700e+08,10168000.0,11000000.0,7133400.0,7166900.0,10001000.0,13233000.0,9670800.0,8528800.0,8044400.0,7874100.0,7766300.0,11180000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q3TIV5|ZC3HF_MOUSE,1.360100e+06,0.0,788070.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,572060.0,0.0
Q5SS00|ZDBF2_MOUSE,1.197300e+08,0.0,0.0,12525000.0,0.0,0.0,30162000.0,0.0,0.0,15506000.0,27163000.0,0.0,34376000.0
O88291|ZN326_MOUSE,8.390300e+06,481520.0,1248500.0,431170.0,852430.0,649330.0,973510.0,396540.0,372810.0,529660.0,381350.0,906380.0,1167000.0
P39447|ZO1_MOUSE,4.719800e+07,13431000.0,822130.0,0.0,0.0,0.0,461790.0,13004000.0,0.0,621090.0,16795000.0,909200.0,1153300.0


### SCP80019

In [39]:
# Hela 细胞系 是human
exp_80019 = pd.read_csv('MS-based-SCP/Peripheral blood/SCP80019/all_intensity_included_proteinGroups_all.txt.csv',index_col = 0)
exp_80019 = process_raw_data(exp_80019)
exp_80019 = exp_80019[[col for col in exp_80019.columns if 'LFQ' not in col]]
exp_80019.to_csv('MS-based-SCP/Peripheral blood/SCP80019/index_map.csv')
exp_80019_map = exp_80019.merge(index_map[['Protein IDs','Processed Protein IDs']],on = 'Protein IDs',how = 'left')
exp_80019_map.drop('Protein IDs',axis = 1,inplace = True)
exp_80019_map.set_index('Processed Protein IDs',inplace = True)
exp_80019_map

Unnamed: 0_level_0,Intensity,Intensity 20 HeLa cells,Intensity Blank,Intensity Single cell 1,Intensity Single cell 2,Intensity Single cell 4,Intensity 100 HeLa cells,Intensity Single cell 3
Processed Protein IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A0AVT1|UBA6_HUMAN,2628200,0,0.0,0.0,0.0,0.0,2628200,0
A6NDG6|PGP_HUMAN,2358500,411310,0.0,0.0,0.0,97001.0,1824800,25408
P30046|DOPD_HUMAN,9721700,1082100,0.0,0.0,0.0,82810.0,8556700,0
A6NHQ2|FBLL1_HUMAN,2340700,0,0.0,0.0,0.0,0.0,2340700,0
A6NKF1|SAC31_HUMAN,19183000,0,0.0,0.0,0.0,0.0,19183000,0
...,...,...,...,...,...,...,...,...
Q9Y6C9|MTCH2_HUMAN,6248200,816390,0.0,26491.0,27397.0,279900.0,5098000,0
Q9Y6E2|5MP1_HUMAN,15330000,1385800,0.0,44789.0,70630.0,235730.0,13593000,0
Q9Y6G9|DC1L1_HUMAN,852270,339320,0.0,0.0,0.0,0.0,512950,0
Q9Y6H1|CHCH2_HUMAN,7306800,790020,0.0,77999.0,21627.0,144730.0,6272400,0


### SCP31766

In [40]:
exp_31766 = pd.read_csv('MS-based-SCP/Peripheral blood/SCP31766/filtered_proteinGroups_single_cell.txt.csv',index_col = 0)
exp_31766 = process_raw_data(exp_31766)
exp_31766_map = exp_31766.merge(index_map,on = 'Protein IDs',how = 'left')
exp_31766_map.drop('Protein IDs',axis = 1,inplace = True)
exp_31766_map.set_index('Processed Protein IDs',inplace = True)
exp_31766_map

Unnamed: 0_level_0,Intensity,Intensity 1_A,Intensity 1_B,Intensity 1_C,Intensity 2_A,Intensity 2_B,Intensity 2_C,Intensity 3_A,Intensity 3_B,Intensity 3_C,Intensity 4_A,Intensity 4_B,Intensity 4_C
Processed Protein IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Q9CQV8|1433B_MOUSE,1.093000e+08,14397000.0,8391500.0,9274000.0,8518700.0,6540300.0,12894000.0,8626200.0,6430300.0,6724800.0,8524500.0,4192200.0,14786000.0
P62259|1433E_MOUSE,1.346400e+09,78246000.0,140610000.0,105740000.0,91697000.0,96269000.0,117590000.0,82407000.0,123030000.0,125700000.0,112110000.0,94343000.0,178650000.0
P68510|1433F_MOUSE,1.070900e+08,7183300.0,9017700.0,7865000.0,6333500.0,10112000.0,11121000.0,12829000.0,7562400.0,7994300.0,9298600.0,7840100.0,9935500.0
P61982|1433G_MOUSE,1.856400e+08,16097000.0,23328000.0,13831000.0,12897000.0,14084000.0,14379000.0,15996000.0,13682000.0,12532000.0,17480000.0,11616000.0,19713000.0
O70456|1433S_MOUSE,1.117700e+08,10168000.0,11000000.0,7133400.0,7166900.0,10001000.0,13233000.0,9670800.0,8528800.0,8044400.0,7874100.0,7766300.0,11180000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q3TIV5|ZC3HF_MOUSE,1.360100e+06,0.0,788070.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,572060.0,0.0
Q5SS00|ZDBF2_MOUSE,1.197300e+08,0.0,0.0,12525000.0,0.0,0.0,30162000.0,0.0,0.0,15506000.0,27163000.0,0.0,34376000.0
O88291|ZN326_MOUSE,8.390300e+06,481520.0,1248500.0,431170.0,852430.0,649330.0,973510.0,396540.0,372810.0,529660.0,381350.0,906380.0,1167000.0
P39447|ZO1_MOUSE,4.719800e+07,13431000.0,822130.0,0.0,0.0,0.0,461790.0,13004000.0,0.0,621090.0,16795000.0,909200.0,1153300.0


### merge the the data

In [41]:
# 重命名各个列名
exp_99043_Intensity_map.columns = exp_99043_Intensity_map.columns + '_Peripheral_blood' + '_Mouse' + '_RAW_2467' + '_SCP99043'
exp_77481_1_map.columns = exp_77481_1_map.columns + '_Peripheral_blood' + '_Mouse' + '_RAW_2467' + '_SCP77481'
exp_77481_2_map.columns = exp_77481_2_map.columns + '_Peripheral_blood' + '_Mouse' + '_RAW_2467' + '_SCP77481'

In [42]:
result = pd.concat([exp_99043_Intensity_map, exp_77481_1_map, exp_77481_2_map], axis=0)
result

Unnamed: 0_level_0,Intensity_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_001_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_002_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_003_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_004_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_005_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_006_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_007_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_008_Peripheral_blood_Mouse_RAW_2467_SCP99043,Intensity CON_009_Peripheral_blood_Mouse_RAW_2467_SCP99043,...,Intensity 1_C_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 2_A_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 2_B_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 2_C_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 3_A_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 3_B_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 3_C_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 4_A_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 4_B_Peripheral_blood_Mouse_RAW_2467_SCP77481,Intensity 4_C_Peripheral_blood_Mouse_RAW_2467_SCP77481
Processed Protein IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0JP43|EFCB5_MOUSE,121290.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
A2A4P0|DHX8_MOUSE,20693.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
A2A884|ZEP3_MOUSE,797940.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
A2ABU4|MYOM3_MOUSE,3674100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
A2AIV2|VIR_MOUSE,687470.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q3TIV5|ZC3HF_MOUSE,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,572060.0,0.0
Q5SS00|ZDBF2_MOUSE,,,,,,,,,,,...,12525000.0,0.0,0.0,30162000.0,0.0,0.0,15506000.0,27163000.0,0.0,34376000.0
O88291|ZN326_MOUSE,,,,,,,,,,,...,431170.0,852430.0,649330.0,973510.0,396540.0,372810.0,529660.0,381350.0,906380.0,1167000.0
P39447|ZO1_MOUSE,,,,,,,,,,,...,0.0,0.0,0.0,461790.0,13004000.0,0.0,621090.0,16795000.0,909200.0,1153300.0


In [None]:
index = group_data.T.index
Tissue=[]
cell_name = index
cell_name = list(cell_name)
for cell in cell_name:
    tissue = cell.split('_')[-1]
    Tissue.append(tissue)

group_data_T = group_data.T
scaler = StandardScaler()
group_data_T = scaler.fit_transform(group_data_T)

# 进行 t-SNE 降维
tsne = TSNE(n_components=2, random_state=42, perplexity=30)  # perplexity 可以调整
X_tsne = tsne.fit_transform(group_data_T)

# tsne_df = pd.DataFrame(X_tsne, columns=["tSNE1", "tSNE2"])
# tsne_df["Tissue"] = Tissue


# plt.figure(figsize=(10, 8))
# sns.scatterplot(x="tSNE1", y="tSNE2", hue="Tissue", palette="tab20", data=tsne_df, s=50)
# plt.title("t-SNE Visualization of Cell Lines", fontsize=16)
# plt.xlabel("t-SNE 1", fontsize=12)
# plt.ylabel("t-SNE 2", fontsize=12)
# plt.legend(title="Tissue", bbox_to_anchor=(1.05, 1), loc="upper left")
# plt.tight_layout()
# plt.show()