# LIQA Transcript Quantification Results Analysis Part 2

Continuing from Part 1, this notebook cleans the merged LIQA quantification table and builds two dataframes: one for novel transcripts and one for annotated transcripts.

## Part 1: Import Data and Configure Python Libraries

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec
%matplotlib inline
import seaborn as sns
import re
from IPython.display import display
from matplotlib.pyplot import gcf
from sklearn.decomposition import PCA 
from sklearn.preprocessing import StandardScaler
from PIL import ImageColor
from matplotlib.patches import Patch #for custom legend making
import scipy.spatial as sp, scipy.cluster.hierarchy as hc #for faster computing of hierarchial clusters

In [2]:
#pd.options.display.max_columns = None #display all columns in dataframe
#pd.options.display.max_rows = None

In [3]:
#pd.options.display.max_colwidth = 100 #show the full content of long strings

### Import Data

In [2]:
# Show the current working directory before it is changed in the next cell.
os.getcwd()

'C:\\Users\\15082\\OneDrive\\Desktop\\thesis_research\\gtex_v9_data_analysis\\LIQA\\work_in_progress'

In [4]:
# Change the working directory to the folder that holds the LIQA input file
# read in the next cell.
# The project root defaults to the original absolute path, but it can be
# overridden with the THESIS_RESEARCH_DIR environment variable so the notebook
# can run on another machine without editing this cell.
thesis_research_dir = os.environ.get(
    'THESIS_RESEARCH_DIR',
    'C:\\Users\\15082\\OneDrive\\Desktop\\thesis_research',
)
os.chdir(os.path.join(thesis_research_dir, 'gtex_v9_data', 'data_for_analysis', 'my_liqa_data'))

#### Read data into pandas dataframe

In [5]:
# Load the merged LIQA table from the current working directory.
liqa_merged_df = pd.read_csv(filepath_or_buffer='liqa_merged.csv', sep=',')

## Part 2: Data Manipulation and Data Cleaning

In [6]:
# Preview the first five rows of the freshly loaded table.
liqa_merged_df.head(5)

Unnamed: 0.1,Unnamed: 0,composite_id,GTEX-1192X-0011-R10a-SM-4RXXZ,GTEX-11H98-0011-R11b-SM-4SFLZ,GTEX-11TTK-0011-R7b-SM-4TVFS,GTEX-1211K-0826-SM-7LDFQ,GTEX-1313W-0011-R7b-SM-4ZL3U,GTEX-13QBU-0426-SM-5A4VT,GTEX-13QJ3-0726-SM-7LDHS,GTEX-13QJ3-0726-SM-7LDHS_rep,...,GTEX-ZPU1-0826-SM-4UJSC,GTEX-ZT9X-0326-SM-4U9QG,GTEX-ZT9X-1826-SM-4V2KV,GTEX-ZT9X-1826-SM-4V2KV_rep,GTEX-ZT9X-1826-SM-4V2KV_rep2,GTEX-ZVZP-0226-SM-4VEIO,K562_ampure,K562_ampure_70ng,K562_extrawash,K562_extrawashwarm
0,0,ENST00000479581.5_ENSG00000204540.10,3.675106e-16,1.125,,,,,,,...,,6.399547,,,,,,,,
1,1,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000...,1.072175e-16,0.916406,,,,,,,...,,0.000142,,,,,,,,
2,2,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000...,0.001093079,0.003797,,,,,,,...,,0.852157,,,,,,,,
3,3,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000...,1.560275e-16,1.333594,,,,,,,...,,0.000206,,,,,,,,
4,4,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000...,3.998907,5.621203,,,,,,,...,,0.747948,,,,,,,,


In [8]:
# Label each transcript using the first character of its composite_id:
# 'E' -> 'annot', anything else -> 'novel'.
# In the preview above, annotated IDs start with "ENST" and novel IDs start with
# a lowercase hex character.
# NOTE(review): assumes no novel ID starts with an uppercase 'E' - confirm.
# Vectorised string access replaces the two row-wise apply/lambda passes; unlike
# the lambda, a missing composite_id does not raise (its type is NaN and it is
# labelled 'novel').
liqa_merged_df['transcript_type'] = liqa_merged_df['composite_id'].str[0]
liqa_merged_df['novel_or_annot'] = (
    liqa_merged_df['transcript_type'].eq('E').map({True: 'annot', False: 'novel'})
)

In [9]:
# Check that the two label columns were appended (first three rows).
liqa_merged_df.head(3)

Unnamed: 0.1,Unnamed: 0,composite_id,GTEX-1192X-0011-R10a-SM-4RXXZ,GTEX-11H98-0011-R11b-SM-4SFLZ,GTEX-11TTK-0011-R7b-SM-4TVFS,GTEX-1211K-0826-SM-7LDFQ,GTEX-1313W-0011-R7b-SM-4ZL3U,GTEX-13QBU-0426-SM-5A4VT,GTEX-13QJ3-0726-SM-7LDHS,GTEX-13QJ3-0726-SM-7LDHS_rep,...,GTEX-ZT9X-1826-SM-4V2KV,GTEX-ZT9X-1826-SM-4V2KV_rep,GTEX-ZT9X-1826-SM-4V2KV_rep2,GTEX-ZVZP-0226-SM-4VEIO,K562_ampure,K562_ampure_70ng,K562_extrawash,K562_extrawashwarm,transcript_type,novel_or_annot
0,0,ENST00000479581.5_ENSG00000204540.10,3.675106e-16,1.125,,,,,,,...,,,,,,,,,E,annot
1,1,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000...,1.072175e-16,0.916406,,,,,,,...,,,,,,,,,1,novel
2,2,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000...,0.001093079,0.003797,,,,,,,...,,,,,,,,,0,novel


In [10]:
# Count how many transcripts fall under each novel/annotated label.
liqa_merged_df.groupby('novel_or_annot')['composite_id'].count()

novel_or_annot
annot    18263
novel    62108
Name: composite_id, dtype: int64

In [11]:
# Drop the leading leftover column (shown as "Unnamed: 0" in the preview above).
# The guard keeps this cell idempotent: once 'composite_id' is the first column,
# re-running the cell no longer removes a real column.
if liqa_merged_df.columns[0] != 'composite_id':
    liqa_merged_df = liqa_merged_df.drop(columns=liqa_merged_df.columns[0])

In [12]:
# Check that 'composite_id' is now the first column.
liqa_merged_df.head(3)

Unnamed: 0,composite_id,GTEX-1192X-0011-R10a-SM-4RXXZ,GTEX-11H98-0011-R11b-SM-4SFLZ,GTEX-11TTK-0011-R7b-SM-4TVFS,GTEX-1211K-0826-SM-7LDFQ,GTEX-1313W-0011-R7b-SM-4ZL3U,GTEX-13QBU-0426-SM-5A4VT,GTEX-13QJ3-0726-SM-7LDHS,GTEX-13QJ3-0726-SM-7LDHS_rep,GTEX-13RTJ-0011-R7b-SM-5CTCB,...,GTEX-ZT9X-1826-SM-4V2KV,GTEX-ZT9X-1826-SM-4V2KV_rep,GTEX-ZT9X-1826-SM-4V2KV_rep2,GTEX-ZVZP-0226-SM-4VEIO,K562_ampure,K562_ampure_70ng,K562_extrawash,K562_extrawashwarm,transcript_type,novel_or_annot
0,ENST00000479581.5_ENSG00000204540.10,3.675106e-16,1.125,,,,,,,,...,,,,,,,,,E,annot
1,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000...,1.072175e-16,0.916406,,,,,,,,...,,,,,,,,,1,novel
2,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000...,0.001093079,0.003797,,,,,,,,...,,,,,,,,,0,novel


### Dataframe of novel transcript quantification results

In [13]:
# Keep only the rows labelled 'novel'.
liqa_novel_transcript_quant_data = liqa_merged_df.loc[liqa_merged_df['novel_or_annot'].eq('novel')]

In [14]:
# Preview the novel-only subset.
liqa_novel_transcript_quant_data.head(3)

Unnamed: 0,composite_id,GTEX-1192X-0011-R10a-SM-4RXXZ,GTEX-11H98-0011-R11b-SM-4SFLZ,GTEX-11TTK-0011-R7b-SM-4TVFS,GTEX-1211K-0826-SM-7LDFQ,GTEX-1313W-0011-R7b-SM-4ZL3U,GTEX-13QBU-0426-SM-5A4VT,GTEX-13QJ3-0726-SM-7LDHS,GTEX-13QJ3-0726-SM-7LDHS_rep,GTEX-13RTJ-0011-R7b-SM-5CTCB,...,GTEX-ZT9X-1826-SM-4V2KV,GTEX-ZT9X-1826-SM-4V2KV_rep,GTEX-ZT9X-1826-SM-4V2KV_rep2,GTEX-ZVZP-0226-SM-4VEIO,K562_ampure,K562_ampure_70ng,K562_extrawash,K562_extrawashwarm,transcript_type,novel_or_annot
1,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000...,1.072175e-16,0.916406,,,,,,,,...,,,,,,,,,1,novel
2,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000...,0.001093079,0.003797,,,,,,,,...,,,,,,,,,0,novel
3,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000...,1.560275e-16,1.333594,,,,,,,,...,,,,,,,,,e,novel


In [15]:
# (rows, columns) of the novel-only subset: one row per transcript.
liqa_novel_transcript_quant_data.shape

(62108, 95)

#### Transpose the dataframe

In [16]:
# Flip the table so every original column becomes a row; reset_index then moves
# the former column labels into a regular column named "index".
novel_transcript_quant_transposed = liqa_novel_transcript_quant_data.T.reset_index()
novel_transcript_quant_transposed.head(3)

Unnamed: 0,index,1,2,3,4,5,6,9,12,13,...,80360,80361,80362,80363,80364,80365,80366,80367,80368,80370
0,composite_id,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000...,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000...,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000...,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000...,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000...,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG000...,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000...,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000...,ed2d4aad-dc9d-4522-9980-3c3bdc5f7c8a_ENSG00000...,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000...,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000...,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000...,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000...,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000...,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000...,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000...,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000...,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000...,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000...
1,GTEX-1192X-0011-R10a-SM-4RXXZ,0.0,0.001093,0.0,3.998907,0.0,2.149067,127.865646,0.0,3.461538,...,,,,,,,,,,
2,GTEX-11H98-0011-R11b-SM-4SFLZ,0.916406,0.003797,1.333594,5.621203,0.0,103.26299,103.817179,2.307692,28.846154,...,,,,,,,,,,


#### Replace the header

In [17]:
# After the transpose the first row carries the composite_id values.
# Keep that row aside as the new header ...
novel_transcript_quant_header = novel_transcript_quant_transposed.iloc[0]
# ... remove it from the data ...
novel_transcript_quant_transposed = novel_transcript_quant_transposed.iloc[1:]
# ... and install it as the column labels.
novel_transcript_quant_transposed.columns = novel_transcript_quant_header
novel_transcript_quant_transposed.head(3)

Unnamed: 0,composite_id,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000204540.10_ENSG00000204540.10,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000204540.10_ENSG00000204540.10,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000204540.10_ENSG00000204540.10,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000204540.10_ENSG00000204540.10,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000065665.20_ENSG00000065665.20,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG00000065665.20_ENSG00000065665.20,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000065665.20_ENSG00000065665.20,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000104884.14_ENSG00000104884.14,ed2d4aad-dc9d-4522-9980-3c3bdc5f7c8a_ENSG00000104884.14_ENSG00000104884.14,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000163938.16_ENSG00000163938.16,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000163938.16_ENSG00000163938.16,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000163938.16,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000163938.16_ENSG00000163938.16,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000163938.16,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000174231.16,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000174231.16_ENSG00000174231.16,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000174231.16_ENSG00000174231.16,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000174231.16_ENSG00000174231.16,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000174231.16_ENSG00000174231.16
1,GTEX-1192X-0011-R10a-SM-4RXXZ,0.0,0.001093,0.0,3.998907,0.0,2.149067,127.865646,0.0,3.461538,...,,,,,,,,,,
2,GTEX-11H98-0011-R11b-SM-4SFLZ,0.916406,0.003797,1.333594,5.621203,0.0,103.26299,103.817179,2.307692,28.846154,...,,,,,,,,,,
3,GTEX-11TTK-0011-R7b-SM-4TVFS,,,,,0.0,0.37965,58.33526,1.1,6.6,...,,,,,,,,,,


#### Rename first column

In [18]:
# The first column holds sample names after the transpose, so relabel it.
novel_transcript_quant_transposed = novel_transcript_quant_transposed.rename(
    {"composite_id": "sample_id"}, axis="columns"
)
novel_transcript_quant_transposed.head(3)

Unnamed: 0,sample_id,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000204540.10_ENSG00000204540.10,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000204540.10_ENSG00000204540.10,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000204540.10_ENSG00000204540.10,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000204540.10_ENSG00000204540.10,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000065665.20_ENSG00000065665.20,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG00000065665.20_ENSG00000065665.20,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000065665.20_ENSG00000065665.20,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000104884.14_ENSG00000104884.14,ed2d4aad-dc9d-4522-9980-3c3bdc5f7c8a_ENSG00000104884.14_ENSG00000104884.14,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000163938.16_ENSG00000163938.16,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000163938.16_ENSG00000163938.16,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000163938.16,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000163938.16_ENSG00000163938.16,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000163938.16,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000174231.16,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000174231.16_ENSG00000174231.16,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000174231.16_ENSG00000174231.16,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000174231.16_ENSG00000174231.16,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000174231.16_ENSG00000174231.16
1,GTEX-1192X-0011-R10a-SM-4RXXZ,0.0,0.001093,0.0,3.998907,0.0,2.149067,127.865646,0.0,3.461538,...,,,,,,,,,,
2,GTEX-11H98-0011-R11b-SM-4SFLZ,0.916406,0.003797,1.333594,5.621203,0.0,103.26299,103.817179,2.307692,28.846154,...,,,,,,,,,,
3,GTEX-11TTK-0011-R7b-SM-4TVFS,,,,,0.0,0.37965,58.33526,1.1,6.6,...,,,,,,,,,,


In [19]:
# Inspect the last rows: after the transpose, the two label columns
# (transcript_type, novel_or_annot) appear here as rows.
novel_transcript_quant_transposed.tail(3)

Unnamed: 0,sample_id,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000204540.10_ENSG00000204540.10,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000204540.10_ENSG00000204540.10,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000204540.10_ENSG00000204540.10,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000204540.10_ENSG00000204540.10,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000065665.20_ENSG00000065665.20,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG00000065665.20_ENSG00000065665.20,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000065665.20_ENSG00000065665.20,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000104884.14_ENSG00000104884.14,ed2d4aad-dc9d-4522-9980-3c3bdc5f7c8a_ENSG00000104884.14_ENSG00000104884.14,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000163938.16_ENSG00000163938.16,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000163938.16_ENSG00000163938.16,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000163938.16,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000163938.16_ENSG00000163938.16,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000163938.16,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000174231.16,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000174231.16_ENSG00000174231.16,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000174231.16_ENSG00000174231.16,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000174231.16_ENSG00000174231.16,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000174231.16_ENSG00000174231.16
92,K562_extrawashwarm,,,,,0.009087,12.211347,108.28125,0.0,7.857143,...,3.194677,0.297764,10.663216,0.0,0.0,10.666666,37.279285,0.012939,0.014319,0.01284
93,transcript_type,1,0,e,7,7,5,4,4,e,...,2,0,8,1,0,3,7,d,e,5
94,novel_or_annot,novel,novel,novel,novel,novel,novel,novel,novel,novel,...,novel,novel,novel,novel,novel,novel,novel,novel,novel,novel


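**Drop the last two rows** (`transcript_type` and `novel_or_annot`): they are label rows left over from the transpose, not samples.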

In [20]:
# Remove the two helper rows carried over from the transpose
# ('transcript_type' and 'novel_or_annot'); they are labels, not samples.
# Selecting them by name instead of by position keeps the cell idempotent:
# re-running it can no longer discard real sample rows.
novel_transcript_quant_transposed = novel_transcript_quant_transposed[
    ~novel_transcript_quant_transposed['sample_id'].isin(['transcript_type', 'novel_or_annot'])
]

In [21]:
# Check that the table now ends with sample rows.
novel_transcript_quant_transposed.tail(3)

Unnamed: 0,sample_id,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000204540.10_ENSG00000204540.10,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000204540.10_ENSG00000204540.10,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000204540.10_ENSG00000204540.10,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000204540.10_ENSG00000204540.10,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000065665.20_ENSG00000065665.20,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG00000065665.20_ENSG00000065665.20,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000065665.20_ENSG00000065665.20,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000104884.14_ENSG00000104884.14,ed2d4aad-dc9d-4522-9980-3c3bdc5f7c8a_ENSG00000104884.14_ENSG00000104884.14,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000163938.16_ENSG00000163938.16,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000163938.16_ENSG00000163938.16,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000163938.16,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000163938.16_ENSG00000163938.16,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000163938.16,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000174231.16,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000174231.16_ENSG00000174231.16,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000174231.16_ENSG00000174231.16,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000174231.16_ENSG00000174231.16,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000174231.16_ENSG00000174231.16
90,K562_ampure_70ng,,,,,0.006714,12.962501,118.825175,0.0,9.333333,...,,,,,,,,,,
91,K562_extrawash,,,,,0.0,0.194638,74.322581,0.0,6.25,...,,,,,,,,,,
92,K562_extrawashwarm,,,,,0.009087,12.211347,108.28125,0.0,7.857143,...,3.194677,0.297764,10.663216,0.0,0.0,10.666666,37.279285,0.012939,0.014319,0.01284


In [22]:
# (rows, columns) of the cleaned table: one row per sample, with sample_id plus
# one column per novel transcript.
novel_transcript_quant_transposed.shape

(92, 62109)

**Save the dataframe**

In [23]:
# Change the working directory to the project root.
# The default is the original absolute path; set the THESIS_RESEARCH_DIR
# environment variable to run the notebook from a different location.
os.chdir(os.environ.get('THESIS_RESEARCH_DIR', 'C:\\Users\\15082\\OneDrive\\Desktop\\thesis_research'))

In [24]:
#novel_transcript_quant_transposed.to_csv('gtex_v9_data\\data_for_analysis\\my_liqa_data\\liqa_novel_transcript_quant_transposed.csv', sep=',')

### Dataframe of annotated transcript quantification results

In [25]:
# Keep only the rows labelled 'annot'.
liqa_annotated_transcript_quant_data = liqa_merged_df.loc[liqa_merged_df['novel_or_annot'].eq('annot')]

In [26]:
# Preview the annotated-only subset.
liqa_annotated_transcript_quant_data.head(5)

Unnamed: 0,composite_id,GTEX-1192X-0011-R10a-SM-4RXXZ,GTEX-11H98-0011-R11b-SM-4SFLZ,GTEX-11TTK-0011-R7b-SM-4TVFS,GTEX-1211K-0826-SM-7LDFQ,GTEX-1313W-0011-R7b-SM-4ZL3U,GTEX-13QBU-0426-SM-5A4VT,GTEX-13QJ3-0726-SM-7LDHS,GTEX-13QJ3-0726-SM-7LDHS_rep,GTEX-13RTJ-0011-R7b-SM-5CTCB,...,GTEX-ZT9X-1826-SM-4V2KV,GTEX-ZT9X-1826-SM-4V2KV_rep,GTEX-ZT9X-1826-SM-4V2KV_rep2,GTEX-ZVZP-0226-SM-4VEIO,K562_ampure,K562_ampure_70ng,K562_extrawash,K562_extrawashwarm,transcript_type,novel_or_annot
0,ENST00000479581.5_ENSG00000204540.10,3.675106e-16,1.125,,,,,,,,...,,,,,,,,,E,annot
7,ENST00000298428.13-1_ENSG00000065665.20,363.9853,338.231218,115.2851,9.821151,69.69124,29.979438,25.4597,29.92314,184.0439,...,134.2663,137.1557,97.37679,6.722615,0.247047,12.20561,21.48278,11.49832,E,annot
8,ENST00000304267.12_ENSG00000065665.20,5.6076599999999996e-201,5.688613,5.459904e-119,2.562447e-14,2.494378e-141,0.0,5.789408e-198,3.383807e-67,2.558707e-318,...,6.613746e-85,2.241024e-74,1.778325e-46,0.0,0.0,2.943469e-15,7.78605e-93,2.556189e-17,E,annot
10,ENST00000256015.4-1_ENSG00000133639.4,68.9993,257.990134,45.9986,195.9868,16.99962,61.998515,79.99542,91.99845,44.99914,...,167.9983,156.9975,116.9983,29.999712,12.999871,8.999987,19.99991,9.999904,E,annot
11,ENST00000256015.4_ENSG00000133639.4,0.000702456,0.009866,0.001403503,0.01316688,0.0003823346,0.001485,0.004579082,0.001545578,0.0008628469,...,0.001662722,0.002546506,0.00168305,0.000288,0.000129,1.269869e-05,9.37306e-05,9.598014e-05,E,annot


In [27]:
# (rows, columns) of the annotated-only subset: one row per transcript.
liqa_annotated_transcript_quant_data.shape

(18263, 95)

In [28]:
# Flip the table so every original column becomes a row; reset_index moves the
# former column labels into a regular first column.
annotated_transcript_quant_transposed = liqa_annotated_transcript_quant_data.T.reset_index()
# The first row now carries the composite_id values: keep it aside as the header ...
annotated_transcript_quant_header = annotated_transcript_quant_transposed.iloc[0]
# ... remove it from the data ...
annotated_transcript_quant_transposed = annotated_transcript_quant_transposed.iloc[1:]
# ... and install it as the column labels.
annotated_transcript_quant_transposed.columns = annotated_transcript_quant_header
# The first column holds sample names after the transpose, so relabel it.
annotated_transcript_quant_transposed = annotated_transcript_quant_transposed.rename(
    {"composite_id": "sample_id"}, axis="columns"
)
annotated_transcript_quant_transposed.tail(3)

Unnamed: 0,sample_id,ENST00000479581.5_ENSG00000204540.10,ENST00000298428.13-1_ENSG00000065665.20,ENST00000304267.12_ENSG00000065665.20,ENST00000256015.4-1_ENSG00000133639.4,ENST00000256015.4_ENSG00000133639.4,ENST00000493834.2_ENSG00000143409.15,ENST00000312210.9_ENSG00000143409.15,ENST00000588737.5_ENSG00000141425.17,ENST00000357384.8_ENSG00000141425.17,...,ENST00000579039.2_ENSG00000266412.5,ENST00000308388.6_ENSG00000173540.12,ENST00000262126.8_ENSG00000101745.16,ENST00000532097.5_ENSG00000185627.17,ENST00000431206.6_ENSG00000185627.17,ENST00000352303.9_ENSG00000185627.17,ENST00000525665.5_ENSG00000185627.17,ENST00000542794.5_ENSG00000167985.6,ENST00000394799.6_ENSG00000163938.16,ENST00000304992.10_ENSG00000174231.16
92,K562_extrawashwarm,,11.498316,0.0,9.999904,0.000096,,,0.00004,0.030664,...,0.00366,0.00008,13.929571,3.832491,3.820334,0.0,3.595106,11.0,3.691112,0.013951
93,transcript_type,E,E,E,E,E,E,E,E,E,...,E,E,E,E,E,E,E,E,E,E
94,novel_or_annot,annot,annot,annot,annot,annot,annot,annot,annot,annot,...,annot,annot,annot,annot,annot,annot,annot,annot,annot,annot


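**Drop the last two rows** (`transcript_type` and `novel_or_annot`): they are label rows left over from the transpose, not samples.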

In [30]:
# Remove the two helper rows carried over from the transpose
# ('transcript_type' and 'novel_or_annot'); they are labels, not samples.
# Selecting them by name instead of by position keeps the cell idempotent:
# re-running it can no longer discard real sample rows.
annotated_transcript_quant_transposed = annotated_transcript_quant_transposed[
    ~annotated_transcript_quant_transposed['sample_id'].isin(['transcript_type', 'novel_or_annot'])
]

In [31]:
# (rows, columns) of the cleaned table: one row per sample, with sample_id plus
# one column per annotated transcript.
annotated_transcript_quant_transposed.shape

(92, 18264)

In [32]:
# Check that the table now ends with sample rows.
annotated_transcript_quant_transposed.tail(3)

Unnamed: 0,sample_id,ENST00000479581.5_ENSG00000204540.10,ENST00000298428.13-1_ENSG00000065665.20,ENST00000304267.12_ENSG00000065665.20,ENST00000256015.4-1_ENSG00000133639.4,ENST00000256015.4_ENSG00000133639.4,ENST00000493834.2_ENSG00000143409.15,ENST00000312210.9_ENSG00000143409.15,ENST00000588737.5_ENSG00000141425.17,ENST00000357384.8_ENSG00000141425.17,...,ENST00000579039.2_ENSG00000266412.5,ENST00000308388.6_ENSG00000173540.12,ENST00000262126.8_ENSG00000101745.16,ENST00000532097.5_ENSG00000185627.17,ENST00000431206.6_ENSG00000185627.17,ENST00000352303.9_ENSG00000185627.17,ENST00000525665.5_ENSG00000185627.17,ENST00000542794.5_ENSG00000167985.6,ENST00000394799.6_ENSG00000163938.16,ENST00000304992.10_ENSG00000174231.16
90,K562_ampure_70ng,,12.20561,0.0,8.999987,1.3e-05,,,0.018308,0.024448,...,,,,,,,,,,
91,K562_extrawash,,21.482782,0.0,19.999906,9.4e-05,0.0,0.0,7.9e-05,0.01339,...,,,,,,,,,,
92,K562_extrawashwarm,,11.498316,0.0,9.999904,9.6e-05,,,4e-05,0.030664,...,0.00366,8e-05,13.929571,3.832491,3.820334,0.0,3.595106,11.0,3.691112,0.013951


**Save the dataframe**

In [33]:
#annotated_transcript_quant_transposed.to_csv('gtex_v9_data\\data_for_analysis\\my_liqa_data\\liqa_annotated_transcript_quant_transposed.csv', sep=',')