# LIQA Transcript Quantification Results Analysis Part 3

Continuing from part 2, this notebook performs further data cleaning and manipulation to produce the final dataframes used by the visualization scripts in part 4.

## Part 1: Import Data and Configure Python Libraries

In [3]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec
%matplotlib inline
import seaborn as sns
import re
from IPython.display import display
from matplotlib.pyplot import gcf
from sklearn.decomposition import PCA 
from sklearn.preprocessing import StandardScaler
from PIL import ImageColor
from matplotlib.patches import Patch #for custom legend making
import scipy.spatial as sp, scipy.cluster.hierarchy as hc #for faster computing of hierarchial clusters

In [2]:
#pd.options.display.max_columns = None #display all columns in dataframe
#pd.options.display.max_rows = None

In [3]:
#pd.options.display.max_colwidth = 100 #show the full content of long strings

### Import Data

In [4]:
# Show the current working directory of the kernel.
os.getcwd()

'C:\\Users\\15082\\OneDrive\\Desktop\\thesis_research\\gtex_v9_data_analysis\\LIQA\\work_in_progress'

In [5]:
# Data directory, given relative to the working directory in effect when files are read.
# Built from components so the path separator matches the host OS
# (on Windows this yields the same string as before).
data_dir = os.path.join('gtex_v9_data', 'data_for_analysis')

In [6]:
# Locations of the three input tables under `data_dir`.
# Each path is joined from its components so no OS-specific separator is hard-coded
# (on Windows the resulting strings are unchanged).
sample_info_path = os.path.join(data_dir, 'gtex_database_data', 'sample_info_complete.csv')
novel_transcript_quant_transposed_path = os.path.join(
    data_dir, 'my_liqa_data', 'liqa_novel_transcript_quant_transposed.csv')
annotated_transcript_quant_transposed_path = os.path.join(
    data_dir, 'my_liqa_data', 'liqa_annotated_transcript_quant_transposed.csv')

#### Read data into pandas dataframe

In [7]:
# change working directory to the project root.
# The root can be overridden by setting the THESIS_RESEARCH_DIR environment variable;
# when it is unset, the original hard-coded location is used, so existing behaviour
# is unchanged.
project_root = os.environ.get('THESIS_RESEARCH_DIR',
                              'C:\\Users\\15082\\OneDrive\\Desktop\\thesis_research')
os.chdir(project_root)

In [8]:
# Load the three input tables, in the same order as before:
# sample metadata, novel-transcript quantification, annotated-transcript quantification.
(sample_info,
 novel_transcript_quant_transposed,
 annotated_transcript_quant_transposed) = map(
    pd.read_csv,
    (sample_info_path,
     novel_transcript_quant_transposed_path,
     annotated_transcript_quant_transposed_path),
)

## Part 2: Data Manipulation and Data Cleaning

#### Dataframe of Novel Transcripts

In [11]:
# Preview the first 3 rows of the novel-transcript table as loaded.
novel_transcript_quant_transposed.head(3)

Unnamed: 0.1,Unnamed: 0,sample_id,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000204540.10_ENSG00000204540.10,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000204540.10_ENSG00000204540.10,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000204540.10_ENSG00000204540.10,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000204540.10_ENSG00000204540.10,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000065665.20_ENSG00000065665.20,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG00000065665.20_ENSG00000065665.20,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000065665.20_ENSG00000065665.20,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000104884.14_ENSG00000104884.14,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000163938.16_ENSG00000163938.16,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000163938.16_ENSG00000163938.16,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000163938.16,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000163938.16_ENSG00000163938.16,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000163938.16,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000174231.16,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000174231.16_ENSG00000174231.16,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000174231.16_ENSG00000174231.16,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000174231.16_ENSG00000174231.16,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000174231.16_ENSG00000174231.16
0,1,GTEX-1192X-0011-R10a-SM-4RXXZ,1.072175e-16,0.001093,1.560275e-16,3.998907,3.49816e-25,2.149067,127.865646,0.0,...,,,,,,,,,,
1,2,GTEX-11H98-0011-R11b-SM-4SFLZ,0.9164062,0.003797,1.333594,5.621203,1.036529e-110,103.26299,103.817179,2.307692,...,,,,,,,,,,
2,3,GTEX-11TTK-0011-R7b-SM-4TVFS,,,,,2.0399239999999998e-19,0.37965,58.33526,1.1,...,,,,,,,,,,


In [12]:
# drop the first column (the leading row-counter column that precedes `sample_id`).
# The guard makes the cell idempotent: the previous positional, in-place drop
# removed `sample_id` -- and then one transcript column per run -- whenever the
# cell was executed again.
novel_leading_column = novel_transcript_quant_transposed.columns[0]
if novel_leading_column != 'sample_id':
    novel_transcript_quant_transposed = novel_transcript_quant_transposed.drop(
        columns=novel_leading_column)

In [13]:
# Preview again to confirm the leading column is gone and `sample_id` comes first.
novel_transcript_quant_transposed.head(3)

Unnamed: 0,sample_id,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000204540.10_ENSG00000204540.10,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000204540.10_ENSG00000204540.10,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000204540.10_ENSG00000204540.10,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000204540.10_ENSG00000204540.10,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000065665.20_ENSG00000065665.20,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG00000065665.20_ENSG00000065665.20,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000065665.20_ENSG00000065665.20,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000104884.14_ENSG00000104884.14,ed2d4aad-dc9d-4522-9980-3c3bdc5f7c8a_ENSG00000104884.14_ENSG00000104884.14,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000163938.16_ENSG00000163938.16,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000163938.16_ENSG00000163938.16,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000163938.16,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000163938.16_ENSG00000163938.16,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000163938.16,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000174231.16,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000174231.16_ENSG00000174231.16,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000174231.16_ENSG00000174231.16,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000174231.16_ENSG00000174231.16,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000174231.16_ENSG00000174231.16
0,GTEX-1192X-0011-R10a-SM-4RXXZ,1.072175e-16,0.001093,1.560275e-16,3.998907,3.49816e-25,2.149067,127.865646,0.0,3.461538,...,,,,,,,,,,
1,GTEX-11H98-0011-R11b-SM-4SFLZ,0.9164062,0.003797,1.333594,5.621203,1.036529e-110,103.26299,103.817179,2.307692,28.846154,...,,,,,,,,,,
2,GTEX-11TTK-0011-R7b-SM-4TVFS,,,,,2.0399239999999998e-19,0.37965,58.33526,1.1,6.6,...,,,,,,,,,,


#### Dataframe of Annotated Transcripts

In [14]:
# Preview the first 3 rows of the annotated-transcript table as loaded.
annotated_transcript_quant_transposed.head(3)

Unnamed: 0.1,Unnamed: 0,sample_id,ENST00000479581.5_ENSG00000204540.10,ENST00000298428.13-1_ENSG00000065665.20,ENST00000304267.12_ENSG00000065665.20,ENST00000256015.4-1_ENSG00000133639.4,ENST00000256015.4_ENSG00000133639.4,ENST00000493834.2_ENSG00000143409.15,ENST00000312210.9_ENSG00000143409.15,ENST00000588737.5_ENSG00000141425.17,...,ENST00000579039.2_ENSG00000266412.5,ENST00000308388.6_ENSG00000173540.12,ENST00000262126.8_ENSG00000101745.16,ENST00000532097.5_ENSG00000185627.17,ENST00000431206.6_ENSG00000185627.17,ENST00000352303.9_ENSG00000185627.17,ENST00000525665.5_ENSG00000185627.17,ENST00000542794.5_ENSG00000167985.6,ENST00000394799.6_ENSG00000163938.16,ENST00000304992.10_ENSG00000174231.16
0,1,GTEX-1192X-0011-R10a-SM-4RXXZ,3.675106e-16,363.985286,5.6076599999999996e-201,68.999298,0.000702,0.0009584247,3.4e-05,3.068856,...,,,,,,,,,,
1,2,GTEX-11H98-0011-R11b-SM-4SFLZ,1.125,338.231218,5.688613,257.990134,0.009866,3.794646e-08,6.3e-05,2.34222,...,,,,,,,,,,
2,3,GTEX-11TTK-0011-R7b-SM-4TVFS,,115.28509,5.459904e-119,45.998596,0.001404,1.137861e-05,2.798802,5.160269e-16,...,,,,,,,,,,


In [15]:
# drop the first column (the leading row-counter column that precedes `sample_id`).
# The guard makes the cell idempotent: the previous positional, in-place drop
# removed `sample_id` -- and then one transcript column per run -- whenever the
# cell was executed again.
annotated_leading_column = annotated_transcript_quant_transposed.columns[0]
if annotated_leading_column != 'sample_id':
    annotated_transcript_quant_transposed = annotated_transcript_quant_transposed.drop(
        columns=annotated_leading_column)

In [16]:
# Preview again to confirm the leading column is gone and `sample_id` comes first.
annotated_transcript_quant_transposed.head(3)

Unnamed: 0,sample_id,ENST00000479581.5_ENSG00000204540.10,ENST00000298428.13-1_ENSG00000065665.20,ENST00000304267.12_ENSG00000065665.20,ENST00000256015.4-1_ENSG00000133639.4,ENST00000256015.4_ENSG00000133639.4,ENST00000493834.2_ENSG00000143409.15,ENST00000312210.9_ENSG00000143409.15,ENST00000588737.5_ENSG00000141425.17,ENST00000357384.8_ENSG00000141425.17,...,ENST00000579039.2_ENSG00000266412.5,ENST00000308388.6_ENSG00000173540.12,ENST00000262126.8_ENSG00000101745.16,ENST00000532097.5_ENSG00000185627.17,ENST00000431206.6_ENSG00000185627.17,ENST00000352303.9_ENSG00000185627.17,ENST00000525665.5_ENSG00000185627.17,ENST00000542794.5_ENSG00000167985.6,ENST00000394799.6_ENSG00000163938.16,ENST00000304992.10_ENSG00000174231.16
0,GTEX-1192X-0011-R10a-SM-4RXXZ,3.675106e-16,363.985286,5.6076599999999996e-201,68.999298,0.000702,0.0009584247,3.4e-05,3.068856,8.842624,...,,,,,,,,,,
1,GTEX-11H98-0011-R11b-SM-4SFLZ,1.125,338.231218,5.688613,257.990134,0.009866,3.794646e-08,6.3e-05,2.34222,0.080356,...,,,,,,,,,,
2,GTEX-11TTK-0011-R7b-SM-4TVFS,,115.28509,5.459904e-119,45.998596,0.001404,1.137861e-05,2.798802,5.160269e-16,0.031625,...,,,,,,,,,,


#### Dataframe of Sample IDs and Sample Tissue Types

Select only the sample ID and tissue type columns from the sample info data table.

In [17]:
# Preview the sample metadata table (head() shows the first 5 rows by default).
sample_info.head()

Unnamed: 0,sample_id,date_of_sequencing,sample_name,tissue,protocol,mrna_rin,flush_buffer,amount_loaded_ng,run_time,total_reads,median_read_length,median_read_quality,aligned_reads,median_read_length_align,median_read_quality_aligned,WGS,data_center,RNA_extraction_method,3_prime_bias_median,3_prime_bias_sd
0,LV1681,53119,CVD-LV1681,Heart - Left Ventricle,cDNA-PCR,,PBT,60.0,48.0,2287307,195,9.9,620717,696,10.9,No,BROAD,RNA Extraction from Paxgene-derived Lysate Pla...,0.653,0.378
1,LV1702,53119,CVD-LV1702,Heart - Left Ventricle,cDNA-PCR,,PBT,60.0,48.0,4456040,211,10.3,1517665,737,11.5,No,BROAD,RNA Extraction from Paxgene-derived Lysate Pla...,0.754,0.357
2,LV1708,60319,CVD-LV1708,Heart - Left Ventricle,cDNA-PCR,,PBT,60.0,48.0,2586875,261,10.5,1117070,699,11.2,No,BROAD,RNA Extraction from Paxgene-derived Lysate Pla...,0.659,0.382
3,LV1723,60319,CVD-LV1723,Heart - Left Ventricle,cDNA-PCR,,PBT,60.0,48.0,3577244,230,10.5,1017015,666,11.5,No,BROAD,RNA Extraction from Paxgene-derived Lysate Pla...,0.57,0.399
4,GTEX-1192X-0011-R10a-SM-4RXXZ,52219,GTEX-1192X,Brain - Frontal Cortex (BA9),cDNA-PCR,8.7,PBT,60.0,48.0,7568902,651,11.4,5593813,750,11.8,Yes,BROAD,RNA isolation_PAXgene Tissue miRNA,0.782,0.348


In [18]:
# Keep only the two metadata columns used to label each sample with its tissue.
tissue_lookup_columns = ['sample_id', 'tissue']
sample_id_tissue = sample_info[tissue_lookup_columns]
sample_id_tissue.head()

Unnamed: 0,sample_id,tissue
0,LV1681,Heart - Left Ventricle
1,LV1702,Heart - Left Ventricle
2,LV1708,Heart - Left Ventricle
3,LV1723,Heart - Left Ventricle
4,GTEX-1192X-0011-R10a-SM-4RXXZ,Brain - Frontal Cortex (BA9)


### Merge novel transcript quant data table with tissue type information.

In [19]:
# Attach each sample's tissue label. The left join keeps every quantified sample,
# including any that have no row in the sample metadata.
novel_transcript_express_tissue = novel_transcript_quant_transposed.merge(
    sample_id_tissue, how='left', on='sample_id')

# The astype('str') below would turn a missing tissue (NaN) into the literal
# label 'nan', which would then look like a real tissue. Stop here instead and
# report the samples that have no tissue annotation.
novel_unmatched_samples = novel_transcript_express_tissue.loc[
    novel_transcript_express_tissue['tissue'].isna(), 'sample_id']
if not novel_unmatched_samples.empty:
    raise ValueError('No tissue annotation found for sample_id(s): '
                     + ', '.join(novel_unmatched_samples.astype(str)))

# shift column 'tissue' to second position
novel_tissue_column = novel_transcript_express_tissue.pop('tissue')
novel_transcript_express_tissue.insert(1, 'tissue', novel_tissue_column)
novel_transcript_express_tissue.tissue = novel_transcript_express_tissue.tissue.astype('str')
novel_transcript_express_tissue.head(5)

Unnamed: 0,sample_id,tissue,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000204540.10_ENSG00000204540.10,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000204540.10_ENSG00000204540.10,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000204540.10_ENSG00000204540.10,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000204540.10_ENSG00000204540.10,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000065665.20_ENSG00000065665.20,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG00000065665.20_ENSG00000065665.20,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000065665.20_ENSG00000065665.20,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000104884.14_ENSG00000104884.14,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000163938.16_ENSG00000163938.16,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000163938.16_ENSG00000163938.16,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000163938.16,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000163938.16_ENSG00000163938.16,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000163938.16,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000174231.16,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000174231.16_ENSG00000174231.16,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000174231.16_ENSG00000174231.16,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000174231.16_ENSG00000174231.16,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000174231.16_ENSG00000174231.16
0,GTEX-1192X-0011-R10a-SM-4RXXZ,Brain - Frontal Cortex (BA9),1.072175e-16,0.001093,1.560275e-16,3.998907,3.49816e-25,2.149067,127.865646,0.0,...,,,,,,,,,,
1,GTEX-11H98-0011-R11b-SM-4SFLZ,Brain - Cerebellar Hemisphere,0.9164062,0.003797,1.333594,5.621203,1.036529e-110,103.26299,103.817179,2.307692,...,,,,,,,,,,
2,GTEX-11TTK-0011-R7b-SM-4TVFS,Brain - Putamen (basal ganglia),,,,,2.0399239999999998e-19,0.37965,58.33526,1.1,...,,,,,,,,,,
3,GTEX-1211K-0826-SM-7LDFQ,Lung,,,,,0.02264496,10.430177,201.726027,8.333333,...,,,,,,,,,,
4,GTEX-1313W-0011-R7b-SM-4ZL3U,Brain - Putamen (basal ganglia),,,,,3.738208e-21,0.180323,42.12844,0.0,...,,,,,,,,,,


### Merge annotated transcript quant data table with tissue type information.

In [20]:
# Attach each sample's tissue label. The left join keeps every quantified sample,
# including any that have no row in the sample metadata.
annotated_transcript_express_tissue = annotated_transcript_quant_transposed.merge(
    sample_id_tissue, how='left', on='sample_id')

# The astype('str') below would turn a missing tissue (NaN) into the literal
# label 'nan', which would then look like a real tissue. Stop here instead and
# report the samples that have no tissue annotation.
annotated_unmatched_samples = annotated_transcript_express_tissue.loc[
    annotated_transcript_express_tissue['tissue'].isna(), 'sample_id']
if not annotated_unmatched_samples.empty:
    raise ValueError('No tissue annotation found for sample_id(s): '
                     + ', '.join(annotated_unmatched_samples.astype(str)))

# shift column 'tissue' to second position
annotated_tissue_column = annotated_transcript_express_tissue.pop('tissue')
annotated_transcript_express_tissue.insert(1, 'tissue', annotated_tissue_column)
annotated_transcript_express_tissue.tissue = annotated_transcript_express_tissue.tissue.astype('str')
annotated_transcript_express_tissue.head(5)

Unnamed: 0,sample_id,tissue,ENST00000479581.5_ENSG00000204540.10,ENST00000298428.13-1_ENSG00000065665.20,ENST00000304267.12_ENSG00000065665.20,ENST00000256015.4-1_ENSG00000133639.4,ENST00000256015.4_ENSG00000133639.4,ENST00000493834.2_ENSG00000143409.15,ENST00000312210.9_ENSG00000143409.15,ENST00000588737.5_ENSG00000141425.17,...,ENST00000579039.2_ENSG00000266412.5,ENST00000308388.6_ENSG00000173540.12,ENST00000262126.8_ENSG00000101745.16,ENST00000532097.5_ENSG00000185627.17,ENST00000431206.6_ENSG00000185627.17,ENST00000352303.9_ENSG00000185627.17,ENST00000525665.5_ENSG00000185627.17,ENST00000542794.5_ENSG00000167985.6,ENST00000394799.6_ENSG00000163938.16,ENST00000304992.10_ENSG00000174231.16
0,GTEX-1192X-0011-R10a-SM-4RXXZ,Brain - Frontal Cortex (BA9),3.675106e-16,363.985286,5.6076599999999996e-201,68.999298,0.000702,0.0009584247,3.399289e-05,3.068856,...,,,,,,,,,,
1,GTEX-11H98-0011-R11b-SM-4SFLZ,Brain - Cerebellar Hemisphere,1.125,338.231218,5.688613,257.990134,0.009866,3.794646e-08,6.349478e-05,2.34222,...,,,,,,,,,,
2,GTEX-11TTK-0011-R7b-SM-4TVFS,Brain - Putamen (basal ganglia),,115.28509,5.459904e-119,45.998596,0.001404,1.137861e-05,2.798802,5.160269e-16,...,,,,,,,,,,
3,GTEX-1211K-0826-SM-7LDFQ,Lung,,9.821151,2.562447e-14,195.986833,0.013167,1.575,2.755864,3.5202909999999997e-87,...,,,,,,,,,,
4,GTEX-1313W-0011-R7b-SM-4ZL3U,Brain - Putamen (basal ganglia),,69.691237,2.494378e-141,16.999618,0.000382,5.142857,8.791583e-16,2.184079e-16,...,,,,,,,,,,


### Further Data Cleaning

In [21]:
# List the distinct tissue labels in the novel-transcript table (np.unique returns them sorted).
np.unique(novel_transcript_express_tissue['tissue'])

array(['Adipose - Subcutaneous',
       'Brain - Anterior cingulate cortex (BA24)',
       'Brain - Caudate (basal ganglia)', 'Brain - Cerebellar Hemisphere',
       'Brain - Frontal Cortex (BA9)', 'Brain - Putamen (basal ganglia)',
       'Breast - Mammary Tissue', 'Cells - Cultured fibroblasts',
       'Heart - Atrial Appendage', 'Heart - Left Ventricle', 'K562',
       'Liver', 'Lung', 'Muscle - Skeletal', 'Pancreas'], dtype=object)

In [22]:
# Count the distinct tissue labels.
len(np.unique(novel_transcript_express_tissue['tissue']))

15

We will remove K562 samples from the dataset.

In [23]:
# Exclude every sample whose tissue label is K562 from both tables.
novel_is_k562 = novel_transcript_express_tissue['tissue'].isin(['K562'])
novel_transcript_express_tissue_clean = novel_transcript_express_tissue[~novel_is_k562]

annotated_is_k562 = annotated_transcript_express_tissue['tissue'].isin(['K562'])
annotated_transcript_express_tissue_clean = annotated_transcript_express_tissue[~annotated_is_k562]

In [26]:
# Inspect the last 3 rows of the filtered novel-transcript table.
novel_transcript_express_tissue_clean.tail(3)

Unnamed: 0,sample_id,tissue,130750ff-7bd5-42fc-9951-2ee00e4c4253_ENSG00000204540.10_ENSG00000204540.10,0d6ef114-5d3f-4df1-a18b-02c128c21368_ENSG00000204540.10_ENSG00000204540.10,e692f135-46a2-4cf8-953c-c0c636810214_ENSG00000204540.10_ENSG00000204540.10,73af7ae0-6dc4-4d06-8132-8e644cb837f4_ENSG00000204540.10_ENSG00000204540.10,7d8b36a5-f3cf-440a-b63e-7c020aa740e8_ENSG00000065665.20_ENSG00000065665.20,5bbbf8aa-951c-47d6-9ec3-744348d39a6a-1_ENSG00000065665.20_ENSG00000065665.20,444373bd-bde4-473c-8bcd-a797e9e67d05_ENSG00000065665.20_ENSG00000065665.20,4dc07033-854e-4571-9154-635c4bd8cb5b_ENSG00000104884.14_ENSG00000104884.14,...,263a46b0-25e6-4c87-aefc-fc46c060d2c5_ENSG00000163938.16_ENSG00000163938.16,0863c39a-d70c-441c-9c17-da9213b0c898_ENSG00000163938.16_ENSG00000163938.16,8cae0922-74c0-4b44-b57f-bad8a387b9ac_ENSG00000163938.16,1f1d9c64-7ace-4a58-a50c-05a0a415533e_ENSG00000163938.16_ENSG00000163938.16,07f25262-3f4a-4ba6-bcfc-d82a7c3351f7_ENSG00000163938.16,3f2ac28b-a3c1-4237-a27d-8fe5ad946e11_ENSG00000174231.16,74a320c6-b08c-48ab-b21f-fee3ad71669f_ENSG00000174231.16_ENSG00000174231.16,dc0b68a1-95fe-4962-8993-51bf0f30da17_ENSG00000174231.16_ENSG00000174231.16,e878980f-9ef6-4ad5-b3f8-47dbb38fd9de_ENSG00000174231.16_ENSG00000174231.16,5d80bee1-439b-42d6-bc8f-f9dd2d7d6b9c_ENSG00000174231.16_ENSG00000174231.16
85,GTEX-ZT9X-1826-SM-4V2KV_rep,Muscle - Skeletal,,,,,2.165014e-12,0.194732,83.649533,3.875,...,,,,,,,,,,
86,GTEX-ZT9X-1826-SM-4V2KV_rep2,Muscle - Skeletal,,,,,1.402681e-20,11.012744,72.610465,2.736842,...,,,,,,,,,,
87,GTEX-ZVZP-0226-SM-4VEIO,Heart - Left Ventricle,,,,,0.004115971,6.75475,14.518519,,...,,,,,,,,,,


In [27]:
# Inspect the last 3 rows of the filtered annotated-transcript table.
annotated_transcript_express_tissue_clean.tail(3)

Unnamed: 0,sample_id,tissue,ENST00000479581.5_ENSG00000204540.10,ENST00000298428.13-1_ENSG00000065665.20,ENST00000304267.12_ENSG00000065665.20,ENST00000256015.4-1_ENSG00000133639.4,ENST00000256015.4_ENSG00000133639.4,ENST00000493834.2_ENSG00000143409.15,ENST00000312210.9_ENSG00000143409.15,ENST00000588737.5_ENSG00000141425.17,...,ENST00000579039.2_ENSG00000266412.5,ENST00000308388.6_ENSG00000173540.12,ENST00000262126.8_ENSG00000101745.16,ENST00000532097.5_ENSG00000185627.17,ENST00000431206.6_ENSG00000185627.17,ENST00000352303.9_ENSG00000185627.17,ENST00000525665.5_ENSG00000185627.17,ENST00000542794.5_ENSG00000167985.6,ENST00000394799.6_ENSG00000163938.16,ENST00000304992.10_ENSG00000174231.16
85,GTEX-ZT9X-1826-SM-4V2KV_rep,Muscle - Skeletal,,137.155736,2.241024e-74,156.997453,0.002547,2.715009,0.000908,3.593556,...,,,,,,,,,,
86,GTEX-ZT9X-1826-SM-4V2KV_rep2,Muscle - Skeletal,,97.376791,1.778325e-46,116.998317,0.001683,0.00043,2.263158,7.983395,...,,,,,,,,,,
87,GTEX-ZVZP-0226-SM-4VEIO,Heart - Left Ventricle,,6.722615,0.0,29.999712,0.000288,2.666808,9e-06,0.000153,...,,,,,,,,,,


In [28]:
# (rows, columns) of the filtered novel-transcript table.
novel_transcript_express_tissue_clean.shape

(88, 62110)

In [29]:
# (rows, columns) of the filtered annotated-transcript table.
annotated_transcript_express_tissue_clean.shape

(88, 18265)

#### Export Clean Dataframes

In [30]:
# Export is currently disabled; uncomment the line below to write the cleaned novel-transcript table to CSV.
# NOTE(review): to_csv writes the row index by default, which is read back as an extra
# 'Unnamed: 0' column; consider passing index=False when re-enabling.
#novel_transcript_express_tissue_clean.to_csv('gtex_v9_data\\data_for_analysis\\my_liqa_data\\liqa_novel_transcript_quant_clean.csv', sep=',')

In [31]:
# Export is currently disabled; uncomment the line below to write the cleaned annotated-transcript table to CSV.
# NOTE(review): to_csv writes the row index by default, which is read back as an extra
# 'Unnamed: 0' column; consider passing index=False when re-enabling.
#annotated_transcript_express_tissue_clean.to_csv('gtex_v9_data\\data_for_analysis\\my_liqa_data\\liqa_annotated_transcript_quant_clean.csv', sep=',')