In [1]:
%%html
<style>
div.output_stderr{
    display:none
}
</style>
<a id='top'></a>
# div.output_stderr{display:none}

# mini_pipelines_toolbox.py function Usage Examples.
* Documentation for "../src/mini_pipelines_toolbox.py" function operation.

### Links:
##### (private) source repository:  https://github.com/dlanier/minipipelines.git

### Spreadsheet Transformations
1. [select_subtype_df](#select_subtype_df)
2. [common_samples_df](#common_samples_df)
3. [select_genes_df](#select_genes_df)
4. [merge_df](#merge_df)
5. [cluster_statistics_df](#cluster_statistics_df)


### Basic Transformations
1. [transpose_df](#transpose_df)
2. [abs_df](#abs_df)
3. [z_transform_df](#z_transform_df)
4. [log_transform_df](#log_transform_df)
5. [threshold_df](#threshold_df)
6. [stats_df](#stats_df)


### Utilities
1. [write_transform_df](#write_transform_df)
2. [get_outfile_name](#get_outfile_name)
3. [read_a_list_file](#read_a_list_file)


In [2]:
import os
import sys
import pandas as pd

sys.path.insert(1, '../src')
import mini_pipelines_toolbox as mpt

data_dir = '../data/spreadsheets'

results_dir = os.path.join(os.path.abspath('../test'), 'run_dir/results')
if not os.path.isdir(results_dir):
    print('Created witout overwrite:', results_dir)
    os.makedirs(results_dir)

In [3]:
#                    Display the content of the data directory
os.listdir(data_dir)

['gene_samples_1.tsv',
 'gene_samples_1_list.txt',
 'gene_samples_small.tsv',
 'gene_samples_small_labels.tsv',
 'spreadsheet_A_.G.tsv',
 'spreadsheet_B_.G.tsv',
 'spreadsheet_One.txt',
 'spreadsheet_Two.txt',
 'tcga_ucec_somatic_mutation_data.df',
 'transform_5_spreadsheet.tsv',
 'UCEC_phenotype.txt']

# Spreadsheet Transformations

<a id="select_subtype_df"></a>

### sub_df, phen_df = select_subtype_df(spread_df, pheno_df, phenotype_id, select_category) [[top]](#top)
* spreadsheet_df is genes x samples.
* phenotype_df is samples x phenotypes.
* phenotype_id must exist as a column name in the phenotype_df.
* and select_category must exist in the phenotype_id of that column.
* "phen_df" has only the samples (rows) where "select_catagory" match
* "sub_df" - the subtype dataframe will have only those samples (columns) where the category matched in the phenotpye.

In [4]:
spreadsheet_filename = 'tcga_ucec_somatic_mutation_data.df'
spreadsheet_filename = os.path.join(data_dir, spreadsheet_filename)
spreadsheet_df = pd.read_csv(spreadsheet_filename, sep='\t', index_col=0, header=0)
print(spreadsheet_df.head())
phenotype_filename = 'UCEC_phenotype.txt'
phenotype_filename = os.path.join(data_dir, phenotype_filename)
phenotype_df = pd.read_csv(phenotype_filename, sep='\t', index_col=0, header=0)
print(phenotype_df.head())
phenotype_id = 'grade'
select_category = 'grade 2'

sub_df, pheno_df = mpt.select_subtype_df(spreadsheet_df, phenotype_df, phenotype_id, select_category)
print(sub_df.head())
print(pheno_df.head())

                 TCGA-A5-A0G3  TCGA-A5-A0G5  TCGA-A5-A0G9  TCGA-A5-A0GA  \
ENSG00000121410             0             0             0             0   
ENSG00000268895             0             0             0             0   
ENSG00000148584             0             0             0             0   
ENSG00000175899             0             0             0             0   
ENSG00000166535             0             0             1             0   

                 TCGA-A5-A0GB  TCGA-A5-A0GD  TCGA-A5-A0GE  TCGA-A5-A0GH  \
ENSG00000121410             0             0             0             0   
ENSG00000268895             0             0             0             0   
ENSG00000148584             0             0             1             0   
ENSG00000175899             0             0             0             0   
ENSG00000166535             0             0             0             0   

                 TCGA-A5-A0GI  TCGA-A5-A0GJ      ...       TCGA-DI-A1NN  \
ENSG00000121410        

<a id='common_samples_df'></a>

### sp_1_df, sp_2_df = get_common_samples_df(spreadsheet_1_df, spreadsheet_2_df) [[top]](#top)

In [5]:
spreadsheet1_filename = 'spreadsheet_One.txt'
spreadsheet1_filename = os.path.join(data_dir, spreadsheet1_filename)
spreadsheet_1_df = pd.read_csv(spreadsheet1_filename, sep='\t', index_col=0, header=0)
print(spreadsheet_1_df.head())

spreadsheet2_filename = 'spreadsheet_Two.txt'
spreadsheet2_filename = os.path.join(data_dir, spreadsheet2_filename)
spreadsheet_2_df = pd.read_csv(spreadsheet2_filename, sep='\t', index_col=0, header=0)
print(spreadsheet_2_df.head())

sp_1_df, sp_2_df = mpt.common_samples_df(spreadsheet_1_df,spreadsheet_2_df)
print(sp_1_df.head())
print(sp_2_df.head())

              days_to_death  days_to_last  days_survival  diag_age  \
sample_id                                                            
TCGA-A5-A0G1         3251.0           NaN           3251        67   
TCGA-A5-A0G3            NaN        1079.0           1079        61   
TCGA-A5-A0G5            NaN         790.0            790        73   
TCGA-A5-A0GA          543.0           NaN            543        67   
TCGA-A5-A0GB            NaN         275.0            275        65   

                                   race               ethnicity  gender  
sample_id                                                                
TCGA-A5-A0G1                      white  not hispanic or latino  female  
TCGA-A5-A0G3  black or african american                     NaN  female  
TCGA-A5-A0G5  black or african american                     NaN  female  
TCGA-A5-A0GA                      white  not hispanic or latino  female  
TCGA-A5-A0GB                      white                     NaN  

<a id="select_genes_df"></a>

### shorter_df = select_genes_df(spreadsheet_df, gene_select_list) [[top]](#top)
* spreadsheet_df is a pandas dataframe.
* "gene_select_list" is a python list.

In [6]:
spreadsheet_filename = 'gene_samples_1.tsv'
spreadsheet_filename = os.path.join(data_dir, spreadsheet_filename)
spreadsheet_df = pd.read_csv(spreadsheet_filename, sep='\t', index_col=0, header=0)
print(spreadsheet_df.head())

list_file_name = 'gene_samples_1_list.txt'
list_file_name = os.path.join(data_dir, list_file_name)
gene_select_list = mpt.read_a_list_file(list_file_name)
print('\nSelect Genes:', gene_select_list)

short_df = mpt.select_genes_df(spreadsheet_df, gene_select_list)
short_df.head()

              UDDSE     YVRLR     OLHXN     JXCLP     FJFHS     XOOBN  \
AAASKYLDR  0.583288  0.869696  0.337184  0.777422  0.577689  0.061578   
DYUBIRUER  0.264433  0.653720  0.369235  0.415435  0.085363  0.391574   
GTVXFFTSR  0.410416  0.957459  0.580190  0.316355  0.035407  0.346374   
MPILUWTKU  0.793860  0.637038  0.859623  0.864524  0.112545  0.691766   
BXLOSXZBD  0.481778  0.271875  0.268117  0.785009  0.119547  0.221122   

              AXYTF  
AAASKYLDR  0.391267  
DYUBIRUER  0.366597  
GTVXFFTSR  0.932971  
MPILUWTKU  0.989345  
BXLOSXZBD  0.573399  

Select Genes: ['BXLOSXZBD', 'IOSIJZWXG', 'SACSCUYCP', 'RXNAMIEOD', 'ARARTETGK', 'AAASKYLDR', 'KONMCBVOY', 'GTVXFFTSR', 'KSMJCSFMM', 'NWGZQVHUL', 'MPILUWTKU', 'RWZKNXXLG', 'ZNUSAPEMV']


Unnamed: 0,UDDSE,YVRLR,OLHXN,JXCLP,FJFHS,XOOBN,AXYTF
MPILUWTKU,0.79386,0.637038,0.859623,0.864524,0.112545,0.691766,0.989345
KSMJCSFMM,0.599884,0.000296,0.691951,0.392975,0.139808,0.201443,0.420018
IOSIJZWXG,0.300951,0.979444,0.977429,0.860223,0.753513,0.475363,0.42619
BXLOSXZBD,0.481778,0.271875,0.268117,0.785009,0.119547,0.221122,0.573399
AAASKYLDR,0.583288,0.869696,0.337184,0.777422,0.577689,0.061578,0.391267


<a id='merge_df'></a>

### merged_df = merge_df(spreadsheet_1_df, spreadsheet_2_df) [[top]](#top)

In [7]:
spreadsheet_1_df = pd.read_csv(os.path.join(data_dir, 'spreadsheet_One.txt'), sep='\t', header=0, index_col=0)
print(spreadsheet_1_df.head())
spreadsheet_2_df = pd.read_csv(os.path.join(data_dir, 'spreadsheet_Two.txt'), sep='\t', header=0, index_col=0)
print(spreadsheet_2_df.head())

merged_df = mpt.merge_df(spreadsheet_1_df, spreadsheet_2_df)
print(merged_df.head)

              days_to_death  days_to_last  days_survival  diag_age  \
sample_id                                                            
TCGA-A5-A0G1         3251.0           NaN           3251        67   
TCGA-A5-A0G3            NaN        1079.0           1079        61   
TCGA-A5-A0G5            NaN         790.0            790        73   
TCGA-A5-A0GA          543.0           NaN            543        67   
TCGA-A5-A0GB            NaN         275.0            275        65   

                                   race               ethnicity  gender  
sample_id                                                                
TCGA-A5-A0G1                      white  not hispanic or latino  female  
TCGA-A5-A0G3  black or african american                     NaN  female  
TCGA-A5-A0G5  black or african american                     NaN  female  
TCGA-A5-A0GA                      white  not hispanic or latino  female  
TCGA-A5-A0GB                      white                     NaN  

<a id='cluster_statistics_df'></a>

### sub_df = cluster_statistics_df(spreadsheet_df, labels_df, centroid_statistic) [[top]](#top)
* labels_df is the columns selector for the spreadsheet.
* "centroid_statistic" is either "median", "std" or else: "mean" will be used.

#### Note that if the labels_df is from samples clustering output it requires column naming with pandas read_csv.

In [8]:
spreadsheet_filename = 'gene_samples_small.tsv'
spreadsheet_filename = os.path.join(data_dir, spreadsheet_filename)
spreadsheet_df = pd.read_csv(spreadsheet_filename, sep='\t', index_col=0, header=0)
print(spreadsheet_df.head())
phenotype_filename = 'gene_samples_small_labels.tsv'
phenotype_filename = os.path.join(data_dir, phenotype_filename)
phenotype_df = pd.read_csv(phenotype_filename, sep='\t', index_col=0, names=['sample','cluster_number'])
print(phenotype_df.head())

centroid_statistic = 'median'
sub_df = mpt.cluster_statistics_df(spreadsheet_df, phenotype_df, centroid_statistic)
sub_df.head()

              SALWI     JUUBB     QGAER     HPEAZ     ZEPZZ     WEVTL  \
BLAKYEFOI  0.718229  0.682086  0.367574  0.401972  0.798403  0.177443   
YGIMEGNPA  0.489176  0.252566  0.600857  0.221937  0.730823  0.272774   
YZRSYTJXA  0.548217  0.191769  0.423285  0.085875  0.420127  0.047882   
EKKHUWNYK  0.172777  0.910728  0.911077  0.640875  0.950508  0.436302   
CEDCXDTUJ  0.891059  0.266609  0.825600  0.303836  0.094569  0.639886   

              CHQAX     SCXTU     FZMUG     YYYZO     JWEIL  
BLAKYEFOI  0.639741  0.735678  0.995612  0.523272  0.289260  
YGIMEGNPA  0.596892  0.122799  0.930491  0.099120  0.138238  
YZRSYTJXA  0.931763  0.433657  0.441884  0.024010  0.671550  
EKKHUWNYK  0.523115  0.590391  0.542579  0.924989  0.283259  
CEDCXDTUJ  0.658354  0.343014  0.909111  0.704618  0.142060  
        cluster_number
sample                
SALWI                0
JUUBB                1
QGAER                2
HPEAZ                1
ZEPZZ                2


Unnamed: 0,0,1,2
BLAKYEFOI,0.581506,0.682086,0.551626
YGIMEGNPA,0.348006,0.252566,0.436816
YZRSYTJXA,0.477887,0.441884,0.421706
EKKHUWNYK,0.724052,0.542579,0.750734
CEDCXDTUJ,0.681486,0.303836,0.49145


# Basic transformations: dataframe to dataframe


<a id='transpose_df'></a>

## trans_df = transpose_df(spreadsheet_df) [[top]](#top)

In [9]:
td_file = 'gene_samples_1.tsv'
t_df = pd.read_csv(os.path.join(data_dir, td_file), sep='\t', index_col=0, header=0)
print(td_file, ':\n', t_df.head())
trans_df = mpt.transpose_df(t_df)
trans_df.head()

gene_samples_1.tsv :
               UDDSE     YVRLR     OLHXN     JXCLP     FJFHS     XOOBN  \
AAASKYLDR  0.583288  0.869696  0.337184  0.777422  0.577689  0.061578   
DYUBIRUER  0.264433  0.653720  0.369235  0.415435  0.085363  0.391574   
GTVXFFTSR  0.410416  0.957459  0.580190  0.316355  0.035407  0.346374   
MPILUWTKU  0.793860  0.637038  0.859623  0.864524  0.112545  0.691766   
BXLOSXZBD  0.481778  0.271875  0.268117  0.785009  0.119547  0.221122   

              AXYTF  
AAASKYLDR  0.391267  
DYUBIRUER  0.366597  
GTVXFFTSR  0.932971  
MPILUWTKU  0.989345  
BXLOSXZBD  0.573399  


Unnamed: 0,AAASKYLDR,DYUBIRUER,GTVXFFTSR,MPILUWTKU,BXLOSXZBD,ZNUSAPEMV,KSMJCSFMM,QEWEVQWNP,KONMCBVOY,PMAZCXIPY,RWZKNXXLG,ARARTETGK,RXNAMIEOD,SACSCUYCP,IOSIJZWXG,NWGZQVHUL
UDDSE,0.583288,0.264433,0.410416,0.79386,0.481778,0.604271,0.599884,0.26076,0.568654,0.292753,0.886983,0.560096,0.347432,0.898009,0.300951,0.819528
YVRLR,0.869696,0.65372,0.957459,0.637038,0.271875,0.641702,0.000296,0.725221,0.182183,0.237518,0.639404,0.603171,0.999505,0.840288,0.979444,0.82311
OLHXN,0.337184,0.369235,0.58019,0.859623,0.268117,0.132654,0.691951,0.215147,0.507811,0.586945,0.185245,0.574982,0.51005,0.716022,0.977429,0.901851
JXCLP,0.777422,0.415435,0.316355,0.864524,0.785009,0.601822,0.392975,0.588376,0.269978,0.073814,0.386716,0.418067,0.65521,0.612684,0.860223,0.4578
FJFHS,0.577689,0.085363,0.035407,0.112545,0.119547,0.369204,0.139808,0.611777,0.925807,0.998298,0.036916,0.765465,0.618122,0.449568,0.753513,0.877375


<a id='abs_df'></a>

### abs_val_df = abs_df(spreadsheet_df) [[top]](#top)
* get the magnitude of a spreadsheet (dataframe)

In [10]:
td_file = 'gene_samples_1.tsv'
t_df = pd.read_csv(os.path.join(data_dir, td_file), sep='\t', index_col=0, header=0)
print(td_file)
# print(t_df.head())
abs_val_df = mpt.abs_df(t_df)
abs_val_df.head()

gene_samples_1.tsv


Unnamed: 0,UDDSE,YVRLR,OLHXN,JXCLP,FJFHS,XOOBN,AXYTF
AAASKYLDR,0.583288,0.869696,0.337184,0.777422,0.577689,0.061578,0.391267
DYUBIRUER,0.264433,0.65372,0.369235,0.415435,0.085363,0.391574,0.366597
GTVXFFTSR,0.410416,0.957459,0.58019,0.316355,0.035407,0.346374,0.932971
MPILUWTKU,0.79386,0.637038,0.859623,0.864524,0.112545,0.691766,0.989345
BXLOSXZBD,0.481778,0.271875,0.268117,0.785009,0.119547,0.221122,0.573399


<a id='z_transform_df'></a>

### z_scored_df = z_transform_df(spreadsheet_df, axis=1) [[top]](#top)

In [11]:
td_file = 'gene_samples_1.tsv'
t_df = pd.read_csv(os.path.join(data_dir, td_file), sep='\t', index_col=0, header=0)
print(td_file, ':\n', t_df.head())
z_score_df = mpt.z_transform_df(t_df, axis=1)
z_score_df.head()

gene_samples_1.tsv :
               UDDSE     YVRLR     OLHXN     JXCLP     FJFHS     XOOBN  \
AAASKYLDR  0.583288  0.869696  0.337184  0.777422  0.577689  0.061578   
DYUBIRUER  0.264433  0.653720  0.369235  0.415435  0.085363  0.391574   
GTVXFFTSR  0.410416  0.957459  0.580190  0.316355  0.035407  0.346374   
MPILUWTKU  0.793860  0.637038  0.859623  0.864524  0.112545  0.691766   
BXLOSXZBD  0.481778  0.271875  0.268117  0.785009  0.119547  0.221122   

              AXYTF  
AAASKYLDR  0.391267  
DYUBIRUER  0.366597  
GTVXFFTSR  0.932971  
MPILUWTKU  0.989345  
BXLOSXZBD  0.573399  


Unnamed: 0,UDDSE,YVRLR,OLHXN,JXCLP,FJFHS,XOOBN,AXYTF
AAASKYLDR,0.583288,0.869696,0.337184,0.777422,0.577689,0.061578,0.391267
DYUBIRUER,0.264433,0.65372,0.369235,0.415435,0.085363,0.391574,0.366597
GTVXFFTSR,0.410416,0.957459,0.58019,0.316355,0.035407,0.346374,0.932971
MPILUWTKU,0.79386,0.637038,0.859623,0.864524,0.112545,0.691766,0.989345
BXLOSXZBD,0.481778,0.271875,0.268117,0.785009,0.119547,0.221122,0.573399


<a id='log_transform_df'></a>

### log_df = log_transform_df(spreadsheet_df, log_base=np.exp(1), log_offset=0) [[top]](#top)
* log_base default is the natural log
* log_offset default is 0 (none)

In [12]:
td_file = 'gene_samples_1.tsv'
t_df = pd.read_csv(os.path.join(data_dir, td_file), sep='\t', index_col=0, header=0)
print(td_file, ':\n', t_df.head())

log_df = mpt.log_transform_df(t_df)
log_df.head()

gene_samples_1.tsv :
               UDDSE     YVRLR     OLHXN     JXCLP     FJFHS     XOOBN  \
AAASKYLDR  0.583288  0.869696  0.337184  0.777422  0.577689  0.061578   
DYUBIRUER  0.264433  0.653720  0.369235  0.415435  0.085363  0.391574   
GTVXFFTSR  0.410416  0.957459  0.580190  0.316355  0.035407  0.346374   
MPILUWTKU  0.793860  0.637038  0.859623  0.864524  0.112545  0.691766   
BXLOSXZBD  0.481778  0.271875  0.268117  0.785009  0.119547  0.221122   

              AXYTF  
AAASKYLDR  0.391267  
DYUBIRUER  0.366597  
GTVXFFTSR  0.932971  
MPILUWTKU  0.989345  
BXLOSXZBD  0.573399  


Unnamed: 0,UDDSE,YVRLR,OLHXN,JXCLP,FJFHS,XOOBN,AXYTF
AAASKYLDR,0.583288,0.869696,0.337184,0.777422,0.577689,0.061578,0.391267
DYUBIRUER,0.264433,0.65372,0.369235,0.415435,0.085363,0.391574,0.366597
GTVXFFTSR,0.410416,0.957459,0.58019,0.316355,0.035407,0.346374,0.932971
MPILUWTKU,0.79386,0.637038,0.859623,0.864524,0.112545,0.691766,0.989345
BXLOSXZBD,0.481778,0.271875,0.268117,0.785009,0.119547,0.221122,0.573399


<a id='threshold_df'></a>

### thresh_df = threshold_df(spreadsheet_df, cut_off, sub_val, scope) [[top]](#top)
* Get a dataframe with values below "cut_off" set to "sub_val" unless "scope" is input as anything EXCEPT "SUB_BELOW"

In [13]:
#                           Choose a features x samples spreadsheet:
td_file = 'gene_samples_1.tsv'
t_df = pd.read_csv(os.path.join(data_dir, td_file), sep='\t', index_col=0, header=0)
# print(td_file, ':\n', t_df.head())
threshold = 0.3
threshold_df = mpt.threshold_df(t_df, cut_off=threshold, sub_val=0, scope='SUB_BELOW')
threshold_df.head()

Unnamed: 0,UDDSE,YVRLR,OLHXN,JXCLP,FJFHS,XOOBN,AXYTF
AAASKYLDR,0.583288,0.869696,0.337184,0.777422,0.577689,0.0,0.391267
DYUBIRUER,0.0,0.65372,0.369235,0.415435,0.0,0.391574,0.366597
GTVXFFTSR,0.410416,0.957459,0.58019,0.316355,0.0,0.346374,0.932971
MPILUWTKU,0.79386,0.637038,0.859623,0.864524,0.0,0.691766,0.989345
BXLOSXZBD,0.481778,0.0,0.0,0.785009,0.0,0.0,0.573399


<a id='stats_df'></a>

### report_df = stats_df(t_df, stats_function=stat, direction_reference=dr)  [[top]](#top)
* get a dataframe of requested stat as specified by the function and direction.
* function signiture:
    * stats_dataframe = stats_df(input_df, stats_function, direction_reference)
    * output will be a dataframe even if it only contains one value
* possible functions are: ['min', 'max', 'mean', 'median', 'variation', 'std_deviation', 'sum']
* possible directions are: ['columns', 'rows', 'all']

In [14]:
#                           Choose a features x samples spreadsheet:
td_file = 'gene_samples_1.tsv'
t_df = pd.read_csv(os.path.join(data_dir, td_file), sep='\t', index_col=0, header=0)
print(td_file, ':\n', t_df.head())

stat_f_list = ['min', 'max', 'mean', 'median', 'variation', 'std_deviation', 'sum']
dir_list = ['columns', 'rows', 'all']
for stat in stat_f_list:
    for dr in dir_list:
        print('\n\n\t\tstats_function =', stat, ': direction_reference =', dr)
        ho_df = mpt.stats_df(t_df, stats_function=stat, direction_reference=dr)
        print(ho_df)

gene_samples_1.tsv :
               UDDSE     YVRLR     OLHXN     JXCLP     FJFHS     XOOBN  \
AAASKYLDR  0.583288  0.869696  0.337184  0.777422  0.577689  0.061578   
DYUBIRUER  0.264433  0.653720  0.369235  0.415435  0.085363  0.391574   
GTVXFFTSR  0.410416  0.957459  0.580190  0.316355  0.035407  0.346374   
MPILUWTKU  0.793860  0.637038  0.859623  0.864524  0.112545  0.691766   
BXLOSXZBD  0.481778  0.271875  0.268117  0.785009  0.119547  0.221122   

              AXYTF  
AAASKYLDR  0.391267  
DYUBIRUER  0.366597  
GTVXFFTSR  0.932971  
MPILUWTKU  0.989345  
BXLOSXZBD  0.573399  


		stats_function = min : direction_reference = columns
       UDDSE     YVRLR     OLHXN     JXCLP     FJFHS     XOOBN     AXYTF
min  0.26076  0.000296  0.132654  0.073814  0.035407  0.017905  0.002284


		stats_function = min : direction_reference = rows
                min
AAASKYLDR  0.061578
DYUBIRUER  0.085363
GTVXFFTSR  0.035407
MPILUWTKU  0.112545
BXLOSXZBD  0.119547
ZNUSAPEMV  0.002284
KSMJCSFMM 

# Utilities:

<a id="write_transform_df"></a>

### write_transform_df(spreadsheet_df, spreadsheet_file_name, transform_name, results_directory) [[Top]](#top)
* construct the filename and write the spreadsheet

In [15]:
spreadsheet_name_full_path = os.path.join(data_dir, 'gene_samples_1.tsv')
transform_name = 'transformation_X'

spreadsheet_df = pd.read_csv(spreadsheet_name_full_path, sep='\t', index_col=0, header=0)

mpt.write_transform_df(spreadsheet_df, spreadsheet_name_full_path, transform_name, results_dir)
os.listdir(results_dir)

['gene_samples_1_transformation_X_Mon_11_Sep_2017_08_10_10.501739025.tsv']

<a id='read_a_list_file'></a>

## python_dakine_list = read_a_list_file(list_file_name) [[top]](#top)

In [16]:
list_file_name = 'gene_samples_1_list.txt'
list_file_name = os.path.join(data_dir, list_file_name)

some_list = mpt.read_a_list_file(list_file_name)
for list_name in some_list:
    print(list_name)

BXLOSXZBD
IOSIJZWXG
SACSCUYCP
RXNAMIEOD
ARARTETGK
AAASKYLDR
KONMCBVOY
GTVXFFTSR
KSMJCSFMM
NWGZQVHUL
MPILUWTKU
RWZKNXXLG
ZNUSAPEMV


<a id='get_outfile_name'></a>

## full_file_name = get_outfile_name(write_dir, file_name, xform_name, file_ext) [[top]](#top)

In [17]:
destination_dir = '../../tmp_result'
spreadsheet_file_name = 'maybe_fileWasXformed'
transform_name = 'transformation_X'
full_file_name = mpt.get_outfile_name(destination_dir, spreadsheet_file_name, transform_name, file_ext='.tsv')
print(full_file_name)

../../tmp_result/maybe_fileWasXformed_transformation_X_Mon_11_Sep_2017_08_10_10.519591093..tsv
