In [2]:
# enables access to directories/files
import os

# general purpose modules for handling data
import numpy as np
from numpy import array
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

# custom module for handling telomere length data
import telomere_methods_astros as telo_ma

---
##### &nbsp; 

Telomere length measurements obtained by telomere FISH are stored in individual Excel files, 
one per timepoint per individual, so there are roughly 200 Excel files to extract data from. 
The data is first extracted into a dictionary, from which a dataframe is created and saved as a .csv file for later use.

##### &nbsp; 
---

### Extracting telomere length data by FISH for all astronauts into a dict

In [4]:
# Build a dictionary of per-sample telomere length data from the Excel files
# that hold the ImageJ Telometer measurements. Per the notes for this step,
# DAPI rows, missing values, and values more than 3 standard deviations from
# the mean are removed, and all telomere measurements are standardized using
# the Cy3 bead values of the microscope each sample was imaged with, to
# control for intensity differences between microscopes.

dict_astro_individ_telos_dfs = telo_ma.generate_dictionary_for_telomere_length_data(
    '../../names cleaned every astro telo excel sheet'
)


dso7673_L-60.xlsx telomere data acquisition in progress..
dso7673_R+270.xlsx telomere data acquisition in progress..
dso2381_L-60.xlsx telomere data acquisition in progress..
dso2494_R+7.xlsx telomere data acquisition in progress..
dso2494_L-270.xlsx telomere data acquisition in progress..
dso2381_R+180.xlsx telomere data acquisition in progress..
dso1536_R+60.xlsx telomere data acquisition in progress..
dso2494_L-180.xlsx telomere data acquisition in progress..
dso1536_R+7.xlsx telomere data acquisition in progress..
dso1062_R+7.xlsx telomere data acquisition in progress..
dso2381_R+270.xlsx telomere data acquisition in progress..
dso7673_R+180.xlsx telomere data acquisition in progress..
dso2171_L-180.xlsx telomere data acquisition in progress..
dso1536_FD140.xlsx telomere data acquisition in progress..
dso1536_L-60.xlsx telomere data acquisition in progress..
dso3228_R+180.xlsx telomere data acquisition in progress..
dso4819_L-180.xlsx telomere data acquisition in progress..
dso3228

### Making a dataframe from the dict

In [12]:
# Turn the dictionary built above into a single dataframe. Per the notes for
# this step, the number of telomeres per sample is standardized to the
# theoretical maximum for these samples (184 per metaphase, 30 metaphases per
# sample) by randomly sampling from each sample's data.
# NOTE(review): the sampling happens inside telo_ma and no seed is set in this
# notebook -- confirm whether re-runs reproduce the same values.

astro_df = telo_ma.make_astronaut_dataframe(dict_astro_individ_telos_dfs)
astro_df.head(n=2)

Unnamed: 0,astro number,astro id,timepoint,flight status,telo data,telo means,Q1,Q2-3,Q4
0,1,5163,L-270,Pre-Flight,0 83.177414 1 73.103909 2 ...,87.67212,telos preF Q1 <0.25,telos preF Q2-3 >0.25 & <0.75,telos preF Q4 >0.75
1,1,5163,L-180,Pre-Flight,0 63.397928 1 75.158704 2 ...,101.077756,telos preF Q1 <0.25,telos preF Q2-3 >0.25 & <0.75,telos preF Q4 >0.75


### Saving all astros telo dataframe to csv for later retrieval

In [10]:
# Work on a real copy. Plain assignment (copy_astro_df = astro_df) only binds a
# second name to the same dataframe, so the conversion below would also
# overwrite astro_df['telo data'], and re-running this cell would then fail
# because the already-converted lists have no .tolist() method.
copy_astro_df = astro_df.copy()

# Convert each 'telo data' entry to a plain Python list before writing the CSV.
# assumes every entry exposes .tolist() (e.g. a pandas Series) -- TODO confirm
copy_astro_df['telo data'] = copy_astro_df['telo data'].apply(lambda row: row.tolist())

# Save without the row index so the file can be read back cleanly later.
copy_astro_df.to_csv('../excel data/All_astronauts_telomere_length_dataframe.csv', index = False)

### Extracting telomere length data by FISH for all astronaut controls into a dict

In [11]:
# Collect the telomere length data for the astronaut control samples from the
# files in the control directory into a dictionary.
dict_ctrl_individ_telos_dfs = telo_ma.grab_control_values_generate_dictionary(
    '../../control files'
)

tsf3907 mphase TeloFISH ___ L-180.xlsx IT WORKS PEGGY!!! <3
tsf2580 mphase TeloFISH L-180.xlsx IT WORKS PEGGY!!! <3
tsf3609 mphase TeloFISH R+7.xlsx IT WORKS PEGGY!!! <3
tsf3609 mphase TeloFISH L-270.xlsx IT WORKS PEGGY!!! <3
tsf4127 mphase TeloFISH R+7.xlsx IT WORKS PEGGY!!! <3
tsf1264 mphase TeloFISH R+180.xlsx IT WORKS PEGGY!!! <3
tsf1264 mphase TeloFISH R+270.xlsx IT WORKS PEGGY!!! <3
tsf3609 mphase TeloFISH L-180.xlsx IT WORKS PEGGY!!! <3
tsf3907 mphase TeloFISH ___ L-270.xlsx IT WORKS PEGGY!!! <3
tsf2580 mphase TeloFISH L-270.xlsx IT WORKS PEGGY!!! <3
tsf1826 mphase TeloFISH ___ R+7.xlsx IT WORKS PEGGY!!! <3
tsf0397 mphase TeloFISH R+180.xlsx IT WORKS PEGGY!!! <3
TSF0646_R+7.xlsx IT WORKS PEGGY!!! <3
TSF0912_FD45.xlsx IT WORKS PEGGY!!! <3
tsf0397 mphase TeloFISH R+270.xlsx IT WORKS PEGGY!!! <3
TSF0912_R+270.xlsx IT WORKS PEGGY!!! <3
tsf1264 mphase TeloFISH R+60.xlsx IT WORKS PEGGY!!! <3
tsf4127 mphase TeloFISH R+180.xlsx IT WORKS PEGGY!!! <3
tsf4127 mphase TeloFISH R+270.xlsx IT 

### Making dataframe from dict

In [13]:
# Same procedure as for the astronaut dataframe, applied to the control
# dictionary.

control_df = telo_ma.make_control_dataframe(dict_ctrl_individ_telos_dfs)

# Tag every row as a control sample in the 'flight status' column.
control_df['flight status'] = 'Controls'
control_df.head(n=2)

Unnamed: 0,control id,timepoint,flight status controls,telo data,telo means,flight status
0,100,L-270,Pre-Flight,0 124.022720 1 80.905446 2 ...,117.607812,Controls
1,100,L-180,Pre-Flight,0 117.490812 1 158.703642 2 ...,146.984758,Controls


### Saving all astro controls telo data for later retrieval

In [14]:
# Work on a real copy. Plain assignment (copy_control_df = control_df) only
# binds a second name to the same dataframe, so the conversion below would also
# overwrite control_df['telo data'], and re-running this cell would then fail
# because the already-converted lists have no .tolist() method.
copy_control_df = control_df.copy()

# Convert each 'telo data' entry to a plain Python list before writing the CSV.
# assumes every entry exposes .tolist() (e.g. a pandas Series) -- TODO confirm
copy_control_df['telo data'] = copy_control_df['telo data'].apply(lambda row: row.tolist())

# Save without the row index so the file can be read back cleanly later.
copy_control_df.to_csv('../excel data/All_astro_controls_telomere_length_dataframe.csv', index=False)

### Reading in astronaut urine/blood biochemical data

In [6]:
# Load the biochemistry workbook into a dataframe.
biochem_data = pd.read_excel(
    '../excel data/Biochemistry means_SD n11 Telomere subjects.xlsx'
)

# Display (rows, columns) as a quick sanity check on what was read.
biochem_data.shape

(42, 11)

### Processing urine biochem data 

In [14]:
# Take the first 4 rows and first 11 columns of the biochemistry sheet as the
# urine table, give the long header column a short name, and tag the rows.
#
# rename() and assign() each return a new dataframe, so nothing is written
# into a slice of biochem_data. The previous version renamed in place and set
# a column on the iloc slice, which triggers pandas' SettingWithCopyWarning
# and leaves it unclear whether biochem_data itself is touched.
urine_biochem_data = (
    biochem_data
    .iloc[0:4, 0:11]
    .rename(columns={'Urine Biochemistry (n=11, not all time points have all 11 subjects though)': 'biochemistry'})
    .assign(**{'sample type': 'urine'})
)
urine_biochem_data

Unnamed: 0,biochemistry,Pre,FD15,FD30,FD60,FD120,FD180,R+0 day 1,R+0 day 2,R+30 day 1,R+30 day 2,sample type
0,"8-OHdG, Urine ug/gCr",2.6 ± 1,3.1 ± 1,3 ± 1,3.6 ± 1.2,3 ± 1,2.5 ± 0.6,2.4 ± 0.8,2.4 ± 1,1.7 ± 0.7,1.8 ± 1,urine
1,"Copper, Urine umol/day",0.28 ± 0.17,0.24 ± 0.04,0.19 ± 0.02,0.24 ± 0.04,0.21 ± 0.05,0.24 ± 0.08,0.58 ± 0.71,0.25 ± 0.07,0.26 ± 0.1,0.34 ± 0.32,urine
2,PGF2-alpha ng/mg Cr,1.84 ± 1.2,2.22 ± 0.94,2.48 ± 1.05,2.92 ± 1.38,2.7 ± 1.26,3.12 ± 1.14,1.53 ± 1.06,1.59 ± 0.91,1.88 ± 1.12,1.72 ± 1.11,urine
3,"Selenium, Urine umol/day",1.02 ± 0.39,1.11 ± 0.35,1.08 ± 0.3,1.26 ± 0.54,1.14 ± 0.42,1.19 ± 0.42,0.88 ± 0.21,0.94 ± 0.27,0.96 ± 0.31,1.03 ± 0.41,urine


### Processing blood biochem data

In [71]:
# Read the same workbook again, skipping the first 5 rows so that the blood
# biochemistry header row becomes the column header. Then keep the first 9
# columns, give the long header column a short name, and tag the rows.
#
# rename() and assign() each return a new dataframe, so nothing is mutated on
# an iloc slice. The previous version renamed in place and set a column on the
# slice, which triggers pandas' SettingWithCopyWarning.
blood_biochem_data = (
    pd.read_excel('../excel data/Biochemistry means_SD n11 Telomere subjects.xlsx', skiprows=5)
    .iloc[:, 0:9]
    .rename(columns={'Blood Biochemistry  (n=11, not all time points have all 11 subjects though)': 'biochemistry'})
    .assign(**{'sample type': 'blood'})
)
blood_biochem_data.head(4)

Unnamed: 0,biochemistry,Pre,FD15,FD30,FD60,FD120,FD180,R+0,R+180,sample type
0,CCL2/MCP-1 pg/ml,83 ± 17,89 ± 21,94 ± 17,96 ± 26,96 ± 25,93 ± 22,182 ± 146,87 ± 23,blood
1,CCL3/MIP-1a pg/ml,438 ± 194,644 ± 214,652 ± 129,642 ± 218,489 ± 89,641 ± 247,310 ± 96,388 ± 105,blood
2,CCL4/MIP1B pg/ml,54 ± 17,75 ± 27,101 ± 34,75 ± 34,75 ± 37,70 ± 28,51 ± 16,48 ± 14,blood
3,CCL5/RANTES pg/ml,6902 ± 4393,15462 ± 2388,17986 ± 601,15022 ± 2726,14227 ± 2755,13636 ± 3964,3890 ± 3563,5569 ± 2539,blood


In [74]:
# Write both processed biochemistry tables to CSV for later retrieval; the row
# index is omitted from the files.
urine_biochem_data.to_csv(path_or_buf='../excel data/urine_biochem_data.csv', index=False)
blood_biochem_data.to_csv(path_or_buf='../excel data/blood_biochem_data.csv', index=False)