In [1]:
# enables access to directories/files
import os

# general purpose modules for handling data
import numpy as np
from numpy import array
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

# custom module for handling telomere length data
import telomere_methods_astros as telo_ma

---
&nbsp; 

Telomere length measurements from telomere FISH are stored in individual Excel files, 
one per individual per timepoint. Thus, we have roughly 200 Excel files to extract data from. 
The data are extracted into a dict, from which a dataframe is created and saved as a .csv for later use.

&nbsp;

---

# Extracting telomere length data by FISH for all astronauts into a dict

In [2]:
# ls '../data/raw data/'

In [3]:
# Pull individual telomere length data from the Excel files containing ImageJ
# telometer data. Per the original author's notes, the telo_ma helper purges
# DAPI rows, missing values, and values more than 3 standard deviations from
# the mean, and standardizes all telomere measurements according to the
# microscope used for imaging, using Cy3 bead values to control for
# microscope intensity differences.
# The helper prints one progress line per file it reads.

dict_astro_individ_telos_dfs = telo_ma.generate_dictionary_for_telomere_length_data(
    '../data/raw data/astros telo data/')


dso7673_L-60.xlsx telomere data acquisition in progress..
dso7673_R+270.xlsx telomere data acquisition in progress..
dso2381_L-60.xlsx telomere data acquisition in progress..
dso2494_R+7.xlsx telomere data acquisition in progress..
dso2494_L-270.xlsx telomere data acquisition in progress..
dso2381_R+180.xlsx telomere data acquisition in progress..
dso1536_R+60.xlsx telomere data acquisition in progress..
dso2494_L-180.xlsx telomere data acquisition in progress..
dso1536_R+7.xlsx telomere data acquisition in progress..
dso1062_R+7.xlsx telomere data acquisition in progress..
dso2381_R+270.xlsx telomere data acquisition in progress..
dso7673_R+180.xlsx telomere data acquisition in progress..
dso2171_L-180.xlsx telomere data acquisition in progress..
dso1536_FD140.xlsx telomere data acquisition in progress..
dso1536_L-60.xlsx telomere data acquisition in progress..
dso3228_R+180.xlsx telomere data acquisition in progress..
dso4819_L-180.xlsx telomere data acquisition in progress..
dso3228

## Making a dataframe from the dict

In [4]:
# Build one dataframe from the dict created above. Per the original author's
# notes, the telo_ma helper standardizes the number of telomeres per sample to
# the theoretical maximum for these samples (184 per metaphase, 30 metaphases
# per sample) by randomly sampling from each sample's data.
# NOTE(review): no random seed is set anywhere in this notebook — confirm
# whether telo_ma seeds its sampling; otherwise re-runs may not reproduce the
# same values.

astro_df = telo_ma.make_astronaut_dataframe(dict_astro_individ_telos_dfs)
# preview the first 4 rows
astro_df.head(4)

Unnamed: 0,astro number,astro id,timepoint,flight status,telo data,telo means,Q1,Q2-3,Q4
0,1,5163,L-270,Pre-Flight,0 132.793184 1 73.621784 2 ...,87.67212,telos preF Q1 <0.25,telos preF Q2-3 >0.25 & <0.75,telos preF Q4 >0.75
1,1,5163,L-180,Pre-Flight,0 72.034748 1 85.833612 2 ...,101.077756,telos preF Q1 <0.25,telos preF Q2-3 >0.25 & <0.75,telos preF Q4 >0.75
2,1,5163,L-60,Pre-Flight,0 89.558971 1 95.806883 2 ...,128.599235,telos preF Q1 <0.25,telos preF Q2-3 >0.25 & <0.75,telos preF Q4 >0.75
3,1,5163,FD90,Mid-Flight,0 77.530905 1 71.383228 2 ...,101.183129,telos preF Q1 <0.25,telos preF Q2-3 >0.25 & <0.75,telos preF Q4 >0.75


## Saving all astros telo dataframe to csv for later retrieval

In [5]:
# Work on a real copy. Plain assignment (`copy_astro_df = astro_df`) only binds a
# second name to the same dataframe, so the conversion below would also overwrite
# astro_df['telo data'], and re-running this cell would then fail because the
# already-converted lists have no .tolist() method.
copy_astro_df = astro_df.copy()

# convert each row's telomere measurements into a plain Python list before saving
copy_astro_df['telo data'] = copy_astro_df['telo data'].apply(lambda row: row.tolist())

# write the compiled astronaut dataframe to csv, omitting the row index
copy_astro_df.to_csv('../data/compiled data/All_astronauts_telomere_length_dataframe.csv', index = False)

## Reading in astronaut telomere length data per cell

In [6]:
# NOTE(review): this import/config cell sits mid-notebook rather than with the
# imports at the top, and importlib is not used in any cell shown here.
import importlib
# load IPython's autoreload extension; mode 2 is meant to pick up edits to
# imported modules (e.g. telo_ma) without restarting the kernel
%load_ext autoreload
%autoreload 2
# NOTE(review): %reload_ext directly after %load_ext looks redundant — confirm it is needed
%reload_ext autoreload

In [9]:
# Second pass over the same raw astronaut folder read earlier in this notebook,
# this time with the telo_ma helper that (per its name) collects telomere values
# per cell. The helper prints one progress line per file it reads.
dict_astro_cell_telos_dfs = telo_ma.grab_astro_telo_values_per_cell_generate_dictionary(
    '../data/raw data/astros telo data/')

dso7673_L-60.xlsx telomere data acquisition in progress..
dso7673_R+270.xlsx telomere data acquisition in progress..
dso2381_L-60.xlsx telomere data acquisition in progress..
dso2494_R+7.xlsx telomere data acquisition in progress..
dso2494_L-270.xlsx telomere data acquisition in progress..
dso2381_R+180.xlsx telomere data acquisition in progress..
dso1536_R+60.xlsx telomere data acquisition in progress..
dso2494_L-180.xlsx telomere data acquisition in progress..
dso1536_R+7.xlsx telomere data acquisition in progress..
dso1062_R+7.xlsx telomere data acquisition in progress..
dso2381_R+270.xlsx telomere data acquisition in progress..
dso7673_R+180.xlsx telomere data acquisition in progress..
dso2171_L-180.xlsx telomere data acquisition in progress..
dso1536_FD140.xlsx telomere data acquisition in progress..
dso1536_L-60.xlsx telomere data acquisition in progress..
dso3228_R+180.xlsx telomere data acquisition in progress..
dso4819_L-180.xlsx telomere data acquisition in progress..
dso3228

In [10]:
# build the per-cell astronaut dataframe from the per-cell results gathered by telo_ma
astro_cells_df = telo_ma.make_astronaut_cell_data_dataframe(dict_astro_cell_telos_dfs)

In [None]:
# Convert each row's per-cell telomere values into a plain Python list before
# saving. Values that are already lists (i.e. this cell is being run a second
# time) are passed through unchanged instead of raising AttributeError.
astro_cells_df['telo data per cell'] = astro_cells_df['telo data per cell'].apply(
    lambda row: row.tolist() if hasattr(row, 'tolist') else row)

# Drop the quartile columns by reassignment rather than inplace mutation;
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
astro_cells_df = astro_cells_df.drop(columns=['Q1', 'Q2-3', 'Q4'], errors='ignore')

In [None]:
# write the per-cell astronaut dataframe to csv (row index omitted) for later retrieval
astro_cells_df.to_csv('../data/compiled data/All_astronauts_telomere_length_per_cell_dataframe.csv', index = False)

# Extracting telomere length data by FISH for all astro controls into a dict

In [None]:
# Extract telomere length data for the astronaut controls into a dict.
# The directory path now ends with a trailing slash, like every other raw-data
# directory passed to telo_ma in this notebook. This keeps the path valid
# whether the helper joins directory and file name with os.path.join or with
# plain string concatenation.
dict_ctrl_individ_telos_dfs = telo_ma.grab_control_values_generate_dictionary(
    '../data/raw data/controls telo data/')

## Making dataframe from all controls dict

In [None]:
# Build the controls dataframe from the controls dict, mirroring the astronaut
# dataframe step earlier in the notebook.
# NOTE(review): this calls make_control_cell_data_dataframe, the same builder
# the per-cell controls step uses further down, yet the save step that follows
# reads a 'telo data' column (the per-cell step reads 'telo data per cell').
# Confirm this is the intended telo_ma function for the non-per-cell data.

control_df = telo_ma.make_control_cell_data_dataframe(dict_ctrl_individ_telos_dfs)
# label every row as a control sample
control_df['flight status'] = 'Controls'
# preview the first 2 rows
control_df.head(2)

## Saving all astro controls telo data for later retrieval

In [None]:
# Work on a real copy. Plain assignment (`copy_control_df = control_df`) only
# binds a second name to the same dataframe, so the conversion below would also
# overwrite control_df['telo data'], and re-running this cell would then fail
# because the already-converted lists have no .tolist() method.
copy_control_df = control_df.copy()

# convert each row's telomere measurements into a plain Python list before saving
copy_control_df['telo data'] = copy_control_df['telo data'].apply(lambda row: row.tolist())

# write the compiled controls dataframe to csv, omitting the row index
copy_control_df.to_csv('../data/compiled data/All_astro_controls_telomere_length_dataframe.csv', index=False)

## Reading in control telomere length data per cell, saving to file

In [None]:
# Read the raw controls folder with the telo_ma helper that (per its name)
# collects telomere values per cell; the result is kept for the dataframe step below.
dict_control_cell_telos_dfs = telo_ma.grab_control_telo_values_per_cell_generate_dictionary(
    '../data/raw data/controls telo data/')

In [None]:
# assemble the per-cell controls dataframe from the per-cell results
ctrl_cells_df = telo_ma.make_control_cell_data_dataframe(dict_control_cell_telos_dfs)

# swap each row's per-cell values for a plain Python list ahead of saving
per_cell_column = 'telo data per cell'
per_cell_as_lists = ctrl_cells_df[per_cell_column].apply(lambda values: values.tolist())
ctrl_cells_df[per_cell_column] = per_cell_as_lists

In [None]:
# write the per-cell controls dataframe to csv (row index omitted) for later retrieval
ctrl_cells_df.to_csv('../data/compiled data/All_controls_telomere_length_per_cell_dataframe.csv', index = False)

# Reading in astronaut urine/blood biochemical data

In [None]:
# load the biochemistry means/SD workbook; the urine section below is sliced from this frame
biochem_data = pd.read_excel('../data/raw data/Biochemistry means_SD n11 Telomere subjects.xlsx')
# quick (rows, columns) check of what was read
biochem_data.shape

## Processing urine biochem data (n=11)

In [None]:
# Urine section: the first 4 rows and first 11 columns of the loaded workbook.
# Built as a single chain so the result is a new dataframe rather than a slice
# of biochem_data. Renaming in place and adding a column on the slice is the
# pattern that triggers pandas' SettingWithCopyWarning.
urine_biochem_data = (
    biochem_data.iloc[0:4, 0:11]
    # shorten the long descriptive header of the first column
    .rename(columns={'Urine Biochemistry (n=11, not all time points have all 11 subjects though)': 'biochemistry'})
    # tag every row with its sample type
    .assign(**{'sample type': 'urine'})
)
urine_biochem_data

## Processing blood biochem data (n=11)

In [15]:
# Blood section: re-read the workbook skipping the first 5 rows so the blood
# header row becomes the column names, then keep the first 9 columns.
# Built as a single chain so the result is a new dataframe. Renaming in place
# and adding a column on an .iloc slice is the pattern that triggers pandas'
# SettingWithCopyWarning.
blood_biochem_data = (
    pd.read_excel('../data/raw data/Biochemistry means_SD n11 Telomere subjects.xlsx', skiprows=5)
    .iloc[:, 0:9]
    # shorten the long descriptive header of the first column
    .rename(columns={'Blood Biochemistry  (n=11, not all time points have all 11 subjects though)': 'biochemistry'})
    # tag every row with its sample type
    .assign(**{'sample type': 'blood'})
)
blood_biochem_data.head(4)

Unnamed: 0,biochemistry,Pre,FD15,FD30,FD60,FD120,FD180,R+0,R+180,sample type
0,CCL2/MCP-1 pg/ml,83 ± 17,89 ± 21,94 ± 17,96 ± 26,96 ± 25,93 ± 22,182 ± 146,87 ± 23,blood
1,CCL3/MIP-1a pg/ml,438 ± 194,644 ± 214,652 ± 129,642 ± 218,489 ± 89,641 ± 247,310 ± 96,388 ± 105,blood
2,CCL4/MIP1B pg/ml,54 ± 17,75 ± 27,101 ± 34,75 ± 34,75 ± 37,70 ± 28,51 ± 16,48 ± 14,blood
3,CCL5/RANTES pg/ml,6902 ± 4393,15462 ± 2388,17986 ± 601,15022 ± 2726,14227 ± 2755,13636 ± 3964,3890 ± 3563,5569 ± 2539,blood


## Saving the biochem data to CSV files

In [16]:
# write the urine and blood biochemistry tables to separate csv files (row index omitted)
urine_biochem_data.to_csv('../data/compiled data/urine_biochem_data.csv', index=False)
blood_biochem_data.to_csv('../data/compiled data/blood_biochem_data.csv', index=False)