# Import, filter, and save EDD data
EDD holds a large amount of data we do not need here (biolector and proteomics). Download the studies, filter out everything except the isoprenol measurements, and save the result locally to save time later.

In [1]:
import os

import edd_utils as eddu
import pandas as pd

## Download the data

These are the [Experiment Data Depot](https://pubs.acs.org/doi/full/10.1021/acssynbio.7b00204) server, the corresponding username, and the slugs (addresses) of the studies to be downloaded:

In [2]:
# EDD instance to query and the account used to log in.
edd_server, username = "edd.jbei.org", "pckinnunen"

# Slug (address) of each study on the EDD server, DBTL0 through DBTL6.
study_slug_dbtl0 = "corrected-crispri-automation-for-enhanced-isopreno"
study_slug_dbtl1 = "crispri-automation-for-enhanced-isoprenol-producti"
study_slug_dbtl2 = "crispri-automation-for-enhanced-isoprenol-pro-05e7"
study_slug_dbtl3 = "crispri-automation-for-enhanced-isoprenol-pro-a97b"
study_slug_dbtl4 = "crispri-automation-for-enhanced-isoprenol-pro-9d3d"
study_slug_dbtl5 = "crispri-automation-for-enhanced-isoprenol-pro-271b"
study_slug_dbtl6 = "crispri-automation-for-enhanced-isoprenol-pro-6e5e"

We try connecting to the server with our login and password:

In [3]:
# Open an authenticated EDD session (the call prompts for the password).
# Only ordinary errors are caught: a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit, making it impossible to abort the prompt.
# On failure `session` stays undefined; the export cell further down
# catches the resulting NameError.
try:
    session = eddu.login(edd_server=edd_server, user=username)
except Exception as err:
    print('ERROR! Connection to EDD failed. We will try to load data from disk...')
    # Surface the underlying cause instead of discarding it.
    print(f'Reason: {type(err).__name__}: {err}')
else:
    print('OK! Connection to EDD successful. We will try to load data from EDD...')

Password for pckinnunen:  ········


OK! Connection to EDD successful. We will try to load data from EDD...


And then we try to export the study from the EDD instance:

In [4]:
# try:
#     df_dbtl0 = eddu.export_study(session, study_slug_dbtl0, edd_server=edd_server)
# except (NameError, AttributeError, KeyError):
#     print(f'ERROR! Not able to export DBTL0 study.')

In [5]:
# try:
#     df_dbtl1 = eddu.export_study(session, study_slug_dbtl1, edd_server = edd_server)
# except (NameError, AttributeError, KeyError):
#     print(f'ERROR! Not able to export DBTL1 study.')    

In [6]:
# try:
#     df_dbtl2 = eddu.export_study(session, study_slug_dbtl2, edd_server = edd_server)
# except (NameError, AttributeError, KeyError):
#     print(f'ERROR! Not able to export DBTL2 study.')   

In [7]:
# try:
#     df_dbtl3 = eddu.export_study(session, study_slug_dbtl3, edd_server = edd_server)
# except (NameError, AttributeError, KeyError):
#     print(f'ERROR! Not able to export DBTL3 study.')   

In [8]:
# try:
#     df_dbtl4 = eddu.export_study(session, study_slug_dbtl4, edd_server = edd_server)
# except (NameError, AttributeError, KeyError):
#     print(f'ERROR! Not able to export DBTL4 study.')   

In [9]:
# try:
#     df_dbtl5 = eddu.export_study(session, study_slug_dbtl5, edd_server = edd_server)
# except (NameError, AttributeError, KeyError):
#     print(f'ERROR! Not able to export DBTL5 study.')   

In [10]:
# Download the full DBTL6 study from EDD into a DataFrame.
# A NameError here means `session` was never created (login failed);
# the other caught errors come from the export call itself.
try:
    df_dbtl6 = eddu.export_study(session, study_slug_dbtl6, edd_server=edd_server)
except (NameError, AttributeError, KeyError) as err:
    # Report what went wrong rather than a context-free message.
    print(f'ERROR! Not able to export DBTL6 study. ({type(err).__name__}: {err})')

  0%|          | 0/398001 [00:00<?, ?it/s]

There is a lot of data here (the biolector data in particular takes up a lot of space).

Let's have a look at the different protocols (types of data) that are included in the study:

In [11]:
# df_dbtl0['Protocol'].unique()

In [12]:
# df_dbtl1['Protocol'].unique()

In [13]:
# df_dbtl2['Protocol'].unique()

In [14]:
# df_dbtl3['Protocol'].unique()

In [15]:
# df_dbtl4['Protocol'].unique()

In [16]:
# df_dbtl5['Protocol'].unique()

In [17]:
# Distinct protocol labels present in the DBTL6 export.
df_dbtl6.loc[:, 'Protocol'].unique()

array(['GC-FID', 'Global Proteomics'], dtype=object)

In [18]:
# df_dbtl0_isoprenol = df_dbtl0[df_dbtl0['Protocol'] == 'GC-FID']
# df_dbtl1_isoprenol = df_dbtl1[df_dbtl1['Protocol'] == 'GC-FID']
# df_dbtl2_isoprenol = df_dbtl2[df_dbtl2['Protocol'] == 'GC-FID']
# df_dbtl3_isoprenol = df_dbtl3[df_dbtl3['Protocol'] == 'GC-FID']
# df_dbtl4_isoprenol = df_dbtl4[df_dbtl4['Protocol'] == 'GC-FID']
# df_dbtl5_isoprenol = df_dbtl5[df_dbtl5['Protocol'] == 'GC-FID']
# Keep only the rows whose protocol is GC-FID; everything else is dropped.
df_dbtl6_isoprenol = df_dbtl6.loc[df_dbtl6['Protocol'].eq('GC-FID')]

In [19]:
# df_dbtl0_isoprenol.to_pickle('./isoprenol_data/dbtl0_isoprenol.pkl')
# df_dbtl1_isoprenol.to_pickle('./isoprenol_data/dbtl1_isoprenol.pkl')
# df_dbtl2_isoprenol.to_pickle('./isoprenol_data/dbtl2_isoprenol.pkl')
# df_dbtl3_isoprenol.to_pickle('./isoprenol_data/dbtl3_isoprenol.pkl')
# df_dbtl4_isoprenol.to_pickle('./isoprenol_data/dbtl4_isoprenol.pkl')
# df_dbtl5_isoprenol.to_pickle('./isoprenol_data/dbtl5_isoprenol.pkl')
# Writing fails if the target folder is missing (e.g. on a fresh checkout),
# so make sure the output directory exists before saving.
os.makedirs('./isoprenol_data', exist_ok=True)
# Save the filtered DBTL6 frame as a pickle.
df_dbtl6_isoprenol.to_pickle('./isoprenol_data/dbtl6_isoprenol.pkl')


In [20]:
# df_dbtl0_isoprenol.to_csv('./isoprenol_data/dbtl0_isoprenol.csv')
# df_dbtl1_isoprenol.to_csv('./isoprenol_data/dbtl1_isoprenol.csv')
# df_dbtl2_isoprenol.to_csv('./isoprenol_data/dbtl2_isoprenol.csv')
# df_dbtl3_isoprenol.to_csv('./isoprenol_data/dbtl3_isoprenol.csv')
# df_dbtl4_isoprenol.to_csv('./isoprenol_data/dbtl4_isoprenol.csv')
# df_dbtl5_isoprenol.to_csv('./isoprenol_data/dbtl5_isoprenol.csv')
# Writing fails if the target folder is missing (e.g. when this cell is run
# on a fresh checkout), so make sure the output directory exists first.
os.makedirs('./isoprenol_data', exist_ok=True)
# Save the same filtered DBTL6 frame as CSV (the index is written as the
# first column, pandas' default).
df_dbtl6_isoprenol.to_csv('./isoprenol_data/dbtl6_isoprenol.csv')

In [21]:
# df_dbtl4_isoprenol['line_name_norep'] = df_dbtl4_isoprenol['Line Name'].apply(lambda x: x[:-3])
# df_dbtl4_isoprenol['rep'] = df_dbtl4_isoprenol['Line Name'].apply(lambda x: int(x[-1]))

# df_dbtl4_isoprenol
# dfm = df_dbtl4_isoprenol.loc[:,['line_name_norep', 'rep', 'Value']].copy()
# dfm

In [22]:
# dfm = dfm.groupby(by = 'line_name_norep').mean().reset_index()
# dfm.loc[:, ['line_name_norep', 'Value']].to_csv('./isoprenol_data/dbtl4_merged.csv')