# Data Table Creation

After studying the database, it was possible to gather some information that seems relevant to the study case.

In [1]:
import pandas as pd
import numpy as np

## Chosen headers by file

CHEMICAL_CARRIERS  
carrier_id
test_id
cas_number
chem_name
formulation

CHEMICALS  
cas_number
chemical_name
dose_id

DOSE_RESPONSE_DETAILS  
dose_resp_detail_id
dose_id

DOSE_RESPONSE_LINKS  
all

DOSE_RESPONSES  
dose_resp_id
test_id
effect_code
measurement_code

DOSES  
dose_id
test_id
dose1_mean
dose2_mean
dose3_mean

RESULTS  
result_id
test_id
effect
measurement
conc1_mean
conc2_mean
conc3_mean

SPECIES  
species_number
common_name
kingdom
class
tax_order
family
species

TESTS  
test_id
test_cas //Foreign key to CHEMICALS lookup table.
organism_lifestage //Foreign key to LIFESTAGE_CODES
organism_age_mean
organism_gender
study_duration_mean
exposure_duration_mean
test_type //Foreign key to TEST_TYPE_CODES
num_doses_mean
application_freq_mean
halflife_mean

In [3]:
# Load the pipe-delimited chemical_carriers export, parsing only the columns
# chosen for this study. Every value is read as text (dtype='unicode').
carrier_columns = ['carrier_id', 'test_id', 'cas_number', 'chem_name', 'formulation']
df_chemical_carriers = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/chemical_carriers.txt",
    sep="|",
    dtype='unicode',
    usecols=carrier_columns,  # avoid parsing columns that would be dropped anyway
)[carrier_columns]  # read_csv yields columns in file order; restore the listed order
df_chemical_carriers.head()

Unnamed: 0,carrier_id,test_id,cas_number,chem_name,formulation
0,342653,1000060,67641,2-Propanone,NR
1,342654,1000061,67641,2-Propanone,NR
2,342655,1000062,67641,2-Propanone,NR
3,342656,1000063,67641,2-Propanone,NR
4,342657,1000064,67641,2-Propanone,NR


In [8]:
# Lookup tables from the validation/ folder. Each file is pipe-delimited and
# every column is read as text.
df_chemical_formulation_codes = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/validation/chemical_formulation_codes.txt",
    sep="|",
    dtype="unicode",
)
df_chemical_grade_codes = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/validation/chemical_grade_codes.txt",
    sep="|",
    dtype="unicode",
)
df_effect_codes = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/validation/effect_codes.txt",
    sep="|",
    dtype="unicode",
)

In [10]:
# Load the chemicals lookup table (pipe-delimited, all values as text),
# parsing only the two columns used in this study.
chemical_columns = ['cas_number', 'chemical_name']
df_chemicals = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/validation/chemicals.txt",
    sep="|",
    dtype='unicode',
    usecols=chemical_columns,  # avoid parsing columns that would be dropped anyway
)[chemical_columns]  # read_csv yields columns in file order; restore the listed order

df_chemicals.head()

Unnamed: 0,cas_number,chemical_name
0,50000,Formalin
1,50011,Guanidine monohydrochloride
2,50022,"11beta,16alpha-9-Fluoro-11,17,21-Trihydroxy-16..."
3,50033,"(11beta)-21-Acetyloxy)-11,17-dihydroxypregn-4-..."
4,50044,"21-(Acetyloxy)-17-hydroxypregn-4-ene-3,11,20-t..."


In [12]:
# Load dose_response_details (pipe-delimited, all values as text), parsing
# only the identifier columns used in this study.
dose_response_detail_columns = ['dose_resp_detail_id', 'dose_id']
df_dose_response_details = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/dose_response_details.txt",
    sep="|",
    dtype='unicode',
    usecols=dose_response_detail_columns,  # avoid parsing unused columns
)[dose_response_detail_columns]  # read_csv yields file order; restore the listed order

df_dose_response_details.head()

Unnamed: 0,dose_resp_detail_id,dose_id
0,1,2
1,2,3
2,3,4
3,4,1
4,5,2


In [14]:
# Load dose_response_links (pipe-delimited, all values as text), parsing only
# the result_id / dose_resp_id pair selected for this study.
dose_response_link_columns = ['result_id', 'dose_resp_id']
df_dose_response_links = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/dose_response_links.txt",
    sep="|",
    dtype='unicode',
    usecols=dose_response_link_columns,  # avoid parsing unused columns
)[dose_response_link_columns]  # read_csv yields file order; restore the listed order

In [15]:
# Load doses (pipe-delimited, all values as text), parsing only the
# identifier and dose-mean columns used in this study.
dose_columns = ['dose_id', 'test_id', 'dose1_mean', 'dose2_mean', 'dose3_mean']
df_doses = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/doses.txt",
    sep="|",
    dtype='unicode',
    usecols=dose_columns,  # avoid parsing columns that would be dropped anyway
)[dose_columns]  # read_csv yields columns in file order; restore the listed order

df_doses.head()

Unnamed: 0,dose_id,test_id,dose1_mean,dose2_mean,dose3_mean
0,1,1,0,,
1,2,1,5,,
2,3,1,25,,
3,4,1,125,,
4,5,2,0,,


In [17]:
# Load results (pipe-delimited, all values as text), parsing only the
# identifier, effect/measurement and concentration-mean columns used here.
result_columns = [
    'result_id',
    'test_id',
    'effect',
    'measurement',
    'conc1_mean',
    'conc2_mean',
    'conc3_mean',
]
df_results = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/results.txt",
    sep="|",
    dtype='unicode',
    usecols=result_columns,  # avoid parsing columns that would be dropped anyway
)[result_columns]  # read_csv yields columns in file order; restore the listed order

df_results.head()

Unnamed: 0,result_id,test_id,effect,measurement,conc1_mean,conc2_mean,conc3_mean
0,20984,1143197,MOR,MORT,175,,
1,108012,1047376,POP,BMAS,15000,,
2,170646,1152742,POP,GPOP/,1400,,
3,182736,1101244,ITX,MBLT,320000,,
4,180820,1210976,CEL,RBCE,NR,,


In [21]:
# Load the species lookup table (pipe-delimited, all values as text),
# parsing only the naming and taxonomy columns used in this study.
species_columns = [
    'species_number',
    'common_name',
    'kingdom',
    'class',
    'tax_order',
    'family',
    'species',
]
df_species = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/validation/species.txt",
    sep="|",
    dtype='unicode',
    usecols=species_columns,  # avoid parsing columns that would be dropped anyway
)[species_columns]  # read_csv yields columns in file order; restore the listed order

In [19]:
# Load tests (pipe-delimited, all values as text), parsing only the columns
# chosen for this study.
test_columns = [
    'test_id',
    'test_cas',
    'organism_lifestage',
    'organism_age_mean',
    'organism_gender',
    'study_duration_mean',
    'exposure_duration_mean',
    'test_type',
    'num_doses_mean',
    'application_freq_mean',
    'halflife_mean',
]
df_tests = pd.read_csv(
    "../data/ecotox_ascii_12_13_2018/tests.txt",
    sep="|",
    dtype='unicode',
    usecols=test_columns,  # avoid parsing columns that would be dropped anyway
)[test_columns]  # read_csv yields columns in file order; restore the listed order