In [24]:
# Importing relevant libraries
import pandas as pd
import sqlite3

In [53]:
# Open the FishBase SQLite database and create a cursor for ad-hoc queries.
# The notebook only reads from the database, so it is opened read-only via a
# URI: with a plain path, sqlite3.connect() would silently create a new empty
# database file if the path were wrong, and the error would only surface later
# as "no such table". With mode=ro a missing file fails right here instead.
conn = sqlite3.connect("file:../data/fishbase_data.sqlite?mode=ro", uri=True)
c = conn.cursor()

In [54]:
# List every table stored in the database by querying SQLite's schema catalogue
c.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()

[('fishbase_temperature',),
 ('fishbase_environment',),
 ('fishbase_extra_data',),
 ('seq_COI',),
 ('seq_CYTB',),
 ('seq_ND5',)]

In [55]:
# Load the species, climate, environment and depth columns
# from the fishbase_environment table into a DataFrame
env_query = (
    "SELECT species, climate, environment, depth_min, depth_max "
    "FROM fishbase_environment"
)
fishbase_env = pd.read_sql_query(sql=env_query, con=conn)
fishbase_env.shape

(1265, 5)

In [56]:
# Load the species and min/max temperature columns
# from the fishbase_temperature table into a DataFrame
temp_query = (
    "SELECT species, temperature_min, temperature_max "
    "FROM fishbase_temperature"
)
fishbase_temp = pd.read_sql_query(sql=temp_query, con=conn)
fishbase_temp.shape

(1265, 3)

In [57]:
# Read the COI sequence data. Note: a species can appear more than once in the
# table, so only the first row per species is kept. drop_duplicates is chained
# (rather than used with inplace=True) so that re-running the cell always
# rebuilds seq_COI from the query result.
seq_COI = pd.read_sql_query(
    "SELECT species, sequence AS COI_sequence FROM seq_COI", conn
).drop_duplicates(subset='species')
# Display the size of the de-duplicated table
seq_COI.shape

(1220, 2)

In [58]:
# Read the CYTB sequence data, keeping only the first row per species.
# drop_duplicates is chained (rather than used with inplace=True) so that
# re-running the cell always rebuilds seq_CYTB from the query result.
seq_CYTB = pd.read_sql_query(
    "SELECT species, sequence AS CYTB_sequence FROM seq_CYTB", conn
).drop_duplicates(subset='species')
# Display the size of the de-duplicated table
seq_CYTB.shape

(1217, 2)

In [59]:
# Read the ND5 sequence data, keeping only the first row per species.
# drop_duplicates is chained (rather than used with inplace=True) so that
# re-running the cell always rebuilds seq_ND5 from the query result.
seq_ND5 = pd.read_sql_query(
    "SELECT species, sequence AS ND5_sequence FROM seq_ND5", conn
).drop_duplicates(subset='species')
# Display the size of the de-duplicated table
seq_ND5.shape

(1219, 2)

In [60]:
# It is good practice to close the database connection once we have finished
# using it. The cells below work only on the DataFrames already loaded, so the
# connection is no longer needed from this point on.
conn.close()

In [61]:
# Combine the environment and temperature tables on species. The inner join
# keeps only species that are present in both tables.
fishbase_joined = pd.merge(
    left=fishbase_env,
    right=fishbase_temp,
    on='species',
    how='inner',
)
# Display the size of the joined table
fishbase_joined.shape

(1265, 7)

In [62]:
# Attach the COI sequences. The inner join drops species without a COI
# sequence. NOTE: this cell reassigns fishbase_joined, so run it only once per
# kernel session; running it a second time would merge the COI column again.
fishbase_joined = pd.merge(
    left=fishbase_joined,
    right=seq_COI,
    on='species',
    how='inner',
)
# Display the size of the joined table
fishbase_joined.shape

(1178, 8)

In [63]:
# Attach the CYTB sequences. The inner join drops species without a CYTB
# sequence. NOTE: this cell reassigns fishbase_joined, so run it only once per
# kernel session; running it a second time would merge the CYTB column again.
fishbase_joined = pd.merge(
    left=fishbase_joined,
    right=seq_CYTB,
    on='species',
    how='inner',
)
# Display the size of the joined table
fishbase_joined.shape

(1175, 9)

In [64]:
# Attach the ND5 sequences. The inner join drops species without an ND5
# sequence. NOTE: this cell reassigns fishbase_joined, so run it only once per
# kernel session; running it a second time would merge the ND5 column again.
fishbase_joined = pd.merge(
    left=fishbase_joined,
    right=seq_ND5,
    on='species',
    how='inner',
)
# Display the size of the joined table
fishbase_joined.shape

(1174, 10)

In [65]:
# Preview the first five rows of the joined table
fishbase_joined.head(n=5)

Unnamed: 0,species,climate,environment,depth_min,depth_max,temperature_min,temperature_max,COI_sequence,CYTB_sequence,ND5_sequence
0,Micropterus floridanus,subtropical,freshwater benthopelagic,,,,,MAITRWFFSTNHKDIGTLYLVFGAWAGMVGTALSLLIRAELSQPGA...,MASLRKTHPLLKIANDALVDLPAPSNISVWWNFGSLLGLCLATQIL...,MHPTSLVMTSSLVTIFFLLTFPVLTTLTPRPPEITWALSHVKTAVK...
1,Ischikauia steenackeri,temperate,freshwater demersal,,,,,MAITRWFFSTNHKDIGTLYLVFGAWAGMVGTALSLLIRAELSQPGS...,MASLRKTHPLMKIANDALVDLPTPSNISVWWNFGSLLGLCLITQIL...,MNSTTLIMSSSLILVLTILMLPLLTTLNPKPQKPEWASTHVKTAVS...
2,Pseudohemiculter dispar,subtropical,freshwater benthopelagic brackish,,,,,MAITRWFFSTNHKDIGTLYLVFGAWAGMVGTALSLLIRAELSQPGS...,MASLRKTHPLMKIANDALVDLPTPSNISVWWNFGSLLGLCLITQIL...,MNSTTLIMSSSLILVLIILMLPLVTTLSPKPQKPEWANTHVKTAVS...
3,Tinca tinca,temperate,freshwater brackish potamodromous demersal,1.0,,4.0,24.0,MAITRWFFSTNHKDIGTLYLVFGAWAGMVGTALSLLIRAELSQPGS...,MASLRKTHPLIKIANDALVDLPTPSNISVWWNFGSLLGLCLIIQIL...,MNSTTLIMSSSLILVLTILMLPLLTSLNPNPQKPQWANTHVKTAVS...
4,Polypterus weeksii,tropical,freshwater demersal,,,,,MTITRWLFSTNHKDIGTLYLIFGAWAGMVGTALSLLIRAELGQPGA...,MAIIRKTHPLAKIINSAFIDLPAPSNISSWWNMGSLLGLCLIAQII...,MSITQLSQMFMTCLSLTIIILILPIMLSLMTKPSNNWPYQVKNAVK...


In [67]:
# Save the joined table to disk. The DataFrame index is written as the first
# (unnamed) column, so read the file back with index_col=0 to restore it.
fishbase_joined.to_csv('../data/fishbase_joined.csv', index=True)