# Connect to VARS database and pull down data

**Note** that in order for this to work, in addition to VARS_logon_info.txt and VARS.py, you will need the following driver file in your working directory: <span style="color:blue"> mssql-jdbc-8.2.2.jre8.jar </span>

For Lis Henderson

In [1]:
## Imports

import pandas as pd
import numpy as np

import re # for extracting logon info from text file

import jaydebeapi # for connecting to VARS db
import VARS # for connecting to VARS db

In [2]:
## Extract logon information from text file -- This uses a guest username and pw that will let you download data but not overwrite anything!

# Get list of each line in file. The context manager closes the file even if
# reading fails part-way through.
filename = 'VARS_logon_info.txt'
with open(filename, 'r') as f:
    lines = f.readlines()

# Function for extracting information from lines
def get_single_quoted_text(s):
    """
    Return the text found between the first pair of single quotes in s that
    encloses at least one character, with surrounding whitespace removed.

    Parameters:
    s -- the string to search

    Returns:
    The quoted text, stripped of leading and trailing whitespace.

    Raises:
    ValueError if s contains no non-empty single-quoted text.

    Example:
    s = "What if there's more ' than one' set of single' quotes?"
    get_single_quoted_text(s) --> 's more'

    """

    # Raw string so that \s reaches the regex engine as written instead of
    # being treated as an (invalid) Python string escape.
    extracted_text = re.search(r"(?<=')\s*[^']+?\s*(?=')", s)
    if extracted_text is None:
        # The input is deliberately left out of the message: this function is
        # used on lines that hold logon credentials.
        raise ValueError("no single-quoted text found in input string")
    return extracted_text.group().strip()

# Assign logon info from the third through seventh lines of the file
# (indices 2-6), in file order. The generator is consumed lazily, so each line
# is looked up and parsed in turn.
# NOTE(review): presumably dr/name/pw/un/url are driver, name, password,
# username and URL -- confirm against the layout of VARS_logon_info.txt.
dr, name, pw, un, url = (get_single_quoted_text(lines[i]) for i in range(2, 7))

In [3]:
## Build SQL query -- This query grabs select columns (see those listed under SELECT) from the annotations table for non-embargoed records from 1989 (see last two lines)
# The year filter is a half-open range [1989-01-01, 1990-01-01): a datetime cast
# of '1989-12-31' is midnight at the START of that day, so an inclusive upper
# bound on it would drop every record made later on Dec 31.

sql = """
        SELECT index_recorded_timestamp,
               observation_uuid,
               concept,
               observation_group,
               observer,
               image_url,
               depth_meters,
               latitude,
               longitude,
               oxygen_ml_per_l,
               psi,
               salinity,
               temperature_celsius,
               video_uri,
               video_sequence_name,
               chief_scientist
        FROM annotations a
        WHERE NOT EXISTS (
           SELECT DISTINCT observation_uuid
           FROM annotations b
           WHERE (
             (  -- Delete last 2 years of annotations
             index_recorded_timestamp > DATEADD([year], - 2, GETDATE()) OR
             index_recorded_timestamp IS NULL OR
             index_recorded_timestamp < CAST('1970-01-02' AS datetime)
             )
           OR ( -- Delete embargoes by dive
             dive_number IN ('Ventana 50', 'Ventana 217', 'Ventana 218', 'Ventana 248')
              )
           OR (
             dive_number IN ('Tiburon 1001', 'Tiburon 1029', 'Tiburon 1030', 'Tiburon 1031', 'Tiburon 1032', 'Tiburon 1033', 'Tiburon 1034')
             )
           OR ( -- Delete embargoes by selectedConcept
             concept IN (
                 'Aegina sp. 1',
                 'Ctenophora',
                 'Cydippida 2',
                 'Cydippida',
                 'Intacta',
                 'Llyria',
                 'Lyrocteis',
                 'Lyroctenidae',
                 'Mertensia',
                 'Mertensiidae sp. A',
                 'Mystery Mollusc',
                 'Mystery Mollusc',
                 'Physonectae sp. 1',
                 'Platyctenida sp. 1',
                 'Platyctenida',
                 'Thalassocalycida sp. 1',
                 'Thalassocalycida',
                 'Thliptodon sp. A',
                 'Tjalfiella tristoma',
                 'Tjalfiella',
                 'Tjalfiellidae',
                 'Tuscarantha braueri',
                 'Tuscarantha luciae',
                 'Tuscarantha',
                 'Tuscaretta globosa',
                 'Tuscaretta',
                 'Tuscaridium cygneum',
                 'Tuscaridium',
                 'Tuscarilla campanella',
                 'Tuscarilla nationalis',
                 'Tuscarilla similis',
                 'Tuscarilla',
                 'Tuscarora',
                 'Tuscaroridae'
                 )
            )
        ) AND a.observation_uuid = b.observation_uuid
    ) AND index_recorded_timestamp >= CAST('1989-01-01' AS datetime)
      AND index_recorded_timestamp < CAST('1990-01-01' AS datetime)
    """

I don't know if you're familiar with SQL. The VARS database is a Microsoft SQL Server database. Some other simpler queries:

Get all columns of all records in the annotations table from 2017.
```python
# Half-open range [2017-01-01, 2018-01-01): a datetime cast of '2017-12-31' is
# midnight at the start of that day, so "<= 2017-12-31" would miss Dec 31.
sql2 = """
    SELECT * 
    FROM annotations a
    WHERE index_recorded_timestamp >= CAST('2017-01-01' AS datetime)
        AND index_recorded_timestamp < CAST('2018-01-01' AS datetime)
       """
```

Get time and species columns for all Dosidicus gigas sightings.
```python
# The comma after index_recorded_timestamp matters: without it SQL reads
# "concept" as an alias for the timestamp column and returns a single column.
sql3 = """
    SELECT index_recorded_timestamp,
           concept
    FROM annotations a
    WHERE concept in ('Dosidicus', 
                      'Dosidicus gigas', 
                      'Humboldt squid')
       """
```

In [3]:
# Simple query: every column of every annotation whose concept is
# 'Bathocyroidae'. This replaces whatever query was previously bound to `sql`.
sql = (
    "\n"
    "SELECT *\n"
    "FROM annotations a\n"
    "WHERE concept = 'Bathocyroidae'\n"
)

In [4]:
## Query the database

# Get connection
conn = VARS.get_db_conn(dr, url, un, pw, name)

# Submit query. The try/finally guarantees the connection is released even if
# the query raises, so a failed query does not leave a connection open.
try:
    data, col_names = VARS.get_data(conn, sql)
finally:
    # Close connection
    conn.close()

**Just FYI:** In my original notebook, I noted that jaydebeapi was throwing an AttributeError ("java.sql.Types has no attribute \_\_javaclass\_\_") in its \_jdbc_connect_jpype function. Although I couldn't find that exact error online, it seemed related to another one that was caused by the newest release of JPype (0.7.0) being buggy. I used:

```bash
pip install --user JPype1==0.6.3 --force-reinstall
```

in the command line to install a previous version, and that fixed it.

In [5]:
## Check data is there

# Label the columns with the names returned alongside the query results
data.columns = col_names

# Print the (rows, columns) shape, then show the first five rows
print("({}, {})".format(*data.shape))
data.head(n=5)

(4, 60)


Unnamed: 0,imaged_moment_uuid,index_elapsed_time_millis,index_recorded_timestamp,index_timecode,observation_uuid,activity,concept,duration_millis,observation_group,observation_timestamp,...,video_description,video_duration_millis,video_name,video_start_timestamp,camera_id,video_sequence_description,video_sequence_name,chief_scientist,dive_number,camera_platform
0,5D37029D-76C1-4AB7-83E6-4776147142D6,,2001-04-26 02:17:21,05:20:20:21,7A1F6866-CF9E-43B4-8165-9ED9F6131AD6,cruise,Bathocyroidae,,ROV,2001-04-25 20:19:32,...,,3600000.0,T0306-02,2001-04-26 02:11:48,Tiburon,,Tiburon 0306,Gary Greene,Tiburon 0306,Tiburon
1,3DFFF7C8-094D-49E9-AD91-99388398EB0D,,2001-04-26 02:17:08,05:20:07:21,1AAFBAE8-CFA9-4D7B-9B91-97AE516924A2,cruise,Bathocyroidae,,ROV,2001-04-25 20:19:40,...,,3600000.0,T0306-02,2001-04-26 02:11:48,Tiburon,,Tiburon 0306,Gary Greene,Tiburon 0306,Tiburon
2,DFB81735-D3D3-405D-A038-02B3BCB2F73A,,2014-06-23 14:10:43,00:20:32:01,72045501-A5DB-474D-96D9-90ECEFD03744,descend,Bathocyroidae,,ROV,2014-09-11 19:44:14.867000,...,,3600000.0,D0628-01HD,2014-06-23 13:50:24,Doc Ricketts,,Doc Ricketts 0628,Bob Vrijenhoek,Doc Ricketts 0628,Doc Ricketts
3,74A8652D-7B20-4462-B020-4A19336E77E8,,2001-04-26 02:05:20,05:10:56:02,ABAB64B0-0471-4934-9206-D556171B45A0,cruise,Bathocyroidae,,ROV,2001-04-25 20:19:45,...,,3600000.0,T0306-01,2001-04-26 01:02:30,Tiburon,,Tiburon 0306,Gary Greene,Tiburon 0306,Tiburon


In [None]:
## Save data to disk: loading the CSV is a lot faster than pulling the data from the database every time

# Missing values are written as the literal text 'NaN'; the row index is left out.
data.to_csv(path_or_buf='data.csv', na_rep='NaN', index=False)