# Dataport Database Extraction for: Exploring how EV charging aligns with rooftop solar generation by homes

## This notebook will connect to the database and extract the data live and put it into compressed zip files in this directory. 


<p>We will be using Pecan Street Inc. data from Dataport to determine how electric vehicle charging aligns with rooftop solar generation.
    
<br>Data from 24 homes with fairly complete data for the year 2018 is used to explore this question.
    
<br>
Pecan Street's data can be obtained by applying for a Dataport account at https://www.dataport.pecanstreet.org.</p>

<br>
If you would like to use the files exported by this notebook, you'll need to modify the read_csv calls in the companion analysis notebook so that they point at these files instead of the copies we've extracted and prepared for you in the /shared/JupyterHub-Examples-Data/ directory on the JupyterHub server.

In [None]:
#import packages
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import sqlalchemy as sqla
import os
import sys
sys.path.insert(0,'..')
from config.read_config import get_database_config
import numpy as np
%matplotlib inline
sys.executable  # shows you your path to the python you're using

In [None]:
# read in db credentials from ../config/config.txt
# * make sure you add those to the ../config/config.txt file! *
# The result is later indexed with the keys 'username', 'password', 'hostname',
# 'port' and 'database' when the connection URL is built.
# NOTE(review): presumably get_database_config returns a dict-like mapping of
# those keys — confirm against config/read_config.py.

database_config = get_database_config("../config/config.txt")

In [None]:
# get our DB connection
# The username and password are URL-escaped before being embedded in the
# connection URL: characters such as '@', ':', '/' or '#' inside a credential
# would otherwise be parsed as URL delimiters and break the connection.
# NOTE(review): this assumes config.txt stores the raw (not already
# percent-encoded) credentials — confirm.
from urllib.parse import quote_plus

engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(
    quote_plus(str(database_config['username'])),
    quote_plus(str(database_config['password'])),
    database_config['hostname'],
    database_config['port'],
    database_config['database'],
))


In [None]:
# Select a list of Texas homes from dataport metadata that have CAR and solar
# configured and whose 1-minute eGauge data spans the whole of year 2018.
# In the LIKE patterns '%' is the SQL wildcard, so '100%' / '99%' match any
# availability value that starts with "100" / "99".
# ORDER BY makes the LIMIT deterministic: without it PostgreSQL is free to
# return a different subset of 25 homes on each run, so the files exported by
# this notebook would not be reproducible.

query = """select distinct dataid from other_datasets.metadata 
                                          where car1='yes' and solar='yes' 
                                          and egauge_1min_min_time < '2018-01-01' 
                                          and egauge_1min_max_time > '2019-01-01'
                                          and state='Texas'
                                          and (egauge_1min_data_availability like '100%' 
                                               or 
                                               egauge_1min_data_availability like '99%')
                                          ORDER BY dataid
                                          LIMIT 25
                                          ;
         """

df = pd.read_sql_query(sqla.text(query), engine)


In [None]:
# grab dataids and convert them to a comma-separated string to put into the SQL query
dataids_list = df['dataid'].tolist()
if not dataids_list:
    # An empty list would render as "dataid in ()" in the completeness query,
    # which is invalid SQL; fail here with a message that names the real cause.
    raise ValueError("No dataids matched the metadata query; cannot build the SQL IN (...) clause.")
print("{} dataids selected listed here:".format(len(dataids_list)))
dataids_str = ','.join(map(str, dataids_list))
# last expression of the cell: display the selected ids
dataids_list

In [None]:
# Check data completeness for the dataids selected from metadata above:
# count the records each home has in electricity.eg_realpower_1min
# within calendar year 2018 (2018-01-01 inclusive to 2019-01-01 exclusive).

query2 = (
    """select dataid,count(*) total_rec from electricity.eg_realpower_1min 
            where dataid in ({})""".format(dataids_str)
    + """ and localminute >= '2018-01-01' and localminute < '2019-01-01' group by 1"""
)

df2 = pd.read_sql_query(sqla.text(query2), engine)

In [None]:
# Keep only the homes with at least 90% data availability for year 2018.
# 525600 is the number of minutes in a 365-day year (365 * 24 * 60), i.e. the
# record count expected from a 1-minute table covering all of 2018.
coverage_ratio = df2['total_rec'] / 525600
df2['perc'] = coverage_ratio * 100
has_enough_data = df2['perc'] >= 90
final_dataids = df2[has_enough_data]
# last expression of the cell: how many homes passed the filter
final_dataids['dataid'].count()

In [None]:
# Pull data for homes: turn the filtered dataids into a comma-separated string
# for the SQL IN (...) clause of the seasonal queries
final_dataids_list = final_dataids['dataid'].tolist()
if not final_dataids_list:
    # An empty list would render as "dataid in ()" in the seasonal queries,
    # which is invalid SQL; fail here with a message that names the real cause.
    raise ValueError("No homes met the 90% availability threshold; cannot build the SQL IN (...) clause.")
print("{} dataids selected listed here:".format(len(final_dataids_list)))
final_dataids_str = ','.join(map(str, final_dataids_list))
# last expression of the cell: display the final ids
final_dataids_list

In [None]:
# fall: car1/solar/grid rows for the selected homes,
# from 2018-09-01 (inclusive) up to 2018-12-01 (exclusive)
fall = (
    """select localminute::timestamp,car1,solar,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-09-01' and localminute <  '2018-12-01' """
    + """AND dataid in ({})""".format(final_dataids_str)
)

# run the query and load the result into a dataframe
fall_df = pd.read_sql_query(sqla.text(fall), engine)

# preview the first 15 rows
fall_df.head(15)

In [None]:
# export fall to a zipped csv: the archive ev_charging_alignmnent_fall.zip
# holds a single member named ev_charging_alignmnent_fall.csv (index not written)
compression_opts = {
    'method': 'zip',
    'archive_name': 'ev_charging_alignmnent_fall.csv',
}
fall_df.to_csv(
    'ev_charging_alignmnent_fall.zip',
    index=False,
    compression=compression_opts,
)

In [None]:
# spring: car1/solar/grid rows for the selected homes,
# from 2018-03-01 (inclusive) up to 2018-06-01 (exclusive)
spring = (
    """select localminute::timestamp,car1,solar,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-03-01' and localminute <  '2018-06-01' """
    + """AND dataid in ({})""".format(final_dataids_str)
)

# run the query and load the result into a dataframe
spring_df = pd.read_sql_query(sqla.text(spring), engine)

In [None]:
# export spring to a zipped csv: the archive ev_charging_alignmnent_spring.zip
# holds a single member named ev_charging_alignmnent_spring.csv (index not written)
compression_opts = {
    'method': 'zip',
    'archive_name': 'ev_charging_alignmnent_spring.csv',
}
spring_df.to_csv(
    'ev_charging_alignmnent_spring.zip',
    index=False,
    compression=compression_opts,
)

In [None]:
# summer: car1/solar/grid rows for the selected homes,
# from 2018-06-01 (inclusive) up to 2018-09-01 (exclusive)
summer = (
    """select localminute::timestamp,car1,solar,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-06-01' and localminute <  '2018-09-01' """
    + """AND dataid in ({})""".format(final_dataids_str)
)

# run the query and load the result into a dataframe
summer_df = pd.read_sql_query(sqla.text(summer), engine)

In [None]:
# export summer to a zipped csv: the archive ev_charging_alignmnent_summer.zip
# holds a single member named ev_charging_alignmnent_summer.csv (index not written)
compression_opts = {
    'method': 'zip',
    'archive_name': 'ev_charging_alignmnent_summer.csv',
}
summer_df.to_csv(
    'ev_charging_alignmnent_summer.zip',
    index=False,
    compression=compression_opts,
)

In [None]:
# winter: car1/solar/grid rows for the selected homes,
# from 2018-12-01 (inclusive) up to 2019-03-01 (exclusive)
# NOTE(review): this window runs into 2019, while the completeness check
# earlier in the notebook only counts records from 2018 — confirm the
# selected homes actually have data for January and February 2019.
winter = (
    """select localminute::timestamp,car1,solar,grid 
               from electricity.eg_realpower_1min 
               where localminute >= '2018-12-01' and localminute <  '2019-03-01' """
    + """AND dataid in ({})""".format(final_dataids_str)
)

# run the query and load the result into a dataframe
winter_df = pd.read_sql_query(sqla.text(winter), engine)

In [None]:
# export winter to a zipped csv: the archive ev_charging_alignmnent_winter.zip
# holds a single member named ev_charging_alignmnent_winter.csv (index not written)
compression_opts = {
    'method': 'zip',
    'archive_name': 'ev_charging_alignmnent_winter.csv',
}
winter_df.to_csv(
    'ev_charging_alignmnent_winter.zip',
    index=False,
    compression=compression_opts,
)