This notebook copies the required records from the historical-data backup database into the main ESP database.

Locations:
```
Historical Data:
DB = oasis-data
Schema = dbo
Table = tblDataHistory

Main ESP Table:
DB = esp-data
Schema = public
Table = data

# use table espaddr for checking which addresses need to be used
```

In [1]:
"""
For setting up local imports in an Ipython Shell
This is a workaround for ipython, dont need it for basic python scripts
"""
import os
import sys
# Put the parent of the current working directory on the import path (once),
# so that packages living one level above the notebook can be imported.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
from library import lib_aws
import pandas as pd
import time
import numpy as np

In [3]:
# Helper functions
# Clean up strings
def node_clean(node_str):
    """
    Normalise a raw NodeID string into a consistent well-name form.

    Steps, in order:
      1. remove every '#' character,
      2. collapse runs of whitespace to a single space and trim both ends,
      3. title-case each word,
      4. force the last two characters to upper case.

    '#' is removed *before* whitespace is collapsed so that a free-standing
    '#' (e.g. "Bobby # 3b") cannot leave a double space in the result.

    Parameters
    ----------
    node_str : str
        Raw NodeID value.

    Returns
    -------
    str
        Cleaned NodeID, e.g. "  bobby   5502 #3b " -> "Bobby 5502 3B".
    """
    node_str = node_str.replace('#', "")  # drop '#' first, see docstring
    node_str = " ".join(node_str.split())  # collapse whitespace, trims the ends too
    node_str = node_str.lower().title()  # one capital per word
    # last 2 characters will always be upper case (suffixes such as "3B", "BX")
    node_str = node_str[:-2] + node_str[-2:].upper()
    return node_str

# Transferring Data from Backups to Main DB

In [None]:
# Get the addresses we need to pull
query = """
select * from espaddr;
"""

with lib_aws.PostgresRDS(db='esp-data') as engine:
    esp_addr = pd.read_sql(query, engine)

# Drop only rows whose Address itself is missing. Calling dropna() on the whole
# frame would also discard a valid address whenever any other column is null.
address = esp_addr.Address.dropna().astype(int).unique()
esp_addr.head()

In [None]:
def import_data(addr, cut_off_dt='2020-08-18'):
    """
    Read the history rows for one address from `oasis-data` and clean them.

    Parameters
    ----------
    addr : int
        Value matched against the "Address" column of dbo."tblDataHistory".
        It is coerced with int() before being placed in the SQL text, so a
        non-numeric value raises instead of reaching the database.
    cut_off_dt : str or pandas.Timestamp, default '2020-08-18'
        Only rows with "Date" strictly before this timestamp are kept.

    Returns
    -------
    pandas.DataFrame
        Rows for `addr` with NodeID passed through `node_clean`, filtered to
        Date < cut_off_dt, de-duplicated on (NodeID, Date, Address) and with
        a fresh 0..n-1 index.
    """
    cut_off_dt = pd.Timestamp(cut_off_dt)
    query = """
    select * from dbo."tblDataHistory"
    where "Address" = {};
    -- order by "NodeID", "Date";
    """.format(int(addr))

    with lib_aws.PostgresRDS(db='oasis-data') as engine:
        sample = pd.read_sql(query, engine, parse_dates=['Date'])

    # data modification
    # Written as one chain that returns new frames: the previous in-place
    # drop_duplicates/reset_index ran on a filtered slice, which can raise
    # SettingWithCopyWarning.
    sample = (
        sample
        .assign(NodeID=sample.NodeID.apply(node_clean))
        .loc[lambda frame: frame.Date < cut_off_dt]
        .drop_duplicates(subset=['NodeID', 'Date', 'Address'])
        .reset_index(drop=True)
    )

    return sample

In [None]:
# Addresses treated as already handled (the transfer loop appends to this list
# after each successful write).
complete = [
    32176, 32166, 32141, 32145, 32140, 32137, 32125, 32126,
    40760, 42150, 42104, 40003, 40007,
    30234, 30127, 30170, 30174,
]

# Whatever is in `address` but not in `complete` is still outstanding.
addr_to_use = np.setdiff1d(address, complete)
addr_to_use

In [None]:
%%time
addr_to_use = [42156, 42158]
for a in addr_to_use:
    t0 = time.time()
    print(f"------------\nAddress Working on: {a}")
    
    # IMporting data
    data = import_data(a)
    t2 = time.time()
    print("Data Imported in time {:.2f}".format(t2-t0))
    
    # Add data to the db
    lib_aws.AddData.add_data(df=data, 
                             db='esp-data', 
                             table='data',
                             merge_type='append',  # Only use replace if you know what you are doing
                             index_col='NodeID') 

    t3 = time.time()
    print("Full process time {:.2f}".format(t3-t0))
    complete.append(a)
    

In [None]:
# Grouped Query
# Template only: "{}" must be filled with an address via gp_query.format(addr).
gp_query = """
select * from dbo."tblDataHistory"
where "Address" = {};
"""

# TODO: the read for this query was never written. This cell previously ended
# in a `with` statement that had no body, which is a syntax error and stops
# "Restart & Run All". The statement is kept here, commented out, until the
# body is implemented:
# with lib_aws.PostgresRDS(db='oasis-data') as engine:
#     ...

In [None]:
# One-off transfer of a single, manually imported frame into the main table.
# NOTE(review): in the visible part of this notebook `sample_df` is only
# assigned by the commented-out line below, so on a fresh kernel this cell
# raises NameError.
# NOTE(review): address 30234 is already listed in `complete`, and the write
# uses merge_type='append' — confirm a re-run would not add the same rows twice.
# %%time
# sample_df = import_data(30234)

lib_aws.AddData.add_data(df=sample_df, 
                         db='esp-data', 
                         table='data',
                         merge_type='append',  # Only use replace if you know what you are doing
                         index_col='NodeID') 

# Creating a new table which contains just ESP data

Steps:
- Get the list of NodeIDs which are ESPs
- While iterating over each well:
    - Import data from `db:esp-data table:public.data`
    - Transfer this data to `db:esp-data table:public.espdata`

In [4]:
# Get list of ESP NodeID's

# Distinct NodeID values that have rows in `data` under any of the three
# addresses listed in the WHERE clause.
query = """
select distinct("NodeID") 
from data 
where "Address" in (32176, 42150, 30272);
"""

with lib_aws.PostgresRDS(db='esp-data') as engine:
    esp_wells = pd.read_sql(sql=query, con=engine)

# Preview of the first few wells
esp_wells.head()

Unnamed: 0,NodeID
0,Drummond 5501 41-21 7B
1,Bobby 5502 42-11 3B
2,Jensen 5501 11-18 4B
3,Lite 5393 31-11 10T
4,Spratley Federal 5494 44-13 3BX


In [5]:
# Every well returned by the query above
all_wells = esp_wells["NodeID"].values

# Wells to leave out of this run (the transfer loop extends this list as it goes)
complete_wells = ['Drummond 5501 41-21 7B']

# Outstanding wells, split into 10 groups of near-equal size so that each
# query/write round trip handles one group at a time.
wells_to_use = np.setdiff1d(all_wells, complete_wells)
well_gps = np.array_split(wells_to_use, 10)

In [7]:
# Copy ESP rows from `data` to `espdata`, one group of wells at a time.
for well_array in well_gps:
    t0 = time.time()
    print("---------------\nWells Being Worked on", *well_array, sep='\n')

    # np.array_split returns empty groups when there are fewer wells than
    # groups; `in ()` is not valid SQL, so skip those without a round trip.
    if len(well_array) == 0:
        print("No data Queried")
        continue

    # Build the IN (...) list explicitly instead of relying on tuple repr,
    # which breaks for a one-element group ("('x',)"), for names containing
    # a single quote, and for numpy scalar reprs. Single quotes are doubled,
    # which is the standard SQL escape inside a string literal.
    well_literals = ", ".join(
        "'{}'".format(str(well).replace("'", "''")) for well in well_array
    )
    well_query = """
    select * from data where "NodeID" in ({}) 
    """.format(well_literals)

    # Best effort by design: a failed read is reported and the group is
    # skipped so that the remaining groups still run.
    try:
        with lib_aws.PostgresRDS(db='esp-data') as engine:
            df = pd.read_sql(well_query, engine, parse_dates=['Date'])
    except Exception as e:
        print(e)
        df = pd.DataFrame()

    if df.empty:
        print("No data Queried")

    else:
        t2 = time.time()
        # "{:.2f}" (two decimals), matching the other timing messages; the
        # previous "{:2f}" was a width spec and printed six decimals.
        print("Data Imported in time {:.2f}".format(t2 - t0))

        # Add data to the db
        lib_aws.AddData.add_data(df=df,
                                 db='esp-data',
                                 table='espdata',
                                 merge_type='append',  # Only use replace if you know what you are doing
                                 index_col='NodeID')

        t3 = time.time()
        print("Full process time {:.2f}".format(t3 - t0))
        # Only groups that were actually written count as complete
        complete_wells = complete_wells + list(well_array)
        del df  # release the group's frame before the next read



---------------
Wells Being Worked on
Berry 5493 41-7 6B
Berry 5493 42-7 8T
Berry 5493 42-7 9B
Berry 5493 44-7 12T
Berry 5493 44-7 14BX
Berry 5493 44-7 15TX
Blanchet Fed 41-28H
Bobby 5502 14-2 6B
Bobby 5502 14-2 7B
Bobby 5502 14-2 8BX
Data Imported in time 276.193613
Data appended on Table espdata in time 79.18s
Full process time 355.37
---------------
Wells Being Worked on
Bobby 5502 42-11 2BX
Bobby 5502 42-11 3B
Bobby 5502 42-11 4B
Bobby 5502 42-11 5B
Brunson 9-8WA
Cook 41-12 11T
Crane Federal 41-26H
Crane Federal 5300 14-27 3B
Crane Federal 5300 14-27 5B
Crane Federal 5300 34-27 7B
Data Imported in time 92.258590
Data appended on Table espdata in time 89.97s
Full process time 182.23
---------------
Wells Being Worked on
Dawson 5494 41-12 2T
Dawson 5494 42-12 3B
Dawson 5494 42-12 4T
Dawson 5494 42-12 5B
Dawson 5494 42-12 6T
Dawson 5494 43-12 11T
Dixon 5602 42-34 2B
Dixon 5602 42-34 4B
Drummond 5501 42-21 3B
Drummond 5501 42-21 5B
Data Imported in time 206.662251
Data appended on Tabl