# Timing

In [1]:
import pandas as pd
import numpy as np
import pickle
import pyodbc
import os
import sys

sys.path.append("S:\\Engineering\\01. Development\\Financial Engine\\")

from utils.pandas_utils import *
from utils.custom_tfs import *
from numba import jit, prange
import matplotlib.pyplot as plt
import datetime
%matplotlib inline

In [2]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [3]:
# db connections: one pyodbc connection per database, keyed by database name.
driver = "{ODBC Driver 17 for SQL Server}"
server = "syntax-dev-sql.database.windows.net"
# NOTE(review): SQL Server's default TCP port is 1433, so 1443 may be a typo.
# The Microsoft ODBC driver also documents the port as part of SERVER
# ("host,port"); confirm whether the separate PORT keyword is honoured at all.
port = 1443

# Credentials are read from the environment so they never land in the notebook.
user = os.environ["ALLSQUARE_USERNAME"]
pwd = os.environ["ALLSQUARE_PASSWORD"]
auth = "ActiveDirectoryPassword"
cnxns = dict()
dbs = ["syntax_processed", "syntax_dev", "syntax_dev_hist", "stg_cg_export"]
for db in dbs:
    # Each keyword appears exactly once (PORT used to be listed twice).
    cnxn_string = (
        f"DRIVER={driver};SERVER={server};PORT={port};DATABASE={db};"
        f"UID={user};PWD={pwd};Authentication={auth}"
    )
    cnxns[db] = pyodbc.connect(cnxn_string)

<IPython.core.display.Javascript object>

## Data Needed:
1. Increased Density
2. f1000
3. f1001
4. f1002
5. Section Assumption

In [4]:
# SQL: API numbers of all wells flagged as horizontal in dbo.index_well.
get_index_well = """
SELECT
    api
    from dbo.index_well
where is_horizontal = 1
"""

<IPython.core.display.Javascript object>

In [5]:
# Distinct API numbers of horizontal wells, after normalising the api column
# with tf_api.
horizontal_wells = ccast(
    pd.read_sql(get_index_well, cnxns["stg_cg_export"]), (["api"], tf_api)
)["api"].unique()

<IPython.core.display.Javascript object>

In [6]:
# SQL: increased-density records from dbo.reg_id, with the bracketed source
# columns aliased to snake_case names.
get_inc_den = """
SELECT
 [Cause Number] as cause_number,
 trsm_heh,
 [Number of Additional Wells] as nwells,
 [Order Type] as order_type,
 [App or Order] as app_order,
 [date_app] as app_date,
 [date_order] as order_date
FROM  dbo.reg_id; 
"""

<IPython.core.display.Javascript object>

In [7]:
# Order types filtered out when selecting increased-density orders.
remove_order_types = ["Dismissed", "Order Denying Motion"]

<IPython.core.display.Javascript object>

In [8]:
# Load the increased-density records, then cast one column group at a time.
incden = pd.read_sql(get_inc_den, cnxns["syntax_dev_hist"])
incden = ccast(incden, (["app_date", "order_date"], tf_date))
incden = ccast(incden, (["nwells"], tf_number_wells))
incden = ccast(incden, (["cause_number"], tf_cause_number))

<IPython.core.display.Javascript object>

In [9]:
# One row per section with the last application date and well count.
# NOTE(review): "last" is positional (last non-null value in row order), and
# the source query has no ORDER BY, so confirm this really is the most recent
# application.
incden_app_sec = csetcols(
    incden.query("app_order == 'App'")
    .reset_index(drop=True)
    .groupby("trsm_heh", as_index=False)
    .agg({"app_date": "last", "nwells": "last"})
    .reset_index(drop=True),
    cols=["trsm_heh", "app_date", "app_nwells"],
)

<IPython.core.display.Javascript object>

In [10]:
# One row per section with the last order date and well count, ignoring the
# order types listed in remove_order_types.
# NOTE(review): "last" is positional (last non-null value in row order), and
# the source query has no ORDER BY, so confirm this really is the most recent
# order.
incden_order_sec = csetcols(
    incden.query(
        "app_order == 'Order' and order_type not in @remove_order_types"
    )
    .reset_index(drop=True)
    .groupby("trsm_heh", as_index=False)
    .agg({"order_date": "last", "nwells": "last"})
    .reset_index(drop=True),
    cols=["trsm_heh", "order_date", "order_nwells"],
)

<IPython.core.display.Javascript object>

## Index well to map api to sections

In [11]:
# SQL: api -> section (trsm_heh) pairs from dbo.index_well_land, restricted to
# rows flagged use_for_allocation = 1.
get_api_section = """
SELECT 
    api,
    trsm_heh
FROM dbo.index_well_land
WHERE use_for_allocation = 1
"""

<IPython.core.display.Javascript object>

In [12]:
# api -> section lookup with the api column normalised by tf_api.
api_section = ccast(
    pd.read_sql(get_api_section, cnxns["stg_cg_export"]), (["api"], tf_api)
)

<IPython.core.display.Javascript object>

## F1002a
The IP generator needs wells tagged with formations, so F1002a is read from the output of Formation Normalization.

In [13]:
# NOTE(review): read_pickle executes arbitrary code from the file; only load
# pickles produced by this project.
f1002 = pd.read_pickle("../data/eff_sec_form_norm.pickle")

# Normalise the join keys one column at a time.
f1002 = ccast(f1002, (["api"], tf_api))
f1002 = ccast(f1002, (["formation"], tf_formation))

# APIs that already have an F1002a record; used to exclude them from the
# F1000 / F1001 queries.
f1002_api = f1002["api"].unique()

<IPython.core.display.Javascript object>

In [14]:
# Per (section, formation): number of F1002a rows and the last completion date.
# NOTE(review): "last" is positional; it is the latest completion only if the
# pickle is already ordered by well_completion. Confirm.
f1002_sec = csetcols(
    ccast(f1002, (["api"], tf_api))
    .groupby(["trsm_heh", "formation"], as_index=False)
    .agg({"well_completion": ["count", "last"]})
    .reset_index(drop=True),
    cols=["trsm_heh", "formation", "f1002a_nwells", "f1002a_last_date"],
)

<IPython.core.display.Javascript object>

## F1000

In [15]:
# SQL: api and effective date from dbo.w_occ_1000, aliased as permit_date.
get_f1000 = """
SELECT 
    api_syntax as api,
    date_effective as permit_date
FROM dbo.w_occ_1000;
"""

<IPython.core.display.Javascript object>

In [16]:
# F1000 records for horizontal wells that have no F1002a record yet.
f1000 = ccast(
    pd.read_sql(get_f1000, cnxns["syntax_processed"]), (["api"], tf_api)
)
f1000 = f1000.query("api not in @f1002_api and api in @horizontal_wells")
f1000 = ccast(f1000, (["permit_date"], tf_date))

# Attach sections, then keep one row per (api, section): after the descending
# sort, first() picks the first non-null permit_date in each group.
f1000 = (
    f1000.merge(api_section, on="api")
    .sort_values(["api", "trsm_heh", "permit_date"], ascending=False)
    .groupby(["api", "trsm_heh"], as_index=False)
    .first()
    .reset_index(drop=True)
)

<IPython.core.display.Javascript object>

In [17]:
# Every (api, section) pair must appear exactly once in f1000.
assert not f1000.duplicated(
    subset=["api", "trsm_heh"]
).any(), "Duplicates API's in f1000"

<IPython.core.display.Javascript object>

In [18]:
# Per section: number of permitted wells plus the latest and earliest permit
# dates.
# "last"/"first" are positional, and f1000 is ordered by api after its
# groupby, so the rows are sorted by permit_date first to make them the
# chronologically last/first permits of each section.
f1000_sec = (
    f1000.sort_values("permit_date")
    .groupby(["trsm_heh"], as_index=False)
    .agg({"permit_date": ["count", "last", "first"]})
    .reset_index(drop=True)
    .pipe(
        csetcols,
        cols=[
            "trsm_heh",
            "f1000_nwells",
            "f1000_last_date",
            "f1000_first_date",
        ],
    )
)

<IPython.core.display.Javascript object>

## F1001

In [19]:
# SQL: api and effective date from dbo.w_occ_1001, aliased as spud_date.
get_f1001 = """
SELECT 
    api_syntax as api,
    date_effective as spud_date
FROM dbo.w_occ_1001;
"""

<IPython.core.display.Javascript object>

In [20]:
# F1001 records for horizontal wells that have no F1002a record yet.
f1001 = ccast(
    pd.read_sql(get_f1001, cnxns["syntax_processed"]), (["api"], tf_api)
)
f1001 = f1001.query("api not in @f1002_api and api in @horizontal_wells")
f1001 = ccast(f1001, (["spud_date"], tf_date))

# Attach sections, then keep one row per (api, section): after the descending
# sort, first() picks the first non-null spud_date in each group.
f1001 = (
    f1001.merge(api_section, on="api")
    .sort_values(["api", "trsm_heh", "spud_date"], ascending=False)
    .groupby(["api", "trsm_heh"], as_index=False)
    .first()
    .reset_index(drop=True)
)

<IPython.core.display.Javascript object>

In [21]:
# Every (api, section) pair must appear exactly once in f1001.
assert not f1001.duplicated(
    subset=["api", "trsm_heh"]
).any(), "Duplicates API's in f1001"

<IPython.core.display.Javascript object>

In [22]:
# Per section: number of spudded wells plus the latest and earliest spud
# dates.
# "last"/"first" are positional, and f1001 is ordered by api after its
# groupby, so the rows are sorted by spud_date first to make them the
# chronologically last/first spuds of each section.
f1001_sec = (
    f1001.sort_values("spud_date")
    .groupby(["trsm_heh"], as_index=False)
    .agg({"spud_date": ["count", "last", "first"]})
    .reset_index(drop=True)
    .pipe(
        csetcols,
        cols=[
            "trsm_heh",
            "f1001_nwells",
            "f1001_last_date",
            "f1001_first_date",
        ],
    )
)

<IPython.core.display.Javascript object>

## Section Assumption

In [23]:
# Section assumptions: load the CSV, then cast one column group at a time.
sec_ass = pd.read_csv("../data/section_assumption.csv")
sec_ass = ccast(
    sec_ass,
    (["nwells_1", "tolerance_1", "nwells_2", "tolerance_2"], tf_number_wells),
)
sec_ass = ccast(sec_ass, (["section_acres"], tf_section_acres))
sec_ass = ccast(sec_ass, (["formation_1", "formation_2"], tf_formation))

  interactivity=interactivity, compiler=compiler, result=result)


<IPython.core.display.Javascript object>

### Timing logic is applied only to zone 1, and only zone 1 can have current wells. For secondary zones, the timings and number of wells are set directly.

In [90]:
f1002_sec.head()

Unnamed: 0,trsm_heh,formation,f1002a_nwells,f1002a_last_date
0,"01N-01E,28 IM",WOODFORD,1,2014-04-13
1,"01N-01E,29 IM",WOODFORD,1,2016-12-19
2,"01N-01W,16 IM",WOODFORD,1,2018-07-09
3,"01N-01W,17 IM",WOODFORD,1,2018-07-09
4,"01N-01W,19 IM",WOODFORD,1,2014-08-25


<IPython.core.display.Javascript object>

In [112]:
def _f1002a_for_zone(zone):
    """
    Look up the F1002a well count and last date for one zone of every section.

    Joins ``sec_ass`` to ``f1002_sec`` on the section and the zone's own
    formation column (``formation_1`` or ``formation_2``), keeps the F1002a
    columns and suffixes them with ``_f<zone>``. Sections without a match
    keep a row with missing values (left join).
    """
    return (
        sec_ass.merge(
            f1002_sec,
            left_on=["trsm_heh", f"formation_{zone}"],
            right_on=["trsm_heh", "formation"],
            how="left",
        )
        .loc[:, ["trsm_heh", "f1002a_nwells", "f1002a_last_date"]]
        .rename(
            columns={
                "f1002a_nwells": f"f1002a_nwells_f{zone}",
                "f1002a_last_date": f"f1002a_last_date_f{zone}",
            }
        )
    )


# Zone 1 must join on formation_1; it previously joined on formation_2, which
# made the *_f1 columns a copy of the *_f2 columns.
f1002a_f1 = _f1002a_for_zone(1)

f1002a_f2 = _f1002a_for_zone(2)

<IPython.core.display.Javascript object>

In [113]:
# Left-join every per-section frame onto the section assumptions, in order.
df = sec_ass
for section_frame in (
    f1002a_f1,
    f1002a_f2,
    f1001_sec,
    f1000_sec,
    incden_order_sec,
    incden_app_sec,
):
    df = df.merge(section_frame, on="trsm_heh", how="left")

<IPython.core.display.Javascript object>

## Timing Logic Starts Here

In [114]:
# Well-count columns, in the row order timing() reads them by position
# (index 0 = nwells_1 ... index 9 = f1002a_nwells_f2). Do not reorder.
num_cols = [
    "nwells_1",
    "tolerance_1",
    "nwells_2",
    "tolerance_2",
    "f1001_nwells",
    "f1000_nwells",
    "app_nwells",
    "order_nwells",
    "f1002a_nwells_f1",
    "f1002a_nwells_f2",
]

# Date columns, in the row order timing() reads them by position
# (index 0 = f1001_last_date ... index 7 = f1002a_last_date_f2). Do not reorder.
date_cols = [
    "f1001_last_date",
    "f1001_first_date",
    "f1000_last_date",
    "f1000_first_date",
    "app_date",
    "order_date",
    "f1002a_last_date_f1",
    "f1002a_last_date_f2",
]

<IPython.core.display.Javascript object>

The columns in `num_cols` and `date_cols` must be passed to `timing` in exactly this format and order, because the function addresses them by position.

In [116]:
# The merged frame must still hold the expected number of section rows.
assert len(df) == 71068, "Missing Sections"

<IPython.core.display.Javascript object>

In [117]:
# Row-per-quantity, column-per-section arrays in the layout timing() indexes.
well_numbers = df[num_cols].apply(tf_number_wells).values.T
well_dates = df[date_cols].apply(pd.to_datetime).values.astype("<M8[D]").T

<IPython.core.display.Javascript object>

In [118]:
# Durations in days, in the positional order timing() expects:
# tolerance, spud_to_rig_release, frac_to_sales, permit_to_spud,
# idorder_to_spud, idapp_to_spud, days_ntg_f1_pd, days_ntg_f2_pd
timing_dates = (45, 20, 45, 180, 365, 545, 365 * 3, 365 * 6)
timing_dates = np.array([np.timedelta64(days, "D") for days in timing_dates])

# Today's local date at day resolution, repeated once per section.
current_dates = np.repeat(
    np.datetime64(datetime.date.today(), "D"), well_numbers.shape[1]
)
# Placeholder date for every output slot timing() never fills in. The unit is
# passed positionally: np.datetime64 has no documented `type` keyword.
ph_date = np.repeat(np.datetime64("1900-01-01", "D"), well_numbers.shape[1])

<IPython.core.display.Javascript object>

In [267]:
def timing(well_numbers, well_dates, timing_dates, current_dates, ph_date):
    """
    Derive well counts and spud/sales dates for every section.

    All inputs are addressed by position, so the row order below is part of
    the contract. The function is kept as one flat loop without helpers
    because it is also compiled with numba in nopython mode.

    well_numbers : 2-D array, one row per quantity, one column per section
        0 nwells_1
        1 tolerance_1
        2 nwells_2
        3 tolerance_2
        4 f1001_nwells
        5 f1000_nwells
        6 app_nwells
        7 order_nwells
        8 f1002a_nwells_f1
        9 f1002a_nwells_f2

    well_dates : 2-D datetime array, one row per quantity, one column per section
        0 f1001_last_date
        1 f1001_first_date
        2 f1000_last_date
        3 f1000_first_date
        4 app_date
        5 order_date
        6 f1002a_last_date_f1 (not read)
        7 f1002a_last_date_f2 (not read)

    timing_dates : 1-D timedelta array
        0 tolerance
        1 spud_to_rig_release
        2 frac_to_sales
        3 permit_to_spud
        4 idorder_to_spud
        5 idapp_to_spud
        6 days_ntg_f1_pd
        7 days_ntg_f2_pd

    current_dates : 1-D datetime array; only element 0 is read.
    ph_date : 1-D datetime array, one placeholder date per section; every
        date output starts as a copy of it.

    Returns a 12-tuple of per-section arrays, in this order:
        nwells_curr, nwells_fut, spud_date_curr, sales_date_curr,
        spud_date_fut, sales_date_fut,
        nwells_curr_2, nwells_fut_2, spud_date_curr_2, sales_date_curr_2,
        spud_date_fut_2, sales_date_fut_2

    The secondary-zone "current" outputs (nwells_curr_2, spud_date_curr_2,
    sales_date_curr_2) are never updated and keep their zero / placeholder
    values.
    """
    nsections = well_numbers.shape[1]

    # Named views of the positional input rows.
    nwells_1 = well_numbers[0]
    tolerance_1 = well_numbers[1]
    nwells_2 = well_numbers[2]
    tolerance_2 = well_numbers[3]
    f1001_nwells = well_numbers[4]
    f1000_nwells = well_numbers[5]
    app_nwells = well_numbers[6]
    order_nwells = well_numbers[7]
    f1002a_nwells_f1 = well_numbers[8]
    f1002a_nwells_f2 = well_numbers[9]

    f1001_last_date = well_dates[0]
    f1001_first_date = well_dates[1]
    f1000_last_date = well_dates[2]
    f1000_first_date = well_dates[3]
    app_date = well_dates[4]
    order_date = well_dates[5]

    tolerance = timing_dates[0]
    spud_to_rig_release = timing_dates[1]
    frac_to_sales = timing_dates[2]
    permit_to_spud = timing_dates[3]
    idorder_to_spud = timing_dates[4]
    idapp_to_spud = timing_dates[5]
    days_ntg_f1_pd = timing_dates[6]
    days_ntg_f2_pd = timing_dates[7]

    today = current_dates[0]

    # Primary zone outputs.
    nwells_curr = np.zeros(shape=nsections, dtype=well_numbers.dtype)
    nwells_fut = np.zeros(shape=nsections, dtype=well_numbers.dtype)
    spud_date_curr = ph_date.copy()
    sales_date_curr = ph_date.copy()
    spud_date_fut = ph_date.copy()
    sales_date_fut = ph_date.copy()

    # Secondary zone outputs.
    nwells_curr_2 = np.zeros(shape=nsections, dtype=well_numbers.dtype)
    nwells_fut_2 = np.zeros(shape=nsections, dtype=well_numbers.dtype)
    spud_date_curr_2 = ph_date.copy()
    sales_date_curr_2 = ph_date.copy()
    spud_date_fut_2 = ph_date.copy()
    sales_date_fut_2 = ph_date.copy()

    for i in range(nsections):

        # Primary formation: number of current wells.
        # If the f1001 last date is greater than the f1000 last date +
        # tolerance, use the f1001 wells, else the f1000 wells.
        if f1001_nwells[i] > 0:
            if f1000_nwells[i] > 0:
                if f1001_last_date[i] > f1000_last_date[i] + tolerance:
                    nwells_curr[i] = f1001_nwells[i]
                else:
                    nwells_curr[i] = f1000_nwells[i]
            else:
                nwells_curr[i] = f1001_nwells[i]

        # If the f1000 last date is greater than the ID order date +
        # tolerance, use the f1000 wells, else the ID order wells.
        elif f1000_nwells[i] > 0:
            if order_nwells[i] > 0:
                if f1000_last_date[i] > order_date[i] + tolerance:
                    nwells_curr[i] = f1000_nwells[i]
                else:
                    nwells_curr[i] = order_nwells[i]
            else:
                nwells_curr[i] = f1000_nwells[i]

        # If an ID order exists, use it.
        elif order_nwells[i] > 0:
            nwells_curr[i] = order_nwells[i]

        # If an ID application exists, use it.
        elif app_nwells[i] > 0:
            nwells_curr[i] = app_nwells[i]

        # Primary formation: spud date of the current wells.
        # `x == x` is False only for a missing date (NaT), so each branch also
        # requires its date to be present.
        # If f1001 wells exist, the spud date is the f1001 first well date.
        if f1001_nwells[i] > 0 and f1001_first_date[i] == f1001_first_date[i]:
            spud_date_curr[i] = f1001_first_date[i]

        # If f1000 wells exist, it is the f1000 first date + permit to spud.
        elif f1000_nwells[i] > 0 and f1000_first_date[i] == f1000_first_date[i]:
            spud_date_curr[i] = f1000_first_date[i] + permit_to_spud

        # If an ID order exists, it is the ID order date + idorder_to_spud.
        elif order_nwells[i] > 0 and order_date[i] == order_date[i]:
            spud_date_curr[i] = order_date[i] + idorder_to_spud

        # If an ID application exists, it is the ID app date + idapp_to_spud.
        elif app_nwells[i] > 0 and app_date[i] == app_date[i]:
            spud_date_curr[i] = app_date[i] + idapp_to_spud

        # Sales follow the spud by one rig-release interval per well plus the
        # frac-to-sales lag.
        if nwells_curr[i] > 0:
            sales_date_curr[i] = (
                spud_date_curr[i]
                + (nwells_curr[i] * spud_to_rig_release)
                + frac_to_sales
            )

        # If current wells plus PDP (f1002a) wells do not exceed tolerance 1,
        # the remainder of nwells_1 becomes future wells.
        # NOTE(review): only nwells_curr is subtracted, not the PDP wells;
        # confirm that is intended.
        if nwells_curr[i] + f1002a_nwells_f1[i] <= tolerance_1[i]:
            nwells_fut[i] = nwells_1[i] - nwells_curr[i]

        # If future wells exist, their spud date is the current date + days
        # ntg f1 prod.
        if nwells_fut[i] > 0:
            spud_date_fut[i] = today + days_ntg_f1_pd
            sales_date_fut[i] = (
                spud_date_fut[i]
                + (nwells_fut[i] * spud_to_rig_release)
                + frac_to_sales
            )

        # Secondary formation: only future wells are derived.
        if f1002a_nwells_f2[i] >= tolerance_2[i]:
            nwells_fut_2[i] = np.uint8(0)
        else:
            nwells_fut_2[i] = nwells_2[i] - f1002a_nwells_f2[i]
            spud_date_fut_2[i] = today + days_ntg_f2_pd
            sales_date_fut_2[i] = (
                spud_date_fut_2[i]
                + (nwells_fut_2[i] * spud_to_rig_release)
                + frac_to_sales
            )

    return (
        nwells_curr,
        nwells_fut,
        spud_date_curr,
        sales_date_curr,
        spud_date_fut,
        sales_date_fut,
        nwells_curr_2,
        nwells_fut_2,
        spud_date_curr_2,
        # Slot 9 is the secondary-zone *current* sales date; it used to return
        # sales_date_fut_2 a second time.
        sales_date_curr_2,
        spud_date_fut_2,
        sales_date_fut_2,
    )

<IPython.core.display.Javascript object>

In [268]:
# numba-compiled (nopython) variant of timing(); same arguments and outputs.
nb_timing = jit(nopython=True, fastmath=True, error_model="numpy")(timing)

<IPython.core.display.Javascript object>

In [269]:
# Labels for the 12 outputs of timing(), in positional order. Slot 9 is the
# secondary-zone current sales date; it was previously a second
# "sales_date_fut_2", which produced a duplicate column label.
cols = [
    "nwells_curr",
    "nwells_fut",
    "spud_date_curr",
    "sales_date_curr",
    "spud_date_fut",
    "sales_date_fut",
    "nwells_curr_2",
    "nwells_fut_2",
    "spud_date_curr_2",
    "sales_date_curr_2",
    "spud_date_fut_2",
    "sales_date_fut_2",
]

<IPython.core.display.Javascript object>

In [270]:
%%timeit
timing(well_numbers, well_dates, timing_dates, current_dates, ph_date)

2.05 s ± 75.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


<IPython.core.display.Javascript object>

In [272]:
%%timeit
nb_timing(well_numbers, well_dates, timing_dates, current_dates, ph_date)

1.96 ms ± 63.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


<IPython.core.display.Javascript object>

In [273]:
# Column labels for the 12 outputs of timing(), in positional order. Slot 9 is
# the secondary-zone current sales date; it was previously a second
# "sales_date_fut_2", which produced a duplicate column label.
timing_cols = [
    "nwells_curr",
    "nwells_fut",
    "spud_date_curr",
    "sales_date_curr",
    "spud_date_fut",
    "sales_date_fut",
    "nwells_curr_2",
    "nwells_fut_2",
    "spud_date_curr_2",
    "sales_date_curr_2",
    "spud_date_fut_2",
    "sales_date_fut_2",
]

<IPython.core.display.Javascript object>

In [274]:
# Build the frame one column per returned array so each keeps its own dtype;
# stacking the well counts and the datetime64 dates into a single ndarray
# forces one common dtype onto all twelve outputs.
timing_results = nb_timing(
    well_numbers, well_dates, timing_dates, current_dates, ph_date
)
timings = pd.DataFrame(
    {position: values for position, values in enumerate(timing_results)}
)
# Labels are assigned positionally afterwards, so the order of timing_cols
# decides the column names.
timings.columns = timing_cols

<IPython.core.display.Javascript object>

In [275]:
# Cast the well-count columns to uint8, then the date columns through tf_date.
timings = ccast(
    timings, ([name for name in timing_cols if "nwells" in name], np.uint8)
)
timings = ccast(
    timings, ([name for name in timing_cols if "date" in name], tf_date)
)

<IPython.core.display.Javascript object>