# Setup

## Dependencies

In [1]:
import pandas as pd
import sqlite3

from datetime import datetime

## Importing Data

In [2]:
# Relative path to the CSV file holding the project data; it is read into a
# DataFrame with pd.read_csv in a later cell.
project_data = "./data/customers_table.csv"

# Workflow Data

## Separate completed Jobs to 'in_production_df'

**Jobs where both '[OB] Completed' and 'Job #' are not blank go to `in_production_df`; jobs with a blank 'Job #' go to `pre_production_df`.**

In [4]:
# Columns that should be parsed as dates when the CSV is loaded.
date_columns = [
    "Claim # Date",
    "FTA Scope. Req Date",
    "Submit for Estimate Date",
    "[OB] Created Scope Calc",
    "[B] Created Estimate Date",
    "Job Submittal Date",
    "[B] - Date Approved by BC",
    "[OB] Completed",
]

# Load the project data; 'Claim #' and 'Job #' are forced to strings so
# pandas does not reinterpret them as numbers.
project_df = pd.read_csv(
    project_data,
    dtype={"Claim #": str, "Job #": str},
    parse_dates=date_columns,
)

# Row masks reused by both subsets below.
has_job_number = project_df["Job #"].notna()
is_completed = project_df["[OB] Completed"].notna()

# 'in_production_df': rows with both a job number and an '[OB] Completed'
# date, so the completion date is never missing in this subset.
in_production_df = project_df.loc[is_completed & has_job_number]

# 'pre_production_df': rows that do not have a job number yet.
pre_production_df = project_df.loc[~has_job_number]


In [19]:
# Sanity check on the date parsing: look at the Python type of the first
# 'Submit for Estimate Date' value. `dtype` is not a builtin (it raised
# NameError); the builtin `type` is what reports a scalar's type.
# `.iloc[0]` takes the first row by position rather than by index label.
tester = project_df['Submit for Estimate Date'].iloc[0]
type(tester)

## Create 'days_df', holding the number of days spent in each workflow step

In [25]:
# Each workflow step is measured as the number of whole days between two
# consecutive milestone dates.
# Mapping: output column -> (start date column, end date column).
workflow_steps = {
    "rep_claim": ("Claim # Date", "FTA Scope. Req Date"),
    "fta_scope": ("FTA Scope. Req Date", "Submit for Estimate Date"),
    "ob_scope": ("Submit for Estimate Date", "[OB] Created Scope Calc"),
    "bc_estimate": ("[OB] Created Scope Calc", "[B] Created Estimate Date"),
    "sup_pfynr": ("[B] Created Estimate Date", "Job Submittal Date"),
    "bc_approval": ("Job Submittal Date", "[B] - Date Approved by BC"),
    "ob_orderbuild": ("[B] - Date Approved by BC", "[OB] Completed"),
}

# Build 'days_df' with vectorised column arithmetic instead of iterating over
# rows with iterrows() and appending to one list per column.
days_df = (
    in_production_df[["Claim #", "Job #"]]
    .rename(columns={"Claim #": "claim_#", "Job #": "job_#"})
    # one 'days' column per workflow step: (end - start) in whole days
    .assign(**{
        step: (in_production_df[end_col] - in_production_df[start_col]).dt.days
        for step, (start_col, end_col) in workflow_steps.items()
    })
    # skipna=False: a missing date in any step makes the row total NaN,
    # exactly as summing the individual per-step values would.
    .assign(total_days=lambda df: df[list(workflow_steps)].sum(axis=1, skipna=False))
    # drop the row labels inherited from 'in_production_df' (fresh 0..n-1 index)
    .reset_index(drop=True)
)


# Export Data

In [27]:
# Write each DataFrame to its own CSV file. 'index=False' leaves the row
# index out of the file (pandas writes it by default).
csv_exports = {
    "data/in_production.csv": in_production_df,
    "data/pre_production.csv": pre_production_df,
    "data/workflow_days.csv": days_df,
}

for csv_path, frame in csv_exports.items():
    frame.to_csv(csv_path, index=False)