# Setup
**This analysis includes all projects for the 2019 Season (includes projects marked as "Dead")**

## Dependencies

In [26]:
import pandas as pd
from datetime import datetime

## Import Data

In [27]:
# Relative locations of the three CSV files this notebook reads.

# data from '[TVA] Q2 Workflow Analysis'
all_project_data = "./data/all_projects.csv"

# data from '[TVA] Orders Table Audit'
all_production_data = "./data/all_production_projects.csv"

# data from '[W] ' (the source name is incomplete in the original note)
improvements_data = "./data/improvement_table.csv"

# Parsing Data

## Project Data

### All Q2 Projects

**Includes projects marked as 'Dead'**

In [28]:
# Read the project table: identifier/label columns stay as strings and each
# workflow milestone column is parsed into a datetime.
project_df = pd.read_csv(
    all_project_data,
    dtype={
        'Claim #': str,
        'Job #': str,
        'Branch': str,
        'Claim Status': str,
    },
    parse_dates=[
        'Claim # Date',
        'FTA Scope. Req Date',
        'Submit for Estimate Date',
        '[OB] Created Scope Calc',
        '[B] Created Estimate Date',
        'Job Submittal Date',
        '[B] - Date Approved by BC',
        '[OB] Completed',
    ],
)

### All Projects in Production

**Includes projects that have completed notifying the HO of the 'Roof Start' Date**

In [29]:
# Read the production table: 'Claim #' / 'Job #' stay as strings and each
# production milestone column is parsed into a datetime.
production_df = pd.read_csv(
    all_production_data,
    dtype={
        'Claim #': str,
        'Job #': str,
    },
    parse_dates=[
        'Permit Applied [A]',
        'Order Date',
        'Permit Received',
        'OA Date',
        'Invoice Date',
        'Ntfd H.O. Dlvry',
        'Dlvry Start',
        'Ntfd H.O. Start',
        'Roof Start',
    ],
)

## Rejection Data

### All Rejections

**Includes projects marked as 'Dead'**

In [30]:
# 'improvements_df' holds every FTA scope rejection on record; 'Created' is
# parsed as a datetime so the correct (latest) rejection date can be picked later
improvements_df = pd.read_csv(
    improvements_data,
    parse_dates=['Created'],
    dtype={
        'Claim #': str,
        'Job #': str,
    },
)

### Latest Rejection
**Determine the most recent rejection for any project**

In [31]:
# Keep one row per claim: the rejection with the most recent 'Created' date.
#
# Rows without a 'Created' date are removed first. A claim whose dates are all
# missing would otherwise make 'idxmax()' produce a NaN label (or raise,
# depending on the pandas version), and the '.loc' lookup below would fail.
dated_improvements_df = improvements_df.dropna(subset=['Created'])

# 'idxmax()' of the 'Created' column gives the row label of the latest rejection
latest_rejection_labels = dated_improvements_df.groupby('Claim #')['Created'].idxmax()
reject_date_df = dated_improvements_df.loc[latest_rejection_labels]

# renaming the 'Created' column for clarity
reject_date_df = reject_date_df.rename(columns={"Created": "Scope Rejection Date"})

###  Multi-Rejection Counts
**Determine the number of rejections for each project and whether it was rejected more than once**

In [32]:
# Count the non-null entries of every column per claim. The 'Created' count is
# the number of rejections logged for that claim. 'reset_index()' turns
# 'Claim #' back into a regular column so it can be used as a merge key.
improvement_count_df = improvements_df.groupby("Claim #").count().reset_index()

# A claim is multi-rejected when it has more than one logged rejection.
# Vectorised comparison replaces the former row-by-row loop.
improvement_count_df["Multi-rejected"] = improvement_count_df["Created"] > 1

# kept as a plain list of booleans for any later cell that still reads it
multi_reject_list = improvement_count_df["Multi-rejected"].tolist()

# renaming the 'Created' column for clarity
improvement_count_df = improvement_count_df.rename(columns={"Created": "Scope Rejections"})

# Merge Data

## Merge 'Project', 'Rejection', 'Multi-Reject', and 'Production' dataframes
**Merging dfs on the shared 'Claim #' column ('Job #' for 'production_df')**

In [33]:
# Left-join everything onto the project table so that every project is kept:
#   1. latest rejection date                  (key: 'Claim #')
#   2. rejection count + 'Multi-rejected'     (key: 'Claim #')
#   3. production data                        (key: 'Job #')
merged_df = (
    project_df
    .merge(reject_date_df, how='left', on='Claim #')
    .merge(improvement_count_df, how='left', on='Claim #')
    .merge(production_df, how='left', on='Job #')
)


## Cleanup Merged Data

In [34]:
# Renaming the columns so each one reads as a workflow milestone.
#
# The latest-rejection column is created under the name 'Scope Rejection Date'.
# The key used here previously ('Rejection Date') matched no column, and
# 'rename' silently ignores unknown keys, so 'FTA Scope Rejected' was never
# produced.
all_project_df = merged_df.rename(columns={
    'FTA Scope. Req Date': 'Claim # Collected',
    'Scope Rejection Date': 'FTA Scope Rejected',
    'Submit for Estimate Date': 'FTA Scope Completed',
    '[B] Created Estimate Date': 'BC Estimate Completed',
    '[OB] Created Scope Calc': 'OB Scope Completed',
    'Job Submittal Date': 'Sup Submitted Job',
    '[B] - Date Approved by BC': 'BC Approved',
    '[OB] Completed': 'OB Order Built',
    'Permit Applied [A]': 'PA Permit Applied',
    'Order Date': 'GM Order Created',
    'Permit Received': 'PA Permit Processed',
    'OA Date': 'PA OA Processed',
    'Invoice Date': 'PA Invoice Created',
    'Ntfd H.O. Dlvry': 'PA Notify of Delivery',
    'Dlvry Start': 'Delivery Start',
    'Ntfd H.O. Start': 'PA Notify of Start'
})

# Comparing Data

## Gathering Date Differences
**Finding any 'Submit for Estimate' and 'Rejection Date' Substitutions**

In [35]:
# lists collecting one 'date diff' (in days) per project for each workflow step
claim_num = []
rep_claim_diff = []
fta_scope_diff = []
ob_scope_diff = []
bc_estimate_diff = []
sup_pfynr_diff = []
bc_approval_diff = []
ob_order_build_diff = []
gm_create_order_diff = []
pa_oa_processed_diff = []
pa_invoice_diff = []

# these are being recorded, but don't necessarily apply to the project workflow.
pa_permit_applied_diff = []
pa_permit_processed_diff = []
pa_notify_delivery_diff = []
pa_notify_start_diff = []

# Up to and including this day the OB scope is measured before the BC estimate;
# afterwards the BC estimate is measured before the OB scope.
PRE_BLIP_LAST_DAY = datetime(2019, 7, 15)

# the two 'blip' days, on which the BC estimate is measured from PRE_BLIP_LAST_DAY
BLIP_DAYS = (datetime(2019, 7, 16), datetime(2019, 7, 17))

# The latest rejection date is created as 'Scope Rejection Date', while the
# workflow name for it is 'FTA Scope Rejected'. Use whichever is present so the
# rejected branch below cannot fail on a missing column.
rejected_date_col = (
    'FTA Scope Rejected'
    if 'FTA Scope Rejected' in all_project_df.columns
    else 'Scope Rejection Date'
)


def _days_between(later, earlier):
    """Return the whole days from `earlier` to `later` (NaN when either date is NaT)."""
    return (later - earlier).days


# iterating over the df to create the 'date diff' values of every project
for _, row in all_project_df.iterrows():

    claim_collected = row['Claim # Collected']
    fta_completed = row['FTA Scope Completed']
    ob_scope_completed = row['OB Scope Completed']
    bc_estimate_completed = row['BC Estimate Completed']
    sup_submitted = row['Sup Submitted Job']

    rep_claim_date_diff = float(_days_between(claim_collected, row['Claim # Date']))

    # Projects that were never rejected carry NaN here (left merge), rejected
    # ones carry a count >= 1. The former test '!= 0' was true for both, so the
    # rejected branch could never run.
    rejection_count = row['Scope Rejections']
    was_rejected = pd.notna(rejection_count) and rejection_count > 0

    # if the record has NOT had the FTA Scope Rejected...
    if not was_rejected:

        fta_date_diff = _days_between(fta_completed, claim_collected)

        # if the bc estimate was created prior to July 16th...
        if bc_estimate_completed <= PRE_BLIP_LAST_DAY:

            # old order: fta scope -> ob scope -> bc estimate -> sup submittal
            ob_scope_date_diff = _days_between(ob_scope_completed, fta_completed)
            bc_estimate_date_diff = _days_between(
                bc_estimate_completed, ob_scope_completed)
            sup_pfynr_date_diff = _days_between(sup_submitted, bc_estimate_completed)

        # if the record was addressed during the 'blip'...
        elif bc_estimate_completed in BLIP_DAYS:

            # compare the 'bc estimate' to the last pre-blip day,
            # and the 'ob scope' date to the new 'bc estimate' date
            bc_estimate_date_diff = _days_between(
                bc_estimate_completed, PRE_BLIP_LAST_DAY)
            ob_scope_date_diff = _days_between(
                ob_scope_completed, bc_estimate_completed)
            sup_pfynr_date_diff = _days_between(sup_submitted, ob_scope_completed)

        # if the bc estimate was created after the 'blip' (or has no date)...
        else:

            # new order: fta scope -> bc estimate -> ob scope -> sup submittal
            bc_estimate_date_diff = _days_between(
                bc_estimate_completed, fta_completed)
            ob_scope_date_diff = _days_between(
                ob_scope_completed, bc_estimate_completed)
            sup_pfynr_date_diff = _days_between(sup_submitted, ob_scope_completed)

    # if the record HAS had the FTA Scope Rejected...
    else:

        # the latest rejection date replaces the 'fta scope completed' date
        # for the fta and ob scope steps
        fta_rejected = row[rejected_date_col]

        fta_date_diff = _days_between(fta_rejected, claim_collected)
        bc_estimate_date_diff = _days_between(bc_estimate_completed, fta_completed)
        ob_scope_date_diff = _days_between(ob_scope_completed, fta_rejected)

        # before the 'blip' the sup submittal follows the bc estimate...
        if bc_estimate_completed <= PRE_BLIP_LAST_DAY:
            sup_pfynr_date_diff = _days_between(sup_submitted, bc_estimate_completed)

        # ...afterwards it follows the ob scope
        else:
            sup_pfynr_date_diff = _days_between(sup_submitted, ob_scope_completed)

    # steps that are measured the same way for every project
    bc_approval_date_diff = _days_between(row['BC Approved'], sup_submitted)
    ob_orderbuild_date_diff = _days_between(
        row['OB Order Built'], row['BC Approved'])
    gm_create_order_date_diff = _days_between(
        row['GM Order Created'], row['OB Order Built'])
    pa_oa_processed_date_diff = _days_between(
        row['PA OA Processed'], row['GM Order Created'])
    pa_invoice_date_diff = _days_between(
        row['PA Invoice Created'], row['PA OA Processed'])

    # these provide the lead times of tasks not directly impacting the workflow.
    pa_permit_applied_date_diff = _days_between(
        row['PA Permit Applied'], row['BC Approved'])
    pa_permit_processed_date_diff = _days_between(
        row['PA Permit Processed'], row['PA Permit Applied'])
    pa_notify_delivery_date_diff = _days_between(
        row['Delivery Start'], row['PA Notify of Delivery'])
    pa_notify_start_date_diff = _days_between(
        row['Roof Start'], row['PA Notify of Start'])

    # appending 'date diff' values to lists to create each df column
    claim_num.append(row["Claim #"])
    rep_claim_diff.append(rep_claim_date_diff)
    fta_scope_diff.append(fta_date_diff)
    ob_scope_diff.append(ob_scope_date_diff)
    bc_estimate_diff.append(bc_estimate_date_diff)
    sup_pfynr_diff.append(sup_pfynr_date_diff)
    bc_approval_diff.append(bc_approval_date_diff)
    ob_order_build_diff.append(ob_orderbuild_date_diff)
    gm_create_order_diff.append(gm_create_order_date_diff)
    pa_oa_processed_diff.append(pa_oa_processed_date_diff)
    pa_invoice_diff.append(pa_invoice_date_diff)
    pa_permit_applied_diff.append(pa_permit_applied_date_diff)
    pa_permit_processed_diff.append(pa_permit_processed_date_diff)
    pa_notify_delivery_diff.append(pa_notify_delivery_date_diff)
    pa_notify_start_diff.append(pa_notify_start_date_diff)

## Creating 'Workflow Days' df
**The days between each Teammate step in the workflow (excluding some production processes)**

In [36]:
# creating the 'days_df' to hold all date values for each role in the project
days_df = pd.DataFrame({
    "claim_#": claim_num,
    "rep_claim": rep_claim_diff,
    "fta_scope": fta_scope_diff,
    "bc_estimate": bc_estimate_diff,
    "ob_scope": ob_scope_diff,
    "sup_pfynr": sup_pfynr_diff,
    "bc_approval": bc_approval_diff,
    "ob_orderbuild": ob_order_build_diff,
    "gm_create_order": gm_create_order_diff,
    "pa_oa_process": pa_oa_processed_diff,
    "pa_invoice": pa_invoice_diff,
})

# creating a column holding the running tally across a row (project)
# can be done because not including 'date diffs' on non-workflow items
days_df['total_days'] = days_df.sum(axis=1)

# Export Data

In [37]:
# write the 'projects' and 'workflow days' tables to CSV, without the index column
for frame, destination in (
    (all_project_df, "data/project_table.csv"),
    (days_df, "data/workflow_days.csv"),
):
    frame.to_csv(destination, index=False)
