## Extract Data from Emails - Part 2 of 2
The first part created the initial file after going through all the emails in the inbox. The second part appends to that file the information extracted from emails received between the date of the latest file and the date the code is executed. Additionally, the code deletes older files in the destination path, creates visualizations in Plotly, and combines the plots in a basic dashboard in Dash.

### 1. Import Modules

In [1]:
import pandas as pd
import numpy as np
import os
import win32com.client
import re
import time
import dateutil.parser
from dateutil import parser
from dateutil import relativedelta
from datetime import datetime
from pandas import ExcelWriter
from pandas import ExcelFile
from plotly import tools
import plotly.offline as pyo
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html

### 2. Latest File

In [2]:
# Determine the latest xlsx file.
# Every dated report in the working directory is recorded; the newest one
# supplies both the workbook to read (latest_file) and its date string
# (date_file_str).
dates = []
dest_path = os.getcwd() + '\\'
latest_file = None
latest_date = None
for file in os.listdir(dest_path):
    # Skip Excel's "~$" lock files, which mirror the name of an open report.
    if file.startswith('~$'):
        continue
    # Extract the date from the file's name (the dot before xlsx is literal).
    date_file_text = re.search(r'Report_(.*?)\.xlsx', file)
    if date_file_text is None:
        continue
    try:
        # Convert to date format.
        date_file_dtf = datetime.strptime(date_file_text.group(1), '%m_%d_%Y')
    except ValueError:
        # The name resembles a report but carries no parsable date; ignore it.
        continue
    # Append to dates list.
    dates.append([date_file_dtf, date_file_text.group(1)])
    # Keep the newest file seen so far (ties go to the later directory entry).
    if latest_date is None or date_file_dtf >= latest_date:
        latest_date = date_file_dtf
        latest_file = file

if latest_file is None:
    raise FileNotFoundError('No Report_<mm_dd_yyyy>.xlsx file found in ' + dest_path)

# Sort by latest date.
dates_sort = sorted(dates, reverse=True)
# Expose the newest file's date rather than the date of whichever file the
# directory listing happened to return last.
date_file_dtf, date_file_str = dates_sort[0]

In [3]:
# Read the latest xlsx file.
# The first four sheets are loaded with a single read_excel call instead of
# creating four separate ExcelFile objects that are never closed.
sheets = pd.read_excel(latest_file, sheet_name=[0, 1, 2, 3])
df_file_summary = sheets[0]
df_file_preval = sheets[1]
df_file_errors = sheets[2]
df_file_certs = sheets[3]

# Create agency code column (the text between the first two underscores of
# each sheet's file-name column).
for frame, name_col in ((df_file_preval, 'file_name_preval'),
                        (df_file_errors, 'file_name_error'),
                        (df_file_certs, 'file_name')):
    frame['agency_code'] = frame[name_col].str.extract('_(.*?)_', expand=True)

### 3. Initiate

In [4]:
# Set Outlook settings to retrieve messages from inbox.
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.GetDefaultFolder(6) # For main inbox
# Restrict emails to those created on or after the date of the latest file.
# The cut-off is derived from latest_file itself so the filter always reflects
# the newest report, and the underscores of the file-name date are replaced by
# slashes so the filter carries a conventional mm/dd/yyyy date string.
restrict_since = re.search(r'Report_(.*?).xlsx', latest_file).group(1).replace('_', '/')
messages = inbox.Items.Restrict("[CreationTime] >='" + restrict_since + "'")

# Set initial lists and dataframes.
submission = []
df_preval = pd.DataFrame()
df_prefain = pd.DataFrame()
df_error = pd.DataFrame()
df_errfain = pd.DataFrame()
df_cert = pd.DataFrame()
df_certfain = pd.DataFrame()

# Create function to set agency based on file name.
def set_agency(row):
    """Return the agency name for the code found in ``row['file_name']``.

    The file name is converted to a string and the codes below are tested in
    the order listed; the first code contained anywhere in the name decides
    the result.  An empty string is returned when no code is present.
    """
    agency_by_code = (
        ('AM00', 'Agricultural Marketing Service'),
        ('AO00', 'Office of Advocacy and Outreach'),
        ('AP00', 'Animal and Plant Health Inspection Service'),
        ('AP02', 'Federal Shared Service Provider'),
        ('AR00', 'Agricultural Research Service'),
        # Same agency, spelled with the letter O instead of zeros.
        ('AROO', 'Agricultural Research Service'),
        ('EC00', 'Office of the Chief Economist'),
        ('ER00', 'Economic Research Service'),
        ('FA00', 'Farm Service Agency'),
        ('FI00', 'Food Safety and Inspection Service'),
        ('FN00', 'Food and Nutrition Service'),
        ('FS00', 'Forest Service'),
        ('FX00', 'Foreign Agricultural Service'),
        ('NA00', 'National Agricultural Statistics Service'),
        ('NI00', 'National Institute of Food and Agriculture'),
        # The trailing space is part of the stored value.
        ('NR00', 'Natural Resources Conservation Service '),
        ('RD00', 'Rural Development'),
        ('RM00', 'Risk Management Agency'),
    )
    name = str(row['file_name'])
    for code, agency in agency_by_code:
        if code in name:
            return agency
    return ''

# Function to create agency code and agency columns.
def agency_col(df):
    """Add ``agency_code`` and ``agency`` columns to *df* in place.

    ``agency_code`` is the text between the first two underscores of
    ``file_name``; ``agency`` is the result of applying ``set_agency`` to each
    row.  Nothing is returned.
    """
    # Extract agency code from file name.
    codes = df['file_name'].str.extract('_(.*?)_', expand=True)
    df['agency_code'] = codes
    # Apply set_agency function row by row.
    agencies = df.apply(set_agency, axis=1)
    df['agency'] = agencies

### 4. Loop Through and Set

In [5]:
# Loop through each message and filter accordingly.
# Two sets of dataframes are created for each category - one summarizes the
# metrics and the other consists of the actual records.
def _find_source_col(report):
    """Return the name of the source-file column in *report*.

    The column is spelled either 'Source File Name' or
    'FLEX_SOURCE_FILE_NAME'.  When both are present the one appearing last
    among the columns is returned.

    Raises:
        KeyError: if neither spelling is among the columns.
    """
    matches = [col for col in report.columns
               if col in ('Source File Name', 'FLEX_SOURCE_FILE_NAME')]
    if not matches:
        raise KeyError("report has no 'Source File Name' or 'FLEX_SOURCE_FILE_NAME' column")
    return matches[-1]


# Frames built per message.  Each group is concatenated once after the loop:
# DataFrame.append copied the whole frame on every call and is not available
# in pandas 2.x.
prefain_frames = []
preval_frames = []
errfain_frames = []
error_frames = []
certfain_frames = []
cert_frames = []

for m in messages:
    # Extract date.
    date_str = m.CreationTime.strftime('%Y-%m-%d %H:%M:%S')
    date = dateutil.parser.parse(date_str)
    subject = m.Subject.upper()

    # Retrieve file submission messages.
    if subject == 'DATA ACT FILE SUBMISSION' and m.SenderName in ('OCFO - FMMI BI TEAM', 'FMMIBITEAM@cfo.usda.gov'):
        # Set file name and date received.
        date_submission = date
        submission_file_name = re.search(r'file (.*?) has', m.Body).group(1)
        # Append all records to submission list.
        submission.append({'file_submission_notification_date': date_submission,'file_name': submission_file_name})

    # Retrieve pre-validation error messages.
    if subject == 'DATA ACT D2 FILE SUBMISSION - PRE-VALIDATION ERROR' and m.SenderName == 'BEREMOTE':
        # Set date received.
        date_preval = date
        # Set filename and filepath and save to current directory.
        attachment = m.Attachments.Item(1)
        preval_attachment = attachment.FileName
        preval_path = os.getcwd() + '\\' + preval_attachment
        attachment.SaveAsFile(preval_path)
        # The file name is the attachment name without its last ten characters.
        preval_file_name = preval_attachment[:-10]

        try:
            # Open text file with utf8 to prevent encoding errors.
            with open(preval_path, 'rt', encoding='utf8') as txt_file:
                preval_content = txt_file.read()
        finally:
            # Delete the saved copy even when reading fails.
            os.remove(preval_path)

        # Split the text at "Error on line:" for FAIN extraction.  The same
        # case-insensitive pattern yields both the pieces and the count, so
        # the two always agree.
        preval_text = re.split(r'Error on line:', preval_content, flags=re.IGNORECASE)[1:]
        preval_count = len(preval_text)

        # Loop through each piece to extract the FAIN and action date.
        for string in preval_text:
            fields = string.split(",")
            try:
                # Extract data elements after splitting at commas.
                preval_actdate = fields[5].strip() # Action date
                preval_fain = fields[8].strip() # FAIN
                preval_mod = fields[9].strip() # Award modification
                preval_uri = fields[10].strip() # URI
                preval_cfda = fields[32].strip() # CFDA
            except IndexError:
                # Too few fields on this record: leave every element blank.
                preval_actdate = ''
                preval_fain = ''
                preval_mod = ''
                preval_uri = ''
                preval_cfda = ''

            # Convert to dataframe and set index=[0] to prevent scalar value error.
            prefain = pd.DataFrame({'file_name': preval_file_name, 'notification_date': date_preval, 'action_date': preval_actdate, 'fain': preval_fain, 'award_modification': preval_mod, 'uri': preval_uri, 'cfda_number': preval_cfda}, index=[0])
            prefain_frames.append(prefain)

        # Create a separate dataframe for the pre-validation error count.
        # Convert to dataframe and set index=[0] to prevent scalar value error.
        preval_report = pd.DataFrame({'file_name': preval_file_name, 'preval_error_count': preval_count, 'preval_notification_date': date_preval}, index=[0])
        preval_frames.append(preval_report)

    # Retrieve D2 error report messages.
    if 'DATA ACT D2 ERROR REPORT' in subject and m.SenderName == 'OCFO - FMMI BI TEAM':
        # Set filename and filepath and save to current directory.
        attachment = m.Attachments.Item(1)
        error_attachment = attachment.FileName
        error_path = os.getcwd() + '\\' + error_attachment
        attachment.SaveAsFile(error_path)

        try:
            # Extract the reporting period date from the attachment filename
            # (the 19 characters before the last five) and convert to date time.
            date_error_text = error_attachment[-24:-5]
            date_error_replace = date_error_text.replace('-','')
            date_error = datetime.strptime(date_error_replace, '%Y%m%d%H%M%S')
            # Read into dataframe.
            error_file = pd.read_excel(error_path, sheet_name='D2_Error_Records')
        finally:
            # Delete the saved copy even when parsing fails.
            os.remove(error_path)

        # Remove whitespace from column names.
        error_file.rename(columns=lambda x: x.strip(), inplace=True)
        # Determine which spelling of the source file column this report uses.
        source_col = _find_source_col(error_file)

        # Get error count in the source file name column.
        error_file_count = error_file.loc[:,source_col].value_counts()
        # Extract the record columns and rename them.
        error_fain = error_file.loc[:,['Action Date', 'FAIN', 'Award Modification Amendment Number', 'URI', 'Federal Action Obligation', 'CFDA Number', source_col]].rename(columns={'Action Date': 'action_date', 'FAIN': 'fain', 'Award Modification Amendment Number': 'award_modification', 'URI': 'uri', 'Federal Action Obligation': 'obligation', 'CFDA Number': 'cfda_number', source_col: 'file_name'})
        # Add an error date column.
        error_fain['D2_error_reporting_period'] = date_error
        errfain_frames.append(error_fain)

        # Create a separate dataframe for the D2 error count.
        # Convert index (i.e., filename) and value (i.e., count) to list.
        error_file_names = error_file_count.index.tolist()
        error_count = error_file_count.values.tolist()
        # Assign the reporting period date to each record.
        error_report_period = [date_error] * len(error_file_names)
        # Convert to dataframe.
        error_report = pd.DataFrame({'file_name': error_file_names, 'D2_error_count': error_count,'D2_error_reporting_period': error_report_period})
        error_frames.append(error_report)

    # Retrieve D2 certification messages.
    if 'DATA ACT D2 CERTIFICATION REPORT' in subject and m.SenderName == 'OCFO - FMMI BI TEAM':
        # Set filename and filepath and save to current directory.
        attachment = m.Attachments.Item(1)
        cert_attachment = attachment.FileName
        cert_path = os.getcwd() + '\\' + cert_attachment
        attachment.SaveAsFile(cert_path)

        try:
            # Extract the reporting period date from the attachment filename
            # (the 19 characters before the last five) and convert to date time.
            date_cert_text = cert_attachment[-24:-5]
            date_cert_replace = date_cert_text.replace('-','')
            date_cert = datetime.strptime(date_cert_replace, '%Y%m%d%H%M%S')
            # Read into dataframe.
            cert_file = pd.read_excel(cert_path, sheet_name='DATA ACT D2 CERTIFICATION REPOR')
        finally:
            # Delete the saved copy even when parsing fails.
            os.remove(cert_path)

        # Remove whitespace from column names.
        cert_file.rename(columns=lambda x: x.strip(), inplace=True)
        # Determine which spelling of the source file column this report uses.
        source_col = _find_source_col(cert_file)

        # Get cert count in the source file name column.
        cert_file_count = cert_file.loc[:,source_col].value_counts()
        # Extract the record columns and rename them.
        cert_fain = cert_file.loc[:,['ActionDate', 'FAIN', 'AwardModificationAmendmentNumber', 'URI', 'CFDA_Number', 'FederalActionObligation', source_col]].rename(columns={'ActionDate': 'action_date', 'FAIN': 'fain', 'AwardModificationAmendmentNumber': 'award_modification', 'URI': 'uri', 'CFDA_Number': 'cfda_number', 'FederalActionObligation': 'obligation', source_col: 'file_name'})
        # Add a cert date column.
        cert_fain['D2_cert_reporting_period'] = date_cert
        certfain_frames.append(cert_fain)

        # Create a separate dataframe for the D2 cert count.
        # Convert index (i.e., filename) and value (i.e., count) to list.
        cert_file_names = cert_file_count.index.tolist()
        cert_count = cert_file_count.values.tolist()
        # Assign the reporting period date to each record.
        cert_report_period = [date_cert] * len(cert_file_names)
        # Convert to dataframe.
        cert_report = pd.DataFrame({'file_name': cert_file_names, 'D2_certification_count': cert_count,'D2_cert_reporting_period': cert_report_period})
        cert_frames.append(cert_report)

# Create the submission dataframe once, if any submission message was found.
if submission:
    df_submission = pd.DataFrame(submission)

# Combine the frames collected for each category with the initial dataframes.
df_prefain = pd.concat([df_prefain] + prefain_frames)
df_preval = pd.concat([df_preval] + preval_frames)
df_errfain = pd.concat([df_errfain] + errfain_frames)
df_error = pd.concat([df_error] + error_frames)
df_certfain = pd.concat([df_certfain] + certfain_frames)
df_cert = pd.concat([df_cert] + cert_frames)

# Apply agency_col on the record dataframes to create the agency columns.
# Empty frames are skipped because they have no file_name column to read.
for frame in (df_prefain, df_errfain, df_certfain):
    if not frame.empty:
        agency_col(frame)

### 5. Dataframe Processing

#### a. Submission Summary

In [6]:
# Apply agency_col function on df_submission to create agency column.
agency_col(df_submission)
# Drop agency code column.
df_submission = df_submission.drop(columns='agency_code')

# Create subsets of the data from the latest file (to match with current dataframes).
sub_cols = ['file_name', 'file_submission_notification_date', 'agency']
preval_cols = ['file_name', 'preval_error_count', 'preval_notification_date']
err_cols = ['file_name', 'D2_error_count', 'D2_error_reporting_period']
cert_cols = ['file_name', 'D2_certification_count', 'D2_cert_reporting_period']
df_file_summ_sub = df_file_summary[sub_cols]
df_file_summ_preval = df_file_summary[preval_cols]
df_file_summ_err = df_file_summary[err_cols]
df_file_summ_cert = df_file_summary[cert_cols]

# Append the current dataframes to the corresponding subsets
# (current data first, data from the latest file second).
df_summ_comb = pd.concat([df_submission, df_file_summ_sub])
df_prefile_comb = pd.concat([df_preval, df_file_summ_preval])
df_errfile_comb = pd.concat([df_error, df_file_summ_err])
df_certfile_comb = pd.concat([df_cert, df_file_summ_cert])

# Create a sort and drop function.
def sort_drop(df,col1,col2):
    """Sort *df* by ``col1`` (descending) and drop duplicates of ``col2``.

    Both steps modify *df* in place and nothing is returned.  Because the
    frame is sorted first, the row kept for each ``col2`` value is the one
    with the greatest ``col1`` (i.e., the most recent when ``col1`` is a date).
    """
    df.sort_values(by=[col1], ascending=False, inplace=True)
    # No rebinding here: with inplace=True drop_duplicates returns None.
    df.drop_duplicates(subset=[col2], keep='first', inplace=True)
    
# Apply the sort_drop function on the appended dataframes:
# each is ordered by its date column and reduced to one row per file_name.
for frame, date_col in ((df_summ_comb, 'file_submission_notification_date'),
                        (df_prefile_comb, 'preval_notification_date'),
                        (df_errfile_comb, 'D2_error_reporting_period'),
                        (df_certfile_comb, 'D2_cert_reporting_period')):
    sort_drop(frame, date_col, 'file_name')

# Merge dataframes on file_name (outer joins, blanks instead of missing values).
df_comb1 = df_summ_comb.merge(df_prefile_comb, how='outer', on='file_name').fillna('')
df_comb2 = df_comb1.merge(df_errfile_comb, how='outer', on='file_name').fillna('')
df_final_summ_comb = df_comb2.merge(df_certfile_comb, how='outer', on='file_name').fillna('')

# Rearrange columns.
summary_order = [
    'file_submission_notification_date',
    'file_name',
    'agency',
    'preval_error_count',
    'preval_notification_date',
    'D2_error_count',
    'D2_error_reporting_period',
    'D2_certification_count',
    'D2_cert_reporting_period',
]
df_final_summ_comb = df_final_summ_comb[summary_order]

#### b. Prevalidation, Errors, and Certification Records

In [8]:
# Drop and rename applicable columns to match for dataframe merging.
# Drop the certification columns from the latest file's sheets.
cert_only_cols = ['D2_cert_reporting_period', 'file_name_cert']
df_file_preval = df_file_preval.drop(columns=cert_only_cols)
df_file_errors = df_file_errors.drop(columns=cert_only_cols)

# Rename file_name columns in current dataframes so they match the sheets.
df_prefain.rename(columns={'file_name': 'file_name_preval'}, inplace=True)
df_errfain.rename(columns={'file_name': 'file_name_error'}, inplace=True)

# Append current dataframes to latest file data (current data first).
df_preval_comb = pd.concat([df_prefain, df_file_preval])
df_error_comb = pd.concat([df_errfain, df_file_errors])
df_cert_comb = pd.concat([df_certfain, df_file_certs])

# Convert award_modification column to numeric to make format consistent for further processing.
# Function to convert.
def con_num(df):
    """Replace ``award_modification`` in *df* with its numeric form, in place.

    Values that cannot be converted become NaN.  Nothing is returned.
    """
    numeric_mod = pd.to_numeric(df['award_modification'], errors='coerce')
    df['award_modification'] = numeric_mod

# Apply con_num function to each combined records dataframe.
for frame in (df_preval_comb, df_error_comb, df_cert_comb):
    con_num(frame)

# Function to create unique column for matching.
def key_col(df):
    """Add the ``ag_fain_mod_uri_cfda`` matching key to *df* in place.

    The key is the string concatenation of agency_code, fain,
    award_modification, uri and the first six characters of cfda_number.
    """
    pieces = [df[name].astype(str)
              for name in ('agency_code', 'fain', 'award_modification', 'uri')]
    # Only the first six characters of the CFDA number take part in the key.
    pieces.append(df['cfda_number'].astype(str).str[:6])
    key = pieces[0]
    for piece in pieces[1:]:
        key = key + piece
    df['ag_fain_mod_uri_cfda'] = key

# Apply the key_col function on the appended dataframes.
for frame in (df_preval_comb, df_error_comb, df_cert_comb):
    key_col(frame)

# Apply the sort_drop function to delete duplicates (keeps the most recent
# reporting period for each key).
sort_drop(df_error_comb, 'D2_error_reporting_period', 'ag_fain_mod_uri_cfda')
sort_drop(df_cert_comb, 'D2_cert_reporting_period', 'ag_fain_mod_uri_cfda')

# Find matching records between prevalidation-certified and error-certified dataframes.
cert_lookup = df_cert_comb[['ag_fain_mod_uri_cfda', 'D2_cert_reporting_period', 'file_name']]
df_pre_cert = df_preval_comb.merge(cert_lookup, how='left', on='ag_fain_mod_uri_cfda')
df_err_cert = df_error_comb.merge(cert_lookup, how='left', on='ag_fain_mod_uri_cfda')

# Rename the certification file name, then rearrange columns.
pre_cert_order = ['file_name_preval', 'notification_date', 'agency', 'fain', 'uri', 'action_date', 'award_modification', 'cfda_number', 'file_name_cert', 'D2_cert_reporting_period']
err_cert_order = ['file_name_error', 'D2_error_reporting_period', 'agency', 'fain', 'uri', 'action_date', 'obligation', 'award_modification', 'cfda_number', 'file_name_cert', 'D2_cert_reporting_period']
cert_order = ['file_name', 'D2_cert_reporting_period', 'agency', 'fain', 'uri', 'action_date', 'obligation', 'award_modification', 'cfda_number']
df_pre_cert = df_pre_cert.rename(columns={'file_name': 'file_name_cert'})[pre_cert_order]
df_err_cert = df_err_cert.rename(columns={'file_name': 'file_name_cert'})[err_cert_order]
df_cert_comb = df_cert_comb[cert_order]

### 6. Create xlsx File

In [9]:
# Create an xlsx file, one sheet for each dataframe.
# Set timestamp and file destination path.
timestr = time.strftime('%m_%d_%Y')
# dest_path = r'\\wdcnetapp01\\CFOData$\\Data\\TARD\\DATA Act\\DATA Act D2 Error-Certs Files\\McStay report\\'

# Set file name and save to path.
file_name = 'D2_Files_Tracking_Report_'+ timestr + '.xlsx'
# The writer is used as a context manager so the workbook is saved and closed
# when the block ends, including when one of the sheets fails to write.
with pd.ExcelWriter(dest_path + file_name, engine='xlsxwriter') as writer:
    df_final_summ_comb.to_excel(writer, sheet_name='Files_Summary', index=False)
    df_pre_cert.to_excel(writer, sheet_name='Prevals', index=False)
    df_err_cert.to_excel(writer, sheet_name='Errors', index=False)
    df_cert_comb.to_excel(writer, sheet_name='Certs', index=False)

### 7. Folder Cleanup

In [10]:
# Delete files in the destination folder older than three months.
# datetime.today() is called on the class itself, so the clean-up does not
# depend on a variable named date having been set by an earlier cell.
today = datetime.today()
# Loop through each file.
for file in os.listdir(dest_path):
    # Set file path.
    file_path = os.path.join(dest_path, file)
    # Extract the date from the file's name (csv or xlsx reports).
    date_file_text = re.search(r'Report_(.*?)\.csv', file) or re.search(r'Report_(.*?)\.xlsx', file)

    # Use conditional to prevent errors from non-matching files.
    if date_file_text is None:
        continue
    try:
        # Convert string to date time.
        date_file = datetime.strptime(date_file_text.group(1), '%m_%d_%Y')
    except ValueError:
        # The name resembles a report but carries no parsable date; keep it.
        continue

    # A file is deleted once its date plus three calendar months lies strictly
    # before today's date, i.e. it is at least three months and one day old.
    # Comparing against the shifted date also covers files more than a year
    # old, which a months-only difference would miss.
    expiry = date_file + relativedelta.relativedelta(months=3)
    if expiry.date() < today.date():
        os.unlink(file_path)

### 8. Plotly

#### a. Bar Chart
Agency totals for errors and certified records.

In [11]:
# Function to create agency acronym column.
def agency_acronym(row):
    """Return the acronym for ``row['agency']``.

    The agency name must match one of the keys below exactly; any other value
    yields an empty string.
    """
    acronyms = {
        'Agricultural Marketing Service': 'AMS',
        'Office of Advocacy and Outreach': 'OAO',
        'Animal and Plant Health Inspection Service': 'APHIS',
        'Federal Shared Service Provider': 'FSSP',
        'Agricultural Research Service': 'ARS',
        'Office of the Chief Economist': 'OCE',
        'Economic Research Service': 'ERS',
        'Farm Service Agency': 'FSA',
        'Food Safety and Inspection Service': 'FSIS',
        'Food and Nutrition Service': 'FNS',
        'Forest Service': 'FS',
        'Foreign Agricultural Service': 'FAS',
        'National Agricultural Statistics Service': 'NASS',
        'National Institute of Food and Agriculture': 'NIFA',
        # The trailing space is part of the expected agency value.
        'Natural Resources Conservation Service ': 'NRCS',
        'Rural Development': 'RD',
        'Risk Management Agency': 'RMA',
    }
    return acronyms.get(row['agency'], '')

def agency_acr_col(df):
    """Add an ``ag_acr`` column to *df* in place by applying ``agency_acronym`` to each row."""
    acronyms = df.apply(agency_acronym, axis=1)
    df['ag_acr'] = acronyms

In [12]:
# Groupby agency: count error records and how many of them have a matching
# certified record, then derive the errors still outstanding.
err_counts = df_err_cert.groupby('agency').agg({'file_name_error': 'count', 'file_name_cert': 'count'})
df_agency_err = err_counts.reset_index()
df_agency_err['outstanding_errors'] = df_agency_err['file_name_error'] - df_agency_err['file_name_cert']
cert_counts = df_cert_comb.groupby('agency').agg({'file_name': 'count'})
df_agency_cert = cert_counts.reset_index()

# Apply agency_acronym function.
for frame in (df_agency_err, df_agency_cert):
    agency_acr_col(frame)

# Create error bar chart (outstanding and resolved errors stacked per agency).
trace_errb1 = go.Bar(x=df_agency_err['ag_acr'], y=df_agency_err['outstanding_errors'],
                     name='Outstanding Errors', marker={'color': 'red'})
trace_errb2 = go.Bar(x=df_agency_err['ag_acr'], y=df_agency_err['file_name_cert'],
                     name='Resolved Errors', marker={'color': 'green'})
data_errb = [trace_errb1, trace_errb2]
layout_errb = go.Layout(title='<b>Total Errors</b>',
                        font={'family': 'century gothic', 'size': 13},
                        barmode='stack')
fig_errb = go.Figure(data=data_errb, layout=layout_errb)
# pyo.plot(fig_errb, filename='D2_Outstanding_Errors.html') # Create an html file.

# Create certified records bar chart.
data_certb = [go.Bar(x=df_agency_cert['ag_acr'], y=df_agency_cert['file_name'])]
layout_certb = go.Layout(title='<b>Total Certified Records</b>',
                         font={'family': 'century gothic', 'size': 13})
fig_certb = go.Figure(data=data_certb, layout=layout_certb)
# pyo.plot(fig_certb, filename='D2_Certified_Records.html') # Create an html file.

#### b. Line Chart
Monthly plots of errors and certified records.

In [13]:
# Create a month-year column.
# Function to create the month-year column and convert it to date format.
def date_convert(df,col):
    """Add ``notification_month_year`` to *df* in place.

    The datetime column ``col`` is formatted as '<month name> <year>' and the
    resulting text is parsed back into datetimes with the same format.
    """
    target = 'notification_month_year'
    month_format = '%B %Y'
    df[target] = df[col].dt.strftime(month_format)
    df[target] = pd.to_datetime(df[target], format=month_format)

# Apply date_convert function.
date_convert(df_err_cert,'D2_error_reporting_period')
date_convert(df_cert_comb,'D2_cert_reporting_period')

# Groupby month-year, one for total and another broken down by agency.
err_by_month = df_err_cert.groupby(['notification_month_year','agency'])
err_date = err_by_month[['file_name_error']].count().reset_index()
err_date_total = df_err_cert.groupby(['notification_month_year'])[['file_name_error']].count().reset_index()
cert_by_month = df_cert_comb.groupby(['notification_month_year','agency'])
cert_date = cert_by_month[['file_name']].count().reset_index()
cert_date_total = df_cert_comb.groupby(['notification_month_year'])[['file_name']].count().reset_index()

# Apply agency_acronym function.
agency_acr_col(err_date)
agency_acr_col(cert_date)

# Agency breakdown line chart for errors: one trace per agency acronym.
data_errl = []
for agency in err_date['ag_acr'].unique():
    in_agency = err_date['ag_acr'] == agency
    trace_errl = go.Scatter(x=err_date['notification_month_year'][in_agency],
                            y=err_date['file_name_error'][in_agency],
                            name=agency)
    data_errl.append(trace_errl)
layout_errl = go.Layout(title='<b>Monthly Errors</b>',
                        font={'family': 'century gothic', 'size': 13})
fig_errl = go.Figure(data=data_errl, layout=layout_errl)
# pyo.plot(fig_errl, filename='D2_Monthly_Errors.html') # Create an html file.

# Agency breakdown line chart for certified records: one trace per agency acronym.
data_certl = []
for agency in cert_date['ag_acr'].unique():
    in_agency = cert_date['ag_acr'] == agency
    trace_certl = go.Scatter(x=cert_date['notification_month_year'][in_agency],
                             y=cert_date['file_name'][in_agency],
                             name=agency)
    data_certl.append(trace_certl)
layout_certl = go.Layout(title='<b>Monthly Certified Records</b>',
                         font={'family': 'century gothic', 'size': 13})
fig_certl = go.Figure(data=data_certl, layout=layout_certl)
# pyo.plot(fig_certl, filename='D2_Monthly_Certified.html') # Create an html file.

#### c. Subplots
Combine the separate plots and show on one page using Plotly's subplot feature (i.e., in lieu of a dashboard).

In [14]:
# Define subplot features.
fig_subp = tools.make_subplots(rows=2, cols=2,
                           shared_xaxes=True,
                           shared_yaxes=True,
                           subplot_titles=('<b>Total Certified Records</b>', '<b>Monthly Certified Records</b>',
                                           '<b>Total Errors</b>', '<b>Monthly Errors</b>'))

# Set plot colors.
colors = {'AMS':'blue',
          'OAO':'olivedrab',
          'APHIS':'brown',
          'FSSP':'burlywood',
          'ARS':'chartreuse',
          'OCE':'coral',
          'ERS':'darkcyan',
          'FSA':'orange',
          'FSIS':'hotpink',
          'FNS':'indianred',
          'FS':'red',
          'FAS':'darkviolet',
          'NASS':'tomato',
          'NIFA':'gold',
          'NRCS':'seagreen',
          'RD':'purple',
          'RMA':'yellow',
          }
# Colour for acronyms missing from the table above.  agency_acronym returns
# an empty string for agencies it does not recognise, and indexing the table
# with such a value would raise KeyError.
default_color = 'gray'

# Append certified records bar chart.
fig_subp.append_trace({'x':df_agency_cert['ag_acr'],
                       'y':df_agency_cert['file_name'],
                       'name':'Certified Records',
                       'marker':dict(color='darkblue'),
                       'type':'bar'}, row=1, col=1)

# Append error bar charts.
fig_subp.append_trace({'x':df_agency_err['ag_acr'],
                       'y':df_agency_err['outstanding_errors'],
                       'name':'Outstanding Errors',
                       'marker':dict(color='red'),
                       'type':'bar',
                       'offset': -0.4}, row=2, col=1)

fig_subp.append_trace({'x':df_agency_err['ag_acr'],
                       'y':df_agency_err['file_name_cert'],
                       'name':'Resolved Errors',
                       'marker':dict(color='green'),
                       'type':'bar',
                       'offset': -0.4}, row=2, col=1)

# Append certified records line chart.
data_certl_sp=[]
for agency in cert_date['ag_acr'].sort_values().unique():
    fig_subp.append_trace({'x':cert_date['notification_month_year'][cert_date['ag_acr']==agency],
                           'y':cert_date['file_name'][cert_date['ag_acr']==agency],
                           'name':agency,
                           'legendgroup':agency,
                           'marker':{'color': colors.get(agency, default_color)},
                           'type':'scatter'}, row=1, col=2)
    data_certl_sp.append(agency)

# Append error line charts.
data_errl_sp=[]
for agency in err_date['ag_acr'].unique():
    fig_subp.append_trace({'x':err_date['notification_month_year'][err_date['ag_acr']==agency],
                           'y':err_date['file_name_error'][err_date['ag_acr']==agency],
                           'name':agency,
                           'legendgroup':agency,
                           # Show a legend entry only for agencies that did not
                           # already get one from the certified-records traces.
                           'showlegend' : False if agency in data_certl_sp else True,
                           'marker':{'color': colors.get(agency, default_color)},
                           'type':'scatter'}, row=2, col=2)
    data_errl_sp.append(agency)

# Set plot name.
fig_subp['layout'].update(title='<b>D2 Files Summary</b> '+'<b>'+timestr+'</b>')
# Set subplot titles' font.
for i in fig_subp['layout']['annotations']:
    i['font'] = dict(family='century gothic', size=15)
# Create html file.
pyo.plot(fig_subp, filename='D2_Plots.html')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]
[ (2,1) x1,y2 ]  [ (2,2) x2,y2 ]



'file://C:\\Users\\JMcStay\\Desktop\\Python Projects\\D2_Plots.html'

### 9. Dash
Create a basic dashboard in Dash.

In [15]:
# Use the Plotly plots from 8a and 8b and append onto dashboard.
app = dash.Dash()

# Shared style fragments for the header text and the two chart columns.
_centered_text = {'textAlign': 'center', 'font-family': 'century gothic'}
_half_width = {'width': '50%', 'display': 'inline-block'}

# Page header and "as of" line.
_title = html.H1('D2 Files Summary', style=dict(_centered_text))
_as_of = html.Div(children='As of '+ timestr, style=dict(_centered_text, fontSize=20))

# Left column: bar charts.  Right column: line charts.
_bar_column = html.Div(
    [
        dcc.Graph(id='cert_bars', figure=fig_certb),
        dcc.Graph(id='error_bars', figure=fig_errb),
    ],
    style=dict(_half_width),
)
_line_column = html.Div(
    [
        dcc.Graph(id='cert_lines', figure=fig_certl),
        dcc.Graph(id='error_lines', figure=fig_errl),
    ],
    style=dict(_half_width),
)

app.layout = html.Div([_title, _as_of, _bar_column, _line_column])
if __name__ == '__main__':
    app.run_server()

 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [19/Sep/2018 09:15:30] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [19/Sep/2018 09:15:32] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [19/Sep/2018 09:15:32] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [19/Sep/2018 09:15:32] "GET /favicon.ico HTTP/1.1" 200 -


### 10. Note
To automate code execution, convert the notebook to a .py file and schedule it to run with Windows Task Scheduler.