# Comparison Tool - MFD/Printer Data Among Toshiba Databases

## Overview

- This tool compares data exports from the following databases / files.

 - Install Base
 - PageSmart
 - Project Rollout Schedule(s)
 - FSM Billing File
 
- Functions:
 - Phase 1: [current] Compare databases 1-to-1 to identify missing devices.
 - Phase 2: Data accuracy 
 - Phase 3: Predictive corrections

## Identify Data Files and Global Variables 

In [40]:
import unicodecsv

#####################################
#               Formatting for comparison ease of read
#####################################
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

#####################################
#               Convert Excel to csv (.xlsx - data must be on Sheet1)
#####################################
import xlrd
import csv

def csv_from_excel(file1, new_file_name):
    wb = xlrd.open_workbook(file1)
    sh = wb.sheet_by_name('Sheet1')
    your_csv_file = open(new_file_name, 'w')
    wr = csv.writer(your_csv_file, quoting=csv.QUOTE_ALL)

    for rownum in range(sh.nrows):
        wr.writerow(sh.row_values(rownum))

    your_csv_file.close()

# runs the csv_from_excel function:
#csv_from_excel('MGM IB.xlsx', "MGM IB.csv")

#####################################
#       WHICH FILES ARE WE LOOKING AT?
#####################################

#Typically Install Base or Oracle
src1 = 'MGM IB by 4 customer numbers 8.30.3019.csv'
#Typically PageSmart
#IMPORTANT: Ensure gspmps.com instead of https://tabs.toshibameters.com to ensure you’re looking at the most recent record when multiple FMAudit servers are installed for a single customer
src2 = 'MGM FMA 8.15.2019 Device Change Worksheet.csv'
#Typically Project ROS (if multiple ROS, then a summary of all of them)
#IMPORTANT: this is a single file for all ROS's 
src3 = 'MGM Project ROS Summary.csv'
#Typically full IB data set
src4 = 'Full IB 8.21.2019 - serials only.csv'
#Typically billing file
src5 = 'MGM Billing 8.27.2019.csv'
#Typically full IB data set - BRIEF VERSION
src6 = 'Full IB from 2016-2019 BRIEF.csv'
#misc file that has serials to compare
src7 = ''

#Customer Reference Number for Oracle Installed Base
cust_numbers = list()
cust_numbers = ("T0BS49A", "T0BUQW1", "T0BV7VF", "T0BVMSY")

print("I did the thing!")

I did the thing!


## Load Data from CSVs

In [41]:
#####################################
#       IMPORT SCRIPTS FOR EACH THAT EXPORTS SAMPLE OF SERIAL NUMBER
#####################################


def import_cvs_into_list(source_cvs):
    """
        Imports CSV into list
        Args:
            source_cvs
        Returns:
            data_list
    """
    #creates a list where each row is list entry. Each list entry is a collection of Dict's
    data_list = []
    with open(source_cvs, 'rb') as f:
        reader = unicodecsv.DictReader(f)
        for row in reader:
            data_list.append(row)
    #List value of row:
    print(color.UNDERLINE + "\nSample pulled from " + source_cvs + color.END + ", entry from 0th list entry: ")
    print(data_list[0])
    return data_list

ib_devices = list()
pagesmart_devices = list()
ros_devices = list()
ib_full_devices = list()
billing_devices = list()
ib_full_brief_devices = list()

if src1:
    ib_devices = import_cvs_into_list(src1)
if src2:
    pagesmart_devices = import_cvs_into_list(src2)
if src3:
    ros_devices = import_cvs_into_list(src3)
if src4:
    ib_full_devices_serials = import_cvs_into_list(src4)
if src5:
    billing_devices = import_cvs_into_list(src5)
if src6:
    ib_full_brief_devices = import_cvs_into_list(src6)



[4m
Sample pulled from MGM FMA 8.15.2019 Device Change Worksheet.csv[0m, entry from 0th list entry: 
OrderedDict([('\ufeffDevice ID', '1884272'), ('Device Name', 'HL-3170CDW series'), ('Model', 'Brother HL-3170CDW'), ('Serial Number', 'U63478D3J153685'), ('IP Address', '10.6.64.30'), ('Last Report Date ', '8/13/2019'), ('Managed', 'Unmanaged'), ('Ship To / Company Name', ''), ('Address1', ''), ('Address 2', ''), ('Attention', ''), ('City', ''), ('State', ''), ('Zip', ''), ('Location', ''), ('Asset Number', ''), ('Cost Center', ''), ('EUConfirmation Email', ''), ('Program Type', ''), ('Black Part', ''), ('Cyan Part', ''), ('Magenta Part', ''), ('Yellow Part', '')])
[4m
Sample pulled from MGM Project ROS Summary.csv[0m, entry from 0th list entry: 
OrderedDict([('\ufeffSheet Name', 'Borgata Implementation Rollout Schedule 2018'), ('Primary', 'MGM Borgata'), ('Location Name', ''), ('Street Address', ''), ('City', 'Atlantic City'), ('State', 'NJ'), ('Zip', '08401'), ('Floor / Reference 

## Harmonize data keys e.g. Serial Number columns

In [42]:
#####################################
#               Harmonize Keys      #
#####################################


serial_number_name_variants = ['SERIAL_NUMBER','Serial Number','Toshiba Serial Number','SerialNbr']

def harmonize_serial_number_key(list_a):
    """
    Since Serial Number is primary key, they key name sshould be the same among all tables
    ArgS:
        list_a
    Returns:
        none
    """
    for row in list_a:
        if ('serial_number') in row:
            continue
        for sn_variant in serial_number_name_variants:
            if sn_variant in row:
                row['serial_number'] = row[sn_variant]
                del row[sn_variant]
        if ('serial_number') not in row:
            print("Issue: Did not find serial number key.")

            
if src1:
    harmonize_serial_number_key(ib_devices)
    print(ib_devices[0]['serial_number'])
    
if src2:
    harmonize_serial_number_key(pagesmart_devices)
    print(pagesmart_devices[0]['serial_number'])

if src3:
    harmonize_serial_number_key(ros_devices)
    print(ros_devices[0]['serial_number'])

#NOTE: src4 is already a list of just serial numbers to reduce resource drain on computer
# if src4:
#     harmonize_serial_number_key(ib_full_devices_serials)
#     print(ib_full_devices_serials[0]['serial_number'])    
    
if src5:
    harmonize_serial_number_key(billing_devices)
    print(billing_devices[0]['serial_number'])

if src6:
    harmonize_serial_number_key(ib_full_brief_devices)
    print(ib_full_brief_devices[0]['serial_number'])

print("I did the thing!")

U63478D3J153685
SCFFH62112
JP8RD26625
SC7CE50600
I did the thing!


## Fixing Data Types - FUNCTION

In [43]:
#####################################
#     Add data types other than string, IB portion and functions
#####################################

from datetime import datetime as dt

# Takes a date as a string, and returns a Python datetime object. 
# If there is no date given, returns None
def parse_date(date):
    if date == '' or date == None:
        return None
    else:
        return dt.strptime(date, '%m/%d/%Y')
        #return dt.strptime(date, '%m/%d/%Y %H:%M:%S')
        
    
# Takes a string which is either an empty string or represents an integer,
# and returns an int or None.
def parse_maybe_int(i):
    if i == '':
        return None
    else:
        return int(i)
    
print("I did the thing!")

I did the thing!


## Fixing Data Types - INSTALL BASE

In [44]:
if src1:    
    # Clean up the data types an INSTALL BASE table
    for ib_device in ib_devices:
        ib_device['INSTALL_DATE'] = parse_date(ib_device['INSTALL_DATE'])
        #ib_device['days_to_cancel'] = parse_maybe_int(enrollment['days_to_cancel'])
        ib_device['GOLD_FLAG'] = ib_device['GOLD_FLAG'] == 'True'
    print ()

    print("I did the thing!")

## Fixing Data Types - PAGESMART

In [45]:
#####################################
#     Add data types other than string, PAGESMART portion
#####################################

if src2:
    for pagesmart_device in pagesmart_devices:
    #     pagesmart_device['lessons_completed'] = parse_maybe_int(pagesmart_device['MonoPages'])
    #     pagesmart_device['num_courses_visited'] = parse_maybe_int(pagesmart_device['ColorPages'])
    #     pagesmart_device['projects_completed'] = parse_maybe_int(pagesmart_device['BeginningPageCount'])
    #     pagesmart_device['total_minutes_visited'] = parse_maybe_int(pagesmart_device['EndingPageCount'])
    #     pagesmart_device['StartDateTime'] = parse_date(pagesmart_device['StartDateTime'])
        pagesmart_device['Last Report Date'] = parse_date(pagesmart_device['Last Report Date '])
    
    print("I did the thing!")



I did the thing!


## Fixing Data Types - Project ROS

In [46]:
#####################################
#     Add data types other than string, ROS portion
#####################################

if src3:
    print("I did the thing!")

I did the thing!


## Fixing Data Types - Billing

In [47]:
#####################################
#     Add data types other than string, BILLING portion
#####################################

if src5:
    print("I did the thing!")

I did the thing!


## Find/List Duplicate Serial Numbers

In [48]:
#####################################
#                 Find unique and non-blank, post duplicates
#####################################

## Find the total number of rows and the number of unique students (account keys)
## in each table.

def find_unique_and_none_blank(input_list):
    unique_values = set()
    duplicate_values = set()
    blank_counter = 0
    for row in input_list:
        if not row['serial_number']:
            blank_counter += 1
        elif row['serial_number'] not in unique_values:
            unique_values.add(row['serial_number'])
        elif row['serial_number'] is not None:
            duplicate_values.add(row['serial_number'])
            
    
    print('Total devices checked: ' +  str(len(input_list)))
    print('Total unique devices: ' + str(len(unique_values)))
    print('Total blanks: ' + str(blank_counter))
    
    if (len(input_list)!=len(unique_values)):
        print("Total duplicates devices: " + str(len(input_list)-len(unique_values)))
        if(len(duplicate_values) > 1 or duplicate_values):
            print(color.BOLD + "Duplicate records found. Please investigate then delete applicable record(s)." \
                  + color.END)
    return duplicate_values
    
#####################################
#               List orig vs duplicates count
#####################################

print (src1), print(find_unique_and_none_blank(ib_devices), "\n") if src1 else None
print (src2), print(find_unique_and_none_blank(pagesmart_devices), "\n") if src2 else None
print (src3), print(find_unique_and_none_blank(ros_devices), "\n") if src3 else None
print (src5), print(find_unique_and_none_blank(billing_devices), "\n") if src5 else None

    


MGM FMA 8.15.2019 Device Change Worksheet.csv
Total devices checked: 4059
Total unique devices: 3494
Total blanks: 565
Total duplicates devices: 565
set() 

MGM Project ROS Summary.csv
Total devices checked: 1744
Total unique devices: 1721
Total blanks: 0
Total duplicates devices: 23
[1mDuplicate records found. Please investigate then delete applicable record(s).[0m
{'CNIH43138', 'JPCCLDB098', 'JPCCLDB096', 'NOT BEING DELIVERED', 'CULH49851', 'JPCCLDW16W', 'DID NOT DEPLOY', 'CNBCM191Z2', 'JPCCLDB09B', 'JPCCLDB09D', 'CNBCL730RG', 'CNBCL730S0', '7018836002DXY', 'CNBCL730Q3'} 

MGM Billing 8.27.2019.csv
Total devices checked: 3384
Total unique devices: 2426
Total blanks: 24
Total duplicates devices: 958
[1mDuplicate records found. Please investigate then delete applicable record(s).[0m
{'SCZCJ48201', 'SCNAJ69794', 'SCHGG19612', 'SCHIH30162', 'SCHJG22296', 'SCNCJ39049', 'CNCCB9D0V2', 'SCNLH57875', 'SCNHH37911', 'JPDCGB205M', 'SCNCJ39057', 'SCNBJ33458', 'SCNBJ33539', 'SC1BJ13796', 'SCU

(None, None)

## Account for "Silly S"

In [49]:
#####################################
#    Compare serial numbers that may have "Silly S"
#####################################
#Some serials have the "Silly S" at the beginning.  Check values with added 'S' and removed first character to check against.
def check_serial_silly_s(input_serial, check_against_list):
    if (input_serial in check_against_list):
        return input_serial
    elif (("S"+input_serial) in check_against_list):
#         print('S added to beginning of Serial for ' + str(input_serial))
        return ("S"+input_serial)
    elif ((input_serial[1:]) in check_against_list):
#         print('First Character removed to beginning of Serial for ' + str(input_serial))
        return (input_serial[1:])
    else:
        return None
    
#test above function
if src1:
    print(check_serial_silly_s("S"+ib_devices[0]['serial_number'],"S" + ib_devices[0]['serial_number']))
    print(check_serial_silly_s("S"+ib_devices[0]['serial_number'],(ib_devices[0]['serial_number'])))
print("I did the thing!")

I did the thing!


## Create unique serials lists (removing duplicates)

In [50]:
#####################################
#     Create unique value list (remove duplicates)
#####################################

## Find any one student ib_devices where the student is missing from the daily engagement table.
## Output that enrollment.

def find_unique_and_return_list(input_list):
    unique_values = set()
    for row in input_list:
        if row['serial_number'] not in unique_values:
            unique_values.add(row['serial_number'])
    print('Total unique devices after check: ' + str(len(unique_values)))
    if (len(input_list)!=len(unique_values)):
        print("This function returned a list that removed the following number of duplicates: " \
              + str(len(input_list)-len(unique_values)) + '\n')
    return unique_values

if src1: 
    print(color.UNDERLINE +'ib_devices total rows: ' + str(len(ib_devices)) + color.END)
    ib_devices_unique_device_serials = find_unique_and_return_list(ib_devices)

if src2:
    print(color.UNDERLINE +'pagesmart_devices total rows: ' + str(len(pagesmart_devices)) + color.END)
    pagesmart_devices_unique_device_serials = find_unique_and_return_list(pagesmart_devices)

if src3:
    print(color.UNDERLINE +'ros_devices total rows: ' + str(len(ros_devices)) + color.END)
    ros_devices_unique_device_serials = find_unique_and_return_list(ros_devices)

if src5:
    print(color.UNDERLINE +'billing_devices total rows: ' + str(len(billing_devices)) + color.END)
    billing_devices_unique_device_serials = find_unique_and_return_list(billing_devices)


[4mpagesmart_devices total rows: 4059[0m
Total unique devices after check: 3495
This function returned a list that removed the following number of duplicates: 564

[4mros_devices total rows: 1744[0m
Total unique devices after check: 1721
This function returned a list that removed the following number of duplicates: 23

[4mbilling_devices total rows: 3384[0m
Total unique devices after check: 2427
This function returned a list that removed the following number of duplicates: 957



## Remove Duplicates and account for "Silly S"

In [51]:
#####################################
#     Create unique value list (remove duplicates) AND ACCOUNT FOR SILLY S
#####################################

## Find any one student ib_devices where the student is missing from the daily engagement table.
## Output that enrollment.

# def find_unique_and_return_list_silly_s_accounted_for(input_list):
#     unique_values = set()
#     for row in input_list:
# #         if row['serial_number'] not in unique_values:
# #             unique_values.add(row['serial_number'])
#         if not check_serial_silly_s(row['serial_number'],unique_values):
#             unique_values.add(row['serial_number'])
#     print('Total unique devices after check: ' + str(len(unique_values)))
#     if (len(input_list)!=len(unique_values)):
#         print("This function returned a list that removed the following number of duplicates: " \
#               + str(len(input_list)-len(unique_values)) + '\n')
#     return unique_values

# if ib_devices:
#     print(color.UNDERLINE +'ib_devices total rows: ' + str(len(ib_devices)) + color.END)
#     ib_devices_unique_device_serials_silly_s = find_unique_and_return_list_silly_s_accounted_for(ib_devices)

# if pagesmart_devices:
#     print(color.UNDERLINE +'pagesmart_devices total rows: ' + str(len(pagesmart_devices)) + color.END)
#     pagesmart_devices_unique_device_serials_silly_s  = find_unique_and_return_list_silly_s_accounted_for(pagesmart_devices)
    
# if ros_devices:
#     print(color.UNDERLINE +'ros_devices total rows: ' + str(len(ros_devices)) + color.END)
#     ros_devices_unique_device_serials_silly_s  = find_unique_and_return_list_silly_s_accounted_for(ros_devices)
    
# if billing_devices:
#     print(color.UNDERLINE +'billing_devices total rows: ' + str(len(billing_devices)) + color.END)
#     billing_devices_unique_device_serials_silly_s  = find_unique_and_return_list_silly_s_accounted_for(billing_devices)
    

## Missing Records - COUNT (only) Missing Serials From Each DB

In [52]:

#####################################
#     Check for missing SN's from each DB WITHOUT MISSING SERIAL LIST
#####################################
#comparison function between two lists 
# def count_found_serials_in_lists(list_a, list_b):
#     found_matches = set()
#     not_found = set()
#     for device in list_a:
#         if check_serial_silly_s(device, list_b):
#             found_matches.add(str(device))
#             continue
#         else:
#             not_found.add(str(device))
#     print("Found total: " + str(len(found_matches))) 

# if ib_devices and pagesmart_devices_unique_device_serials:
#     print(color.UNDERLINE +'\nComparing (counting) the following lists for missing serial numbers: \n' + color.END + " " + src1 + ' \n ' + src2 )
#     count_found_serials_in_lists(ib_devices_unique_device_serials, pagesmart_devices_unique_device_serials)

# if pagesmart_devices and ros_devices_unique_device_serials:
#     print(color.UNDERLINE +'\nComparing (counting)the following lists for missing serial numbers: \n' + color.END + " " + src1 + ' \n ' + src3 )
#     count_found_serials_in_lists(ib_devices_unique_device_serials, ros_devices_unique_device_serials)
    
# if ros_devices and pagesmart_devices_unique_device_serials:
#     print(color.UNDERLINE +'\nComparing (counting)the following lists for missing serial numbers: \n' + color.END + " " + src3 + ' \n ' + src2 )
#     count_found_serials_in_lists(ros_devices_unique_device_serials, pagesmart_devices_unique_device_serials)
    
# if billing_devices and ros_devices_unique_device_serials:
#     print(color.UNDERLINE +'\nComparing (counting)the following lists for missing serial numbers: \n' + color.END + " " + src2 + ' \n ' + src3 )
#     count_found_serials_in_lists(pagesmart_devices_unique_device_serials, ros_devices_unique_device_serials)







## Missing Records - Filter for N/A for Search Entires FUNCTION

In [53]:
def filter_out(filter_keys_values_list, unique_serials_list, orig_data_set):
    """
    #####################################
        Removes entries based on filter
        Args: 
            list_a 
            key
            key 2 (if applicable)
        Returns:
            filtered_list
    #####################################
    """
    filtered_list = list()
    removed_list = list()
    print('\nBefore: quantity of lists\' entries: ' + str(len(unique_serials_list)))

    # create data list using unique serial numbers:
    full_data_unique = list()
    for unique_entry in unique_serials_list:
        # find orig data entry
        for orig_entry in orig_data_set:
            if (unique_entry == orig_entry['serial_number']):
                full_data_unique.append(orig_entry)
                break
    
    filtered_list =  full_data_unique
    
    for filter_item in filter_keys_values_list:
        key = filter_item[0]
        value = filter_item[1]
        print("Removing key/value: ", key, value)
        for entry in full_data_unique:
            if entry[key] == value:
                #print("Removing: ", entry['serial_number'])
                removed_list.append(entry)
                filtered_list.remove(entry)
    if (removed_list):
        print("Number of entries removed: " + color.RED + str(len(removed_list)) + color.END + " out of a total " + str(len(unique_serials_list)))
        
    print('After: quantity of lists\' entries: ' + str(len(full_data_unique)))
    return filtered_list
print("I did the thing!")

I did the thing!


## Missing Records - Filter for N/A for Search Entires CRITERIA

In [54]:
#What's in IB that should not be in PS/ROS?
ib_data_to_filter = list()
ib_data_to_filter.append(('CUSTOMER_LOCATION_STATE','MD'))
ib_data_to_filter.append(('CUSTOMER_NAME', 'MGM STUDIOS'))
ib_data_to_filter.append(('CUSTOMER_NAME', 'MGM GALLERIES LLC'))
ib_data_to_filter.append(('CUSTOMER_NAME', 'MGM LIQUOR WAREHOUSE'))
ib_data_to_filter.append(('CUSTOMER_NAME', 'MGM MORTGAGE'))

print("Length of ib_data_filterd: ", len(ib_data_to_filter))

if src1 and src3: 
    #What's in PS that would not be in IB?
    ps_data_to_filter_from_ib = list()
    print("Length of ps_data_filterd_from_ib: ", len(ps_data_to_filter_from_ib))

if src2 and src3: 
    #What's in PS that won't be in ROS?
    ps_data_to_filter_from_ros = list()
    print("Length of ps_data_filterd_from_ros: ", len(ps_data_to_filter_from_ros))

if src3 and src2: 
    #Note: All ROS (installed) should be in PS - No filters suggested
    ros_data_to_filter_from_ps = list()
    print("Length of ros_data_to_filter_from_ps: ", len(ros_data_to_filter_from_ps))

if src1 and src3: 
    #Note: All ROS (installed) should be in IB - No filters suggested
    ros_data_to_filter_from_ib = list()
    print("Length of ros_data_to_filter_from_ib: ", len(ros_data_to_filter_from_ib))

if src5: 
    #Note: All BILLING (installed) should be in IB - No filters suggested
    billing_data_to_filter = list()
    print("Length of ros_data_to_filter_from_ib: ", len(billing_data_to_filter))

print("I did the thing!")

Length of ib_data_filterd:  5
Length of ps_data_filterd_from_ros:  0
Length of ros_data_to_filter_from_ps:  0
Length of ros_data_to_filter_from_ib:  0
I did the thing!


## Missing Records - Filter for N/A for Search Entires PERFORM

In [55]:
#ros_found_in_ib = check_and_list_missing_serials_in_lists(ros_devices_unique_device_serials, ib_devices_unique_device_serials)


#INSTALL BASE FILTERS
if src1:
    ib_data_filtered = filter_out(ib_data_to_filter, ib_devices_unique_device_serials, ib_devices)

if src2 and src1:
    ps_data_filtered_for_ib = filter_out(ps_data_to_filter_from_ib, pagesmart_devices_unique_device_serials, pagesmart_devices)
if src2 and src3:
    ps_data_filtered_for_ros = filter_out(ps_data_to_filter_from_ros, pagesmart_devices_unique_device_serials, pagesmart_devices)
    
if src3 and src2: 
    ros_data_filtered_for_ps = filter_out(ros_data_to_filter_from_ps, ros_devices_unique_device_serials, ros_devices)
if src3 and src1:
    ros_data_filtered_for_ib = filter_out(ros_data_to_filter_from_ib, ros_devices_unique_device_serials, ros_devices)
    
if src5 and src1: 
    billing_data_filtered_for_ib = filter_out(billing_data_to_filter, billing_devices_unique_device_serials, billing_devices)



Before: quantity of lists' entries: 3495
After: quantity of lists' entries: 3495

Before: quantity of lists' entries: 1721
After: quantity of lists' entries: 1721


## Missing Records - Find/List Missing Serials From Each DB FUNCTION

In [56]:
from tabulate import tabulate
from prettytable import PrettyTable
from datetime import datetime

#comparison function between two lists 
def check_and_list_missing_serials_in_lists(list_a, list_b):
    """
    #####################################
        Takes in two lists (each list is a list of dicts for a devices e.g. serial_number key to asdf1234 value)
            and compares them to each other finding matches and "not_found" based on serial_number.
        Args: 
            list_a - entries to be check, return values are based on these entries
            list_b - entires to check against
        Returns:
            found_matches - a list of entries from list_a that were found in list_b based on serial_number
            not_found - a list of entries from list_a that were NOT found in list_b based on serial_number
    #####################################
    """
    
    #Print output 
    print(color.UNDERLINE +'\nComparison stats:\n' + color.END + \
     " • " + 'Quantity of primary lists\' entries: ' + str(len(list_a)))

    found_matches = list()
    not_found = list()
    for device in list_a:
        device_serial = device['serial_number'] 
        if check_serial_silly_s(device_serial, list_b):
            found_matches.append(device)
            continue
        else:
            not_found.append(device)
            
    #Print output         
    print(" • " + 'Matches found: ' + str(len(found_matches)))

    if (not_found):
        output_sample_records(not_found)

    return found_matches, not_found

print("I did the thing!")


I did the thing!


## Missing Records - Find/List Missing Serials From Each OUTPUT FUNCTION

In [57]:
def output_sample_records(list_a):
    rows = list()
    #Print output         
    print (" • " + 'Total Missing: ' + str(len(list_a)) + " \n" \
    + color.BOLD + "Please investigate then add applicable record(s). Some Key fields: \n" + color.END)

    #Check if IB headers exist and then output
    if 'CUSTOMER_LOCATION_ADDRESS_1' in list_a[0].keys():
        headers = ('serial_number','INSTALL_DATE','CUSTOMER_NAME','CUSTOMER_LOCATION_ADDRESS_1')
        for entry in list_a:
            rows.append([entry['serial_number'], entry['INSTALL_DATE'],
                         entry['CUSTOMER_NAME'], entry['CUSTOMER_LOCATION_ADDRESS_1']])

    #Check if PS headers exist and then output
    elif 'Ship To / Company Name' in list_a[0].keys():
        headers = ('serial_number', 'Last Report Date' ,'Ship To / Company Name','Address1', 'Model')
        for entry in list_a:       
            rows.append([entry['serial_number'], entry['Last Report Date'],
                         entry['Ship To / Company Name'], entry['Model']])

    #Check if ROS headers exist and then output
    elif 'ACTION' in list_a[0].keys():
        headers = ('serial_number', 'Primary', 
                   'ACTION', 'STATUS for Delivery / Installation' )
        for entry in list_a:      
            rows.append([entry['serial_number'], entry['Primary'], 
                        entry['ACTION'], 
                         entry['STATUS for Delivery / Installation']])
    #else (no header matches)
    else:
        headers = 'serial_number'
        for entry in list_a:
            rows.append([entry['serial_number']])

    print(tabulate(rows, headers))
    
print("I did the thing!")

I did the thing!


## Missing Records - Find/List Missing Serials PART 1

In [58]:
#####################################
#                 LIST MISSING SERIAL NUMBERS
#####################################
if src3 and src1:
    print(color.UNDERLINE +'\nList comparison:' + color.END + ' If any devices are listed below, they are in... \n'  + \
         " • " + "" + color.GREEN + src3 + color.END + " but are... \n" + \
         " • " +  color.BOLD + color.RED + "Missing from " + src1 + color.END)
    ros_found_in_ib = check_and_list_missing_serials_in_lists(ros_data_filtered_for_ib, ib_devices_unique_device_serials)

## Missing Records - Find/List Missing Serials PART 2

In [59]:

if src3 and src2:
    print(color.UNDERLINE +'\nList comparison:' + color.END + ' If any devices are listed below, they are in... \n'  + \
         " • " + "" + color.GREEN + src3 + color.END + " but are... \n" + \
         " • " +  color.BOLD + color.RED + "Missing from " + src2 + color.END)
    ros_found_in_ps = check_and_list_missing_serials_in_lists(ros_data_filtered_for_ps, pagesmart_devices_unique_device_serials)


[4m
List comparison:[0m If any devices are listed below, they are in... 
 • [92mMGM Project ROS Summary.csv[0m but are... 
 • [1m[91mMissing from MGM FMA 8.15.2019 Device Change Worksheet.csv[0m
[4m
Comparison stats:
[0m • Quantity of primary lists' entries: 1721
 • Matches found: 1421
 • Total Missing: 300 
[1mPlease investigate then add applicable record(s). Some Key fields: 
[0m
serial_number                      Primary                          ACTION                    STATUS for Delivery / Installation
---------------------------------  -------------------------------  ------------------------  ------------------------------------
JPCCM3M1HG                         Bellagio                         REMOVE/REPLACE            COMPLETE
CUKH48939                          MGM Grand                        REMOVE/REPLACE            COMPLETE
SCUEG31042                         MZ                               Remove/Replace            COMPLETE
CNBCL6T0FF                         

## Missing Records - Find/List Missing Serials PART 3

In [60]:

if src1 and src3:
    print(color.UNDERLINE +'\nList comparison:' + color.END + ' If any devices are listed below, they are in... \n'  + \
         " • " + "" + color.GREEN + src1 + color.END + " but are... \n" + \
         " • " +  color.BOLD + color.RED + "Missing from " + src3 + color.END)

    ib_found_in_ps = check_and_list_missing_serials_in_lists(ib_data_filtered, ros_devices_unique_device_serials)


## Missing Records - Find/List Missing Serials PART 4

In [61]:
if src3 and src5:
    print(color.UNDERLINE +'\nList comparison:' + color.END + ' If any devices are listed below, they are in... \n'  + \
         " • " + "" + color.GREEN + src3 + color.END + " but are... \n" + \
         " • " +  color.BOLD + color.RED + "Missing from " + src5 + color.END)

    ros_found_in_billing = check_and_list_missing_serials_in_lists(ros_data_filtered_for_ps, billing_devices_unique_device_serials)


[4m
List comparison:[0m If any devices are listed below, they are in... 
 • [92mMGM Project ROS Summary.csv[0m but are... 
 • [1m[91mMissing from MGM Billing 8.27.2019.csv[0m
[4m
Comparison stats:
[0m • Quantity of primary lists' entries: 1721
 • Matches found: 1697
 • Total Missing: 24 
[1mPlease investigate then add applicable record(s). Some Key fields: 
[0m
serial_number                      Primary        ACTION          STATUS for Delivery / Installation
---------------------------------  -------------  --------------  ------------------------------------
CUDJ53207                          Bellagio       REMOVE/REPLACE  COMPLETE
CULH49853                          MGM Park       ADD             COMPLETE
SCGGG21734                         1MI1           Remove/Replace  COMPLETE
NOT BEING DELIVERED                1BOT           Remove/Replace  COMPLETE
CUDJ53216                          Bellagio       REMOVE/REPLACE  COMPLETE
CUDJ53213                          Bellagio   

## Deeper dive into IB FUNCTION

In [62]:
#comparison function between two lists 
def found_and_missing_devices(list_a, list_b):
    """
    #####################################
        Takes in two lists (each list is a list of dicts for a devices e.g. serial_number key to asdf1234 value)
            and compares them to each other finding matches and "not_found" based on serial_number.
        Args: 
            list_a - entries to be check, return values are based on these entries
            list_b - entires to check against
        Returns:
            found_matches - a list of entries from list_a that were found in list_b based on serial_number
            not_found - a list of entries from list_a that were NOT found in list_b based on serial_number
    #####################################
    """
    #Print output 
    print(color.UNDERLINE +'\nComparison stats:\n' + color.END + \
     " • " + 'Quantity of primary lists\' entries: ' + str(len(list_a)))

    found_matches = list()
    not_found = list()
    for device in list_a:
        device_serial = device['serial_number'] 
        if check_serial_silly_s(device_serial, list_b):
            found_matches.append()
            continue
        else:
            not_found.append(device)
            
    #Print output         
    print(" • " + 'Matches found: ' + str(len(found_matches)))

    if (not_found):
        output_sample_records(not_found)

    return found_matches, not_found

print("I did the thing!")

I did the thing!


## IB DEEPER DIVE - Import full list of IB serials

In [63]:
## Deeper dive with full IB Serial list using NumPy
import pandas as pd
import numpy as np
if src4 and src1: 
    full_ib_serials = np.array(pd.read_csv(src4))
    print (full_ib_serials.dtype)
    print(full_ib_serials[0])


## IB DEEPER DIVE - Output list of serials found in project but not found in IB Customer nor IB Full.  TOTALLY MISSING FROM IB.

In [64]:
if src4 and src1: 
    print("There were the following number NOT found in IB: ", len(ros_found_in_ib[1]))
    devices_missing_vs_found_in_full_ib = check_and_list_missing_serials_in_lists(ros_found_in_ib[1],full_ib_serials)

## IB DEEPER DIVE - Record accuracy issue - Found in full IB data but NOT customer-number IB

In [65]:
print("Please investigate then correct issues for these record(s). Some Key fields: \n")

# variables: devices_missing_vs_found_in_full_ib [0=found in previous comparison, 1=not found]
# We want to list the IB details about the ones found. Total list found in devices_missing_vs_found_in_full_ib[0]
if src4 and src1: 
    if (devices_missing_vs_found_in_full_ib[0]):
        output_sample_records(devices_missing_vs_found_in_full_ib[0])



Please investigate then correct issues for these record(s). Some Key fields: 



## IB DEEPER DIVE - Import full IB BRIEF list to get more IB information (resource heavy operation)

In [66]:
## Deeper dive with full IB Serial list using NumPy
if src6 and src1:
    full_ib_brief_serials = np.array(pd.read_csv(src6))
    print (full_ib_brief_serials.dtype)
    print(full_ib_brief_serials[0])
    print ('I did the thing!)')

## IB DEEPER DIVE: Output IB data found in project but not customer-IB. i.e. Same as above but with data from IB instead of project. (resource heavy operation)

In [67]:
print("Please investigate then correct issues for these record(s). Some Key fields: \n")
if src6 and src1: 
    if (devices_missing_vs_found_in_full_ib[0]):
        rows = list()
        #Print output         
        print (" • " + 'Total Found with issue: ' + str(len(devices_missing_vs_found_in_full_ib[0])) + " \n" \
        + color.BOLD + "Please investigate then correct applicable record(s). Some Key fields: \n" + color.END)
        print("These may not have known customer nubmers: T0BS49A, T0BUQW1, T0BV7VF, T0BVMSY")
        print("Ken's Observation on customer numbers: 506502, 507534, 640983")
    
    #Check if IB headers exist and then output
    headers = ('serial_number','INSTALL_DATE','CUSTOMER_REFERENCE','CUSTOMER_NAME', "CUSTOMER_LOCATION_ADDRESS_1")
    for entry in devices_missing_vs_found_in_full_ib[0]:
        for ib_entry in full_ib_brief_serials:
            if entry['serial_number'] == ib_entry[5]:
                rows.append([ib_entry[5], ib_entry[0], ib_entry[1],
                             ib_entry[2], ib_entry[3]])
                break

    print(tabulate(rows, headers))

Please investigate then correct issues for these record(s). Some Key fields: 



## Summary of Devices Found vs. Not-Found between Various Databases

In [69]:
print("Project Devices Installed: " + str(len(ros_devices_unique_device_serials)))

print("\n • " + "Project Devices found in the Billing file: " + str(len(ros_found_in_billing[0])) + \
"\n • " + "And those not found: " +  str(len(ros_found_in_billing[1]))) if src5 and src3 else None

print( "\n • " + "Project Devices found in PageSmart: " + str(len(ros_found_in_ps[0])) + \
"\n • " + "And those not found: " +  str(len(ros_found_in_ps[1])) ) if src2 and src3 else None


print( "\n • " + "Project Devices found in IB (Oracle Installed Base): " + str(len(ros_found_in_ib[0])) + \
"\n • " + "And those not found: " +  str(len(ros_found_in_ib[1])) ) if src1 and src3 else None

print("\n   • " + "Of which were found in IB with data issue: " + str(len(devices_missing_vs_found_in_full_ib[0])) + \
"\n   • " + "And those not found at all: " +  str(len(devices_missing_vs_found_in_full_ib[1])) ) if src4 and src1 else None


print("\nInstalled Base (Oracle) Devices pulled using customer numbers: ", cust_numbers )

print("Installed Base (Oracle) Devices: " + str(len(ib_devices_unique_device_serials)) + \
    "\n • " + "IB Devices found in PageSmart: " + str(len(ib_found_in_ps[0])) + \
    "\n • " + "And those not found: " +  str(len(ib_found_in_ps[1])) ) if src1 and src2 else None


Project Devices Installed: 1721

 • Project Devices found in the Billing file: 1697
 • And those not found: 24

 • Project Devices found in PageSmart: 1421
 • And those not found: 300

Installed Base (Oracle) Devices pulled using customer numbers:  ('T0BS49A', 'T0BUQW1', 'T0BV7VF', 'T0BVMSY')


## Export Spreadsheet

In [None]:
# with open("exported_spreadsheet.csv", 'w', newline='') as myfile:
#      wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
#      wr.writerow(ros_found_in_billing[1])
    
# np.savetxt("file_name.csv", ros_found_in_billing[1], delimiter=",", fmt='%s', header=header)

# print(ros_found_in_billing[1][0].keys())

# toCSV = [ros_found_in_billing[1]]
# keys = ros_found_in_billing[1][0].keys()
# with open('people.csv', 'wb') as output_file:
#     dict_writer = newline.DictWriter(output_file, keys)
#     dict_writer.writeheader()
#     dict_writer.writerows(toCSV)

# import pandas
# dataframe = pandas.read_csv("exported_spreadsheet.csv")
# list_of_dictionaries = dataframe.to_dict(ros_found_in_billing[1])
# dataframe.to_csv("exported_spreadsheet.csv")

# with open ('list.csv', 'w') as f:
#     for dict in ros_found_in_billing[1]:
#         for key, value in dict.items():
#             text = key+','+value+'\n'
#             f.writelines(text)

# keys = [i for s in [d.keys() for d in ros_found_in_billing[1]] for i in s]

# with open('test.csv', 'a') as output_file:
#     dict_writer = csv.DictWriter(output_file, restval="-", fieldnames=keys, delimiter='@')
#     dict_writer.writeheader()
#     dict_writer.writerows(ros_found_in_billing[1])

# keys = [i for s in [d.keys() for d in ros_found_in_billing[1]] for i in s]
# f = open("sample.csv", "w")
# writer = csv.DictWriter(
#     f, fieldnames=keys)
# writer.writeheader()
# writer.writerows(ros_found_in_billing[1])
# f.close()

## next topic

## next topic

## next topic

## next topic

## next topic

## next topic

## next topic