# Asset Tracker Tool - Tracking serial numbers among multiple data sets

## Overview

- This tool compares data exports in CVS formate. Examples of datasets that can be compared include:

 - Installed Base
 - PageSmart (FMAudit data)
 - Project Rollout Schedule(s)
 - Monthly Billing File
 
- Functions:
 - Phase 1: [current] Compare databases 1-to-1 to identify missing devices.
 - Phase 2: Data accuracy 
 - Phase 3: Predictive corrections
 
- TO DO:
 - filter out dynamic, add filter in
 - convert to cvs
 - account for removal actions in filter out

## Identify Data Files and Global Variables 

In [90]:
import unicodecsv

#####################################
#               Formatting for comparison ease of read
#####################################
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

#####################################
#       WHICH FILES ARE WE LOOKING AT?
#####################################
'''
    src files are standard exports, typically <1mb
    src_large_n files are typically larger than a 1mb, up to 15mb
'''
source_file_names = [
    'MGM IB by 4 customer numbers 8.30.3019.csv',
    'MGM FMA 8.15.2019 Device Change Worksheet.csv',
    'MGM Project ROS Summary.csv',
    'EINVOICE-T0BUBPS-30-SEP-19.csv'
    ]
source_file_names_large = [
    'TABS_DW_RPT_INSTALL_BASE_ALL_NEW_VW_20190924 BRIEF.csv',
    ]

print("Number of entries in source_file_names: ", len(source_file_names))
print("Number of entries in source_file_names_large: ", len(source_file_names_large))


#####################################
#       MISC information
#####################################
#Customer Reference Number for Oracle Installed Base
cust_numbers = list()
cust_numbers = ("T0BS49A", "T0BUQW1", "T0BV7VF", "T0BVMSY")
print("Number of entries in cust_numbers: ", len(cust_numbers))

serial_number_name_variants = list()
serial_number_name_variants = ['SERIAL_NUMBER','Serial Number','Toshiba Serial Number','SerialNbr', '\ufeffSERIAL_NUMBER']
print("Number of entries in serial_number_name_variants: ", len(serial_number_name_variants))

column_data_types_dates = list()
column_data_types_dates = ['INSTALL_DATE','Last Report Date']
print("Number of entries in column_data_types_dates: ", len(column_data_types_dates))

potential_leading_chars = list()
potential_leading_chars = ['S', 's']
print("Number of entries in potential_leading_chars: ", len(potential_leading_chars))

non_issue_duplicate_column = list()
non_issue_duplicate_column = ['Counter Name']
print("Number of entries in non_issue_duplicate_column: ", len(non_issue_duplicate_column))

key_and_value_to_filter_out = list()
key_and_value_to_filter_out.append(('CUSTOMER_LOCATION_STATE','MD'))
key_and_value_to_filter_out.append(('CUSTOMER_NAME', 'MGM STUDIOS'))
key_and_value_to_filter_out.append(('CUSTOMER_NAME', 'MGM GALLERIES LLC'))
key_and_value_to_filter_out.append(('CUSTOMER_NAME', 'MGM LIQUOR WAREHOUSE'))
key_and_value_to_filter_out.append(('CUSTOMER_NAME', 'MGM MORTGAGE'))
key_and_value_to_filter_out.append(('ACTION', 'REMOVE'))

print("Number of entries in key_and_value_to_filter_out: ", len(key_and_value_to_filter_out))

#What are some headings you'd like to see if we find missing assets?
potential_data_headers_keys = list()
potential_data_headers_keys = ['serial_number', \
                               'ACTION', 'Completed Install Date', 'Last Report Date' , \
                               'Primary', 'STATUS for Delivery / Installation', \
                               'Location Name', 'Completed Install Date' \
                               'Ship To / Company Name','Address1', 'Model', \
                               'Ship-To Name', 'Ship To Address1', 'Install Date', 'Status'\
                               'INSTALL_DATE','CUSTOMER_NAME','CUSTOMER_LOCATION_ADDRESS_1']     
print("Number of entries in potential_data_headers_keys: ", len(potential_data_headers_keys))

print("I did the thing!")

Number of entries in source_file_names:  4
Number of entries in source_file_names_large:  1
Number of entries in cust_numbers:  4
Number of entries in serial_number_name_variants:  5
Number of entries in column_data_types_dates:  2
Number of entries in potential_leading_chars:  2
Number of entries in non_issue_duplicate_column:  1
Number of entries in key_and_value_to_filter_out:  6
Number of entries in potential_data_headers_keys:  16
I did the thing!


## Load Data from CSVs

In [None]:
#####################################
#       IMPORT SCRIPTS FOR EACH THAT EXPORTS SAMPLE OF SERIAL NUMBER
#####################################

def import_cvs_into_list(source_cvs):
    """
        Imports CSV into list
        Args:
            source_cvs file
        Returns:
            data in a list variable
    """
    #creates a list where each row is list entry. Each list entry is a collection of Dict's
    data_list = []
    with open(source_cvs, 'rb') as f:
        reader = unicodecsv.DictReader(f)
        for row in reader:
            data_list.append(row)
    #List value of row:
    print(color.UNDERLINE + "Sample pulled from " + source_cvs + color.END + ", entry from 0th list entry: ")
    print(data_list[0])
    return data_list

#####################################
#       IMPORT SCRIPTS FOR EACH THAT EXPORTS SAMPLE OF SERIAL NUMBER [VERION 2.0]
#####################################
source_file_lists=list()
source_file_lists_large=list()

for num, file_name in enumerate(source_file_names, start=1):
    print("\nImporting {}: {} as new list: data_set_{}".format(num, file_name, num))
    locals()["data_set_" + str(num)] = import_cvs_into_list(file_name)
    source_file_lists.append(locals()["data_set_" + str(num)])

for num, file_name in enumerate(source_file_names_large, start=1):
    print("\nImporting: {} as new list: data_set_large_{}".format(file_name, num))
    locals()["data_set_large_" + str(num)] = import_cvs_into_list(file_name)
    source_file_lists_large.append(locals()["data_set_large_" + str(num)])

print("I did the thing!")


Importing 1: MGM IB by 4 customer numbers 8.30.3019.csv as new list: data_set_1
[4mSample pulled from MGM IB by 4 customer numbers 8.30.3019.csv[0m, entry from 0th list entry: 
OrderedDict([('\ufeffPRODUCT_NUMBER', '63451262'), ('INSTALL_DATE', '4/1/2019'), ('GOLD_FLAG', 'Y'), ('CUSTOMER_REFERENCE', 'T0BS49A'), ('CUSTOMER_NAME', 'MGM RESORTS INTERNATIONAL'), ('SRC_PARTY_SITE_NUMBER', '2601839'), ('CUSTOMER_LOCATION_ADDRESS_1', '71 E HARMON AVE'), ('CUSTOMER_LOCATION_ADDRESS_2', ''), ('CUSTOMER_LOCATION_CITY', 'LAS VEGAS'), ('CUSTOMER_LOCATION_STATE', 'NV'), ('CUSTOMER_LOCATION_ZIP', '89109-4539'), ('SRV_DEALER_REFERENCE', '00740600'), ('SRV_DEALER_NAME', 'TOSHIBA BUSINESS SOLUTIONS AZ-CO'), ('SRV_DEALER_LOCATION_ADDRESS_1', 'C/O MGM RESORTS'), ('SRV_DEALER_LOCATION_ADDRESS_2', '5014 BOND STREET'), ('SRV_DEALER_LOCATION_CITY', 'LAS VEGAS'), ('SRV_DEALER_LOCATION_STATE', 'NV'), ('SRV_DEALER_LOCATION_ZIP', '89118-1575'), ('ORIG_DEALER_REFERENCE', '00743600'), ('ORIG_DEALER_NAME', 'TABS

## Harmonize data keys e.g. Serial Number columns

In [None]:
#####################################
#               Harmonize Keys      #
#####################################

def harmonize_serial_number_key(list_a):
    """
    Since Serial Number is primary key, they key name sshould be the same among all tables
    Args:
        list_a
    Returns:
        none
    """
    for row in list_a:
        if ('serial_number') in row:
            continue
        for sn_variant in serial_number_name_variants:
            if sn_variant in row:
                row['serial_number'] = row[sn_variant]
                del row[sn_variant]
        if ('serial_number') not in row:
            print("Issue: Did not find serial number key.")

for data_set in source_file_lists:
    harmonize_serial_number_key(data_set)
    print(data_set[0]['serial_number'])
    
for data_set in source_file_lists_large:
    harmonize_serial_number_key(data_set)
    print(data_set[0]['serial_number'])

print("I did the thing!")

## Fixing Data Types

In [None]:
# #####################################
# #     Add data types other than string, IB portion and functions
# #####################################

# from datetime import datetime as dt

# def validate_date(date):
#     try:
#         dt.strptime(date, '%m/%d/%Y')
#         #return dt.strptime(date, '%m/%d/%Y %H:%M:%S')
#     except ValueError:
#         raise ValueError("Incorrect data format, should be %m/%d/%Y")
        
# # Takes a date as a string, and returns a Python datetime object. 
# # If there is no date given, returns None
# def parse_date(date):
#     if date == '' or date == None:
#         return None
#     validate_date(date)
#     dt.strptime(date, '%m/%d/%Y')
    
# # Takes a string which is either an empty string or represents an integer,
# # and returns an int or None.
# def parse_maybe_int(i):
#     if i == '':
#         return None
#     else:
#         return int(i)

# print("I did the thing!")
    
# print("----------------------------------------------------------")
# for data_set in source_file_lists:
#     for date_type_column in column_data_types_dates:
#         if date_type_column in data_set[0].keys():
#             data_entry[date_type_column] = parse_date(str(data_entry[date_type_column]))

# #     if 'INSTALL_DATE' in data_set[0].keys():
# #         for data_entry in data_set:
# #             data_entry['INSTALL_DATE'] = parse_date(data_entry['INSTALL_DATE'])
# #     if 'GOLD_FLAG' in data_set[0].keys():
# #         for data_entry in data_set:
# #             data_entry['GOLD_FLAG'] = data_entry['GOLD_FLAG'] == 'True'
# #     if 'Last Report Date' in data_set[0].keys():
# #         for data_entry in data_set:
# #             data_entry['Last Report Date'] = parse_date(data_entry['Last Report Date'])

# print("I did the thing!")    


## Ouput Duplicate Serial Numbers

In [None]:
#####################################
#                 Find unique and non-blank, post duplicates
#####################################

## Find the total number of rows and the number of unique students (account keys)
## in each table.

def find_unique_and_none_blank(input_list):
    unique_values = set()
    duplicate_values = set()
    non_issue_duplicate_values = set()
    blank_counter = 0
    for row in input_list:
        if not row['serial_number']:
            blank_counter += 1
        elif row['serial_number'] not in unique_values:
            unique_values.add(row['serial_number'])
        elif row['serial_number'] is not None:
            #let's check if the duplicate is a non-issue
            for non_issue_column in non_issue_duplicate_column:
                if non_issue_column in row:
                    #find the previously found record...
                    for new_row in input_list: 
                        if new_row['serial_number'] == row['serial_number']: 
                            #if the values for non_issue_column in the two duplicated values do match...
                            if new_row[non_issue_column] == row[non_issue_column]:
                                duplicate_values.add(row['serial_number'])
                            else:
                                non_issue_duplicate_values.add(row['serial_number'])
                            break
                break
    
    print('Total devices checked: ' +  str(len(input_list)))
    print('Total unique devices: ' + str(len(unique_values)))
    print('Total blanks: ' + str(blank_counter))
    
    if (len(input_list)!=len(unique_values)):
        print("Total duplicates: " + str(len(input_list)-len(unique_values)))
        print("Total non_issue_duplicates: ", len(non_issue_duplicate_values))
        if(len(duplicate_values) > 1 or duplicate_values):
            print(color.BOLD + "Duplicate records found. Please investigate then delete applicable record(s)." \
                  + color.END)
    return duplicate_values
    
#####################################
#               List orig vs duplicates count
#####################################

for list_name, data_set in zip(source_file_names,source_file_lists):
    print (list_name)
    print (find_unique_and_none_blank(data_set), "\n")

    
print("I did the thing!")

## Check if serial in list (accounts for leading chars)

In [None]:
def check_serial_in_list(input_serial, check_against_list):
    """
        Checks if given serial number is in a list, applies leading character if pre-defined.
        Args:
            input_serial
            check_against_list
        Returns:
            input_serial if found (with leading if applicable)
            None if not found
    """
    if (input_serial in check_against_list):
        return input_serial
    for potential_leadering_char in potential_leading_chars:
        if ((potential_leadering_char+input_serial) in check_against_list):
    #         print('S added to beginning of Serial for ' + str(input_serial))
            return (potential_leadering_char+input_serial)
        elif ((input_serial[1:]) in check_against_list):
    #         print('First Character removed to beginning of Serial for ' + str(input_serial))
            return (input_serial[1:])
    return None
    
#test above function
print(check_serial_silly_s("S"+source_file_lists[0][0]['serial_number'],"S" + source_file_lists[0][0]['serial_number']))
print(check_serial_silly_s("S"+source_file_lists[0][0]['serial_number'],(source_file_lists[0][0]['serial_number'])))
print(check_serial_silly_s(source_file_lists[0][0]['serial_number'],(source_file_lists[0][0]['serial_number'])))
print("I did the thing!")

## Create Unique Asset Serial Number Lists (removing duplicates)

In [None]:
#####################################
#     Create unique value list (remove duplicates)
#####################################

## Find any one student ib_devices where the student is missing from the daily engagement table.
## Output that enrollment.

def find_unique_and_return_list(input_list):
    """
        Create unique serial number list (remove duplicates, only serial numbers returned)
        Args:
            input_list
        Returns:
            list(unique_values)
    """
    unique_values = set()
    for row in input_list:
        if row['serial_number'] not in unique_values:
            unique_values.add(row['serial_number'])
    print('Total unique devices after check: ' + str(len(unique_values)))
    if (len(input_list)!=len(unique_values)):
        print("This function returned a list that removed the following number of duplicates: " \
              + str(len(input_list)-len(unique_values)) + '\n')
    return list(unique_values)

source_file_unique_serials = list()
source_file_unique_serials_large = list()

print(source_file_names)

for num, data_set in enumerate(source_file_lists, start=1):
    print("\nCreating new list of unique serial numbers from file: {}. \nNamed: data_set_unique_serials_{}".format(source_file_names[num-1], num))
    locals()["data_set_unique_serials_" + str(num)] = find_unique_and_return_list(data_set)
    source_file_unique_serials.append(locals()["data_set_unique_serials_" + str(num)])
    
for num, data_set in enumerate(source_file_lists_large, start=1):
    print("\nCreating new list of unique serial numbers from file: {}. \nNamed: data_set_unique_serials_large_{}".format(source_file_names_large[num-1], num))
    locals()["data_set_unique_serials_large_" + str(num)] = find_unique_and_return_list(data_set)
    source_file_unique_serials_large.append(locals()["data_set_unique_serials_large_" + str(num)])
    
print("I did the thing!")


## Missing Records - Filter for Search Part 1: FUNCTION

In [None]:
def filter_out(filter_keys_values_list, unique_serials_list, orig_data_set):
    """
    #####################################
        Removes entries based on filter
        Args: 
            list_a 
            key
            key 2 (if applicable)
        Returns:
            filtered_list
    #####################################
    """
    filtered_list = list()
    removed_list = list()
    print('\nBefore: quantity of lists\' entries: ' + str(len(unique_serials_list)))

    # create data list using unique serial numbers:
    full_data_unique = list()
    for unique_entry in unique_serials_list:
        # find orig data entry
        for orig_entry in orig_data_set:
            if (unique_entry == orig_entry['serial_number']):
                full_data_unique.append(orig_entry)
                break
    
    filtered_list =  full_data_unique
    
    for filter_item in filter_keys_values_list:
        key = filter_item[0]
        value = filter_item[1]
        print("Removing key/value: ", key, value)
        if key in full_data_unique[0].keys():
            for entry in full_data_unique:
                if entry[key] == value:
                    #print("Removing: ", entry['serial_number'])
                    removed_list.append(entry)
                    filtered_list.remove(entry)
                    
    if (removed_list):
        print("Number of entries removed: " + color.RED + str(len(removed_list)) + color.END + " out of a total " + str(len(unique_serials_list)))
        
    print('After: quantity of lists\' entries: ' + str(len(full_data_unique)))
    return filtered_list
print("I did the thing!")

## Missing Records - Filter for Search Part 2: PERFORM

In [None]:

source_file_unique_filtered_data = list()
source_file_unique_filtered_data_large = list()

for num, data_set in enumerate(source_file_lists, start=1):
    print("\nCreating new list of filtered devices: {}. \nNamed: data_set_unique_filtered_data_{}".format(source_file_names[num-1], num))
    locals()["data_set_unique_filtered_data_" + str(num)] = filter_out(key_and_value_to_filter_out, source_file_unique_serials[num-1], data_set)
    source_file_unique_filtered_data.append(locals()["data_set_unique_filtered_data_" + str(num)])
    
for num, data_set in enumerate(source_file_lists_large, start=1):
    print("\nCreating new list of unique serial numbers from file: {}. \nNamed: data_set_unique_serials_large_{}".format(source_file_names_large[num-1], num))
    locals()["data_set_unique_serials_large_" + str(num)] = find_unique_and_return_list(data_set)
    #locals()["data_set_unique_serials_large_" + str(num)] = filter_out(key_and_value_to_filter_out, source_file_unique_serials_large[num-1], data_set)
    source_file_unique_filtered_data_large.append(locals()["data_set_unique_serials_large_" + str(num)])

print("I did the thing!")

## Missing Records - Find/List Missing Serials From Each DB FUNCTION

In [None]:
from tabulate import tabulate
from prettytable import PrettyTable
from datetime import datetime

#comparison function between two lists 
def check_and_list_missing_serials_in_lists(list_a, list_b):
    """
    #####################################
        Takes in two lists (each list is a list of dicts for a devices e.g. serial_number key to asdf1234 value)
            and compares them to each other finding matches and "not_found" based on serial_number.
        Args: 
            list_a - entries to be check, return values are based on these entries
            list_b - entires to check against
        Returns:
            found_matches - a list of entries from list_a that were found in list_b based on serial_number
            not_found - a list of entries from list_a that were NOT found in list_b based on serial_number
    #####################################
    """
    
    #Print output 
    print(color.UNDERLINE +'\nComparison stats:\n' + color.END + \
     " • " + 'Quantity of primary lists\' entries: ' + str(len(list_a)))

    found_matches = list()
    not_found = list()
    for device in list_a:
        device_serial = device['serial_number'] 
        if check_serial_silly_s(device_serial, list_b):
            found_matches.append(device)
            continue
        else:
            not_found.append(device)
            
    #Print output         
    print(" • " + 'Matches found: ' + str(len(found_matches)))

    if (not_found):
        output_sample_records(not_found)

    return found_matches, not_found

print("I did the thing!")


## Missing Records - Find/List Missing Serials From Each OUTPUT FUNCTION

In [None]:
def output_sample_records(list_a):
    """
    #####################################
        Takes in one list and provides output table with applicable data for that list
        Args: 
            list_a - entries to be output in a list.  The list contains dicts for each asset data item.
        Returns:
            none
    #####################################
    """
    rows = list()
    headers = list()
    #Print output         
    print (" • " + 'Total Missing: ' + str(len(list_a)) + " \n" \
    + color.BOLD + "Please investigate then add applicable record(s). Some Key fields: \n" + color.END)

    print("----------------------------------------------------------")
    
    for potential_key in potential_data_headers_keys:
        if potential_key in list_a[0].keys():
            headers.append(potential_key)

    #Check if ROS headers exist and then output
    if (headers):
        for entry in list_a:
            #temp new row variable to add to rows as single line
            new_row = list()
            for found_potential_header in headers:
                new_row.append(entry[found_potential_header])
            rows.append(new_row)
                
    #else (no header matches)
    else:
        headers = 'serial_number'
        for entry in list_a:
            rows.append(entry['serial_number'])

    print(tabulate(rows, headers))

print("I did the thing!")

## Missing Records Part 1 - Find/List Missing Serials

In [None]:
#####################################
#                 LIST MISSING SERIAL NUMBERS
#####################################

total_lists = len(source_file_lists)

print("This section will compare each list against each other and provide missing device information.\nThere are {} lists.".format(total_lists))  

if total_lists > 1:
    data_set_matches_vs_missing_delta_Lists = list()
    for num, data_set in enumerate(source_file_unique_filtered_data, start=0):
        compare_to_index = num+1
        locals()["data_set_matches_vs_missing_" + str(num)] = list()
        while compare_to_index < total_lists:
            print(color.UNDERLINE +'\nList comparison:' + color.END + ' If any devices are listed below, they are in... \n'  + \
            " • " + "" + color.GREEN + source_file_names[num] + color.END + " but are missing from... \n" + \
            " • " +  color.BOLD + color.RED + "Missing from " + source_file_names[compare_to_index] + color.END)
            print("Coding troubleshooting (if needed) note: Iterator index: {}. \ncompare_to_index: {}.".format(num, \
                                                                        compare_to_index))    

            data_set_matches_vs_missing_delta_Lists.append(check_and_list_missing_serials_in_lists(source_file_unique_filtered_data[num], source_file_unique_serials[compare_to_index]))
        
            print("Adding found and not-found lists to data_set_matches_vs_missing_delta_Lists to account " + \
                "for the comparison of {} to {}".format(source_file_names[num], source_file_names[compare_to_index]))
            compare_to_index = compare_to_index+1
            print("\nThere are now {} data_set_matches_vs_missing_delta_Lists lists.".format(len(data_set_matches_vs_missing_delta_Lists)))
print("\nI did the thing!")


## Missing Records Part 2 - Reverse Order

In [None]:
#####################################
#                 LIST MISSING SERIAL NUMBERS
#####################################

#TO DO: itereate among filtered lists to check against unique serials lists

total_lists = len(source_file_lists)

print("This section will compare each list against each other and provide missing device information.\nThere are {} lists.".format(total_lists))  
print(total_lists)
if total_lists > 1:
    for num, data_set in reversed(list(enumerate(source_file_unique_filtered_data, start=0))):
        compare_to_index = num-1
        locals()["data_set_matches_vs_missing_" + str(num)] = list()
        print("num: ", num, ". compare_to_index: ", compare_to_index)
        while compare_to_index >= 0:
            print("(within while loop) num: ", num, ". compare_to_index: ", compare_to_index)

            print(color.UNDERLINE +'\nList comparison:' + color.END + ' If any devices are listed below, they are in... \n'  + \
            " • " + "" + color.GREEN + source_file_names[num] + color.END + " but are missing from... \n" + \
            " • " +  color.BOLD + color.RED + "Missing from " + source_file_names[compare_to_index] + color.END)
            print("Coding troubleshooting (if needed) note: Iterator index: {}. \ncompare_to_index: {}.".format(num, \
                                                                        compare_to_index))                
            
            data_set_matches_vs_missing_delta_Lists.append(check_and_list_missing_serials_in_lists(source_file_unique_filtered_data[num], source_file_unique_serials[compare_to_index]))
        
            print("Adding found and not-found lists to data_set_matches_vs_missing_delta_Lists to account " + \
                "for the comparison of {} to {}".format(source_file_names[num], source_file_names[compare_to_index]))
            
            compare_to_index = compare_to_index-1
            print("\nThere are now {} data_set_matches_vs_missing_delta_Lists lists.".format(len(data_set_matches_vs_missing_delta_Lists)))

print("\nI did the thing!")

## Code checkpoint, how many lists do we have?

In [None]:
# print("source_file_names: ", source_file_names)
# print("source_file_lists Length: ", len(source_file_lists))
# print("source_file_lists Length[0]: ", len(source_file_lists[0]))
# print("source_file_unique_serials Length: ", len(source_file_unique_serials))
# print("source_file_unique_serials[0] Length:", len(source_file_unique_serials[0]))
# print("source_file_unique_serials[0] Length:", type(source_file_unique_serials[0]))
# print("source_file_unique_filtered_data Length: ", len(source_file_unique_filtered_data))
# print("source_file_unique_filtered_data[0] Length:", len(source_file_unique_filtered_data[0]))
# print("source_file_unique_filtered_data[0][0] Length: ", len(source_file_unique_filtered_data[0][0]))
# print("Example of source_file_unique_filtered_data[0][0]: ", source_file_unique_filtered_data[0][0])

# print("\nI did the thing!\n")

# print("source_file_names_large: ", source_file_names_large)
# print("source_file_lists_large Length[0]: ", len(source_file_lists_large[0]))
# print("source_file_lists_large Length[0][0]: ", len(source_file_lists_large[0][0]))
# print("Example of source_file_lists_large[0][0]: ", source_file_lists_large[0][0])
# print("source_file_unique_filtered_data_large Length: ", len(source_file_unique_filtered_data_large))
# print("source_file_unique_filtered_data_large[0] Length: ", len(source_file_unique_filtered_data_large[0]))
# print("Example of source_file_unique_filtered_data_large: ", source_file_unique_filtered_data_large[0][0])
# print("source_file_unique_filtered_data_large[0][0] Length: ", len(source_file_unique_filtered_data_large[0][0]))
# print("Example of source_file_unique_filtered_data_large[0][0]: ", source_file_unique_filtered_data_large[0][0])

# print("\nI did the thing!\n")

# if (source_file_unique_filtered_data_large):
#     print ("source_file_unique_filtered_data_large Length: ", len(source_file_unique_filtered_data_large))
#     print ("source_file_unique_filtered_data_large Length[0]: ", len(source_file_unique_filtered_data_large[0]))
#     print ("source_file_unique_filtered_data_large Length[0][0]: ", len(source_file_unique_filtered_data_large[0][0]))
#     print("Found a reference file, sample output at [0][0]: ", source_file_unique_filtered_data_large[0][0])
# print("\nI did the thing!\n")

# if (data_set_matches_vs_missing_delta_Lists):
#     print ("data_set_matches_vs_missing_delta_Lists Length: ", len(data_set_matches_vs_missing_delta_Lists))
#     print ("data_set_matches_vs_missing_delta_Lists Length[0]: ", len(data_set_matches_vs_missing_delta_Lists[0]))
#     print ("data_set_matches_vs_missing_delta_Lists Length[0][0]: ", len(data_set_matches_vs_missing_delta_Lists[0][0]))
#     print ("data_set_matches_vs_missing_delta_Lists Length[0][1]: ", len(data_set_matches_vs_missing_delta_Lists[0][1]))
#     print ("data_set_matches_vs_missing_delta_Lists Length[0][0][0]: ", len(data_set_matches_vs_missing_delta_Lists[0][0][0]))
#     print ("data_set_matches_vs_missing_delta_Lists sample[0][0][0]: ", data_set_matches_vs_missing_delta_Lists[0][0][0])
#     print ("data_set_matches_vs_missing_delta_Lists sample[0][1][0]: ", data_set_matches_vs_missing_delta_Lists[0][1][0])

print("\nI did the thing!")

## Check missing serials against large REFERENCE file

In [None]:


print("So, now you have a delta of the lists provided. Do you want to see if any of delta \
    devices are in a larger reference file? e.g. check if they are in a full DB output instead \
    of a select DB output")
print("\nLet's check if we have a reference file and then if the any of the key fields are in \
    there that match other files...")

# TO DO: check if ref file exists and how many
found_in_ref_file = list()
# if there is a reference table..
if (source_file_unique_filtered_data_large):
    print("Found a reference file, sample output at [0][0]: ", source_file_unique_filtered_data_large[0][0])
    #if there are delta list(s)...
    if (data_set_matches_vs_missing_delta_Lists[0][1]):
        print("Found a delta file, entries start at [0][1][0].")
        # for each of the delta lists...
        for delta_list in data_set_matches_vs_missing_delta_Lists:

            print ("\n\nChecking a delta list of size: ", len(delta_list[1]), " [Working...]")
            # for each of delta sublists for not_found...
            for delta_entry_not_found in delta_list[1]:
                #create list of keys of within the delta list to check against the keys in the reference file
                keys_list = list()
                for key in delta_entry_not_found.keys(): 
                    keys_list.append(key)
                # if the entry columns match the reference columns...
                if keys_list[0] in source_file_lists_large[0][0].keys():
                    # if the entry serial number is found in a list of unique filtered serial numbers (based off ref file)
                    if delta_entry_not_found['serial_number'] in source_file_unique_filtered_data_large[0]:
                        
                        
                        
                        
                        for ref_data_point in source_file_lists_large[0]: 
#                             print(delta_entry_not_found['serial_number'])
                            if ref_data_point['serial_number'] == delta_entry_not_found['serial_number']:
                                found_in_ref_file.append(ref_data_point)
                                break
#                         print("I found a delta data entries in the ref table: ", delta_entry_not_found["serial_number"])
#                         print("They have the same column: ", keys_list[0])





            print ("Delta assets found in Reference file (running total): ", len(found_in_ref_file))


if(found_in_ref_file):
    output_sample_records(found_in_ref_file)                        
print("I did the thing!")

# TO DO: check ref file 1 for any unique fields in provided fields list

# TO DO: check delta lists for key fields and list using delta list entries as primary list

## Title for next section