In [1]:
import itertools
import csv

In [2]:
def read_file(filename):
    with open(filename, 'r') as file:
        yield from file

In [3]:
lines = read_file('employment.csv')

In [4]:
next(lines)

'employer,department,employee_id,ssn\n'

In [5]:
with open('personal_info.csv', 'r') as file:
    for line in itertools.islice(file, 5):
        print(line, end='')

ssn,first_name,last_name,gender,language
100-53-9824,Sebastiano,Tester,Male,Icelandic
101-71-4702,Cayla,MacDonagh,Female,Lao
101-84-0356,Nomi,Lipprose,Female,Yiddish
104-22-0928,Justinian,Kunzelmann,Male,Dhivehi


In [6]:
with open('vehicles.csv', 'r') as file:
    for line in itertools.islice(file, 5):
        print(line, end='')

ssn,vehicle_make,vehicle_model,model_year
100-53-9824,Oldsmobile,Bravada,1993
101-71-4702,Ford,Mustang,1997
101-84-0356,GMC,Yukon,2005
104-22-0928,Oldsmobile,Intrigue,2000


In [7]:
with open('employment.csv', 'r') as file:
    for line in itertools.islice(file, 15):
        print(line, end='')

employer,department,employee_id,ssn
Stiedemann-Bailey,Research and Development,29-0890771,100-53-9824
Nicolas and Sons,Sales,41-6841359,101-71-4702
Connelly Group,Research and Development,98-7952860,101-84-0356
Upton LLC,Marketing,56-9817552,104-22-0928
Zemlak-Olson,Business Development,46-2886707,104-84-7144
"Kohler, Bradtke and Davis",Support,80-0975518,105-27-5541
"Roberts, Torphy and Dach",Human Resources,77-4895332,105-85-7486
Lind-Jast,Marketing,79-6418731,105-91-5022
Bashirian-Lueilwitz,Engineering,44-3328799,105-91-7777
"Windler, Marks and Haley",Services,54-6271885,106-35-1938
Leffler-Hahn,Accounting,31-5735282,106-36-3293
Lueilwitz LLC,Marketing,33-9146042,110-84-3641
Davis Inc,Accounting,39-0400385,111-35-1034
Kunze LLC,Research and Development,78-2612900,114-06-6912


In [8]:
with open('update_status.csv', 'r') as file:
    for line in itertools.islice(file, 5):
        print(line, end='')

ssn,last_updated,created
100-53-9824,2017-10-07T00:14:42Z,2016-01-24T21:19:30Z
101-71-4702,2017-01-23T11:23:17Z,2016-01-27T04:32:57Z
101-84-0356,2017-10-04T11:21:30Z,2016-09-21T23:04:07Z
104-22-0928,2017-03-28T12:38:29Z,2016-04-15T11:37:17Z


In [9]:
from datetime import datetime
from collections import namedtuple

In [10]:
def parse_data(value):
    data_parser ={
        
        'ssn' : lambda number : int(number.replace('-','')),
        'model_year' : int,
        'employee_id' : lambda number : int(number.replace('-','')),
        'last_updated' : lambda date : datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ'),
        'created' : lambda date : datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
        
    }
    return (data_parser.get(datatype, str)(data) for data, datatype in value)

In [11]:
def file_to_iter(filename):
    with open(filename, 'r') as file:
        reader = csv.reader(file, delimiter=',', quotechar='"')
        headers = next(reader)
        name = filename.replace('.csv','').title()
        Data = namedtuple(name , headers)
        for line in reader:
            data = parse_data(zip(line, headers))
            yield Data(*data)

In [12]:
personal_info_iter = file_to_iter('personal_info.csv')

In [13]:
vehicles_iter = file_to_iter('vehicles.csv')

In [14]:
for line in itertools.islice(vehicles_iter, 5):
    print(line)

Vehicles(ssn=100539824, vehicle_make='Oldsmobile', vehicle_model='Bravada', model_year=1993)
Vehicles(ssn=101714702, vehicle_make='Ford', vehicle_model='Mustang', model_year=1997)
Vehicles(ssn=101840356, vehicle_make='GMC', vehicle_model='Yukon', model_year=2005)
Vehicles(ssn=104220928, vehicle_make='Oldsmobile', vehicle_model='Intrigue', model_year=2000)
Vehicles(ssn=104847144, vehicle_make='Ford', vehicle_model='Crown Victoria', model_year=2008)


In [15]:
updated_status_iter = file_to_iter('update_status.csv')

In [16]:
for line in itertools.islice(updated_status_iter, 15):
    print(line)

Update_Status(ssn=100539824, last_updated=datetime.datetime(2017, 10, 7, 0, 14, 42), created=datetime.datetime(2016, 1, 24, 21, 19, 30))
Update_Status(ssn=101714702, last_updated=datetime.datetime(2017, 1, 23, 11, 23, 17), created=datetime.datetime(2016, 1, 27, 4, 32, 57))
Update_Status(ssn=101840356, last_updated=datetime.datetime(2017, 10, 4, 11, 21, 30), created=datetime.datetime(2016, 9, 21, 23, 4, 7))
Update_Status(ssn=104220928, last_updated=datetime.datetime(2017, 3, 28, 12, 38, 29), created=datetime.datetime(2016, 4, 15, 11, 37, 17))
Update_Status(ssn=104847144, last_updated=datetime.datetime(2018, 2, 19, 1, 34, 33), created=datetime.datetime(2016, 3, 15, 14, 7, 57))
Update_Status(ssn=105275541, last_updated=datetime.datetime(2017, 7, 24, 8, 58, 52), created=datetime.datetime(2016, 7, 23, 17, 58, 35))
Update_Status(ssn=105857486, last_updated=datetime.datetime(2018, 2, 14, 11, 32, 39), created=datetime.datetime(2016, 12, 15, 5, 46, 43))
Update_Status(ssn=105915022, last_updated

In [17]:
for line in itertools.islice(personal_info_iter, 15):
    print(line)

Personal_Info(ssn=100539824, first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic')
Personal_Info(ssn=101714702, first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao')
Personal_Info(ssn=101840356, first_name='Nomi', last_name='Lipprose', gender='Female', language='Yiddish')
Personal_Info(ssn=104220928, first_name='Justinian', last_name='Kunzelmann', gender='Male', language='Dhivehi')
Personal_Info(ssn=104847144, first_name='Claudianus', last_name='Brixey', gender='Male', language='Afrikaans')
Personal_Info(ssn=105275541, first_name='Federico', last_name='Aggett', gender='Male', language='Chinese')
Personal_Info(ssn=105857486, first_name='Angelina', last_name='McAvey', gender='Female', language='Punjabi')
Personal_Info(ssn=105915022, first_name='Moselle', last_name='Apfel', gender='Female', language='Latvian')
Personal_Info(ssn=105917777, first_name='Audi', last_name='Roach', gender='Female', language='Estonian')
Personal_Info(ssn=10635193

In [18]:
employment_iter = file_to_iter('employment.csv')

In [19]:
for line in itertools.islice(employment_iter, 15):
    print(line)

Employment(employer='Stiedemann-Bailey', department='Research and Development', employee_id=290890771, ssn=100539824)
Employment(employer='Nicolas and Sons', department='Sales', employee_id=416841359, ssn=101714702)
Employment(employer='Connelly Group', department='Research and Development', employee_id=987952860, ssn=101840356)
Employment(employer='Upton LLC', department='Marketing', employee_id=569817552, ssn=104220928)
Employment(employer='Zemlak-Olson', department='Business Development', employee_id=462886707, ssn=104847144)
Employment(employer='Kohler, Bradtke and Davis', department='Support', employee_id=800975518, ssn=105275541)
Employment(employer='Roberts, Torphy and Dach', department='Human Resources', employee_id=774895332, ssn=105857486)
Employment(employer='Lind-Jast', department='Marketing', employee_id=796418731, ssn=105915022)
Employment(employer='Bashirian-Lueilwitz', department='Engineering', employee_id=443328799, ssn=105917777)
Employment(employer='Windler, Marks an

# Goal 1

In [20]:
def parse_data(value):
    data_parser ={
        
        'ssn' : lambda number : int(number.replace('-','')),
        'model_year' : int,
        'employee_id' : lambda number : int(number.replace('-','')),
        'last_updated' : lambda date : datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ'),
        'created' : lambda date : datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
        
    }
    return (data_parser.get(datatype, str)(data) for data, datatype in value)

def file_to_iter(filename):
    with open(filename, 'r') as file:
        reader = csv.reader(file, delimiter=',', quotechar='"')
        name = filename.replace('.csv','').title()
        headers = next(reader)
        Data_tuple = namedtuple(name , headers)
        for line in reader:
            data = parse_data(zip(line, headers))
            yield Data_tuple(*data)

In [21]:
personal_info_iter = file_to_iter('personal_info.csv')
vehicles_iter = file_to_iter('vehicles.csv')
employment_iter = file_to_iter('employment.csv')
updated_status_iter = file_to_iter('update_status.csv')

In [22]:
length = 0
for data in personal_info_iter:
    length += 1
print(length)

1000


In [23]:
length = 0
for data in vehicles_iter:
    length += 1
print(length)

1000


In [24]:
length = 0
for data in employment_iter:
    length += 1
print(length)

1000


In [25]:
length = 0
for data in updated_status_iter:
    length += 1
print(length)

1000


In [26]:
personal_info_iter = file_to_iter('personal_info.csv')
vehicles_iter = file_to_iter('vehicles.csv')
employment_iter = file_to_iter('employment.csv')
updated_status_iter = file_to_iter('update_status.csv')

In [27]:
for data in personal_info_iter:
    print(data)

Personal_Info(ssn=100539824, first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic')
Personal_Info(ssn=101714702, first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao')
Personal_Info(ssn=101840356, first_name='Nomi', last_name='Lipprose', gender='Female', language='Yiddish')
Personal_Info(ssn=104220928, first_name='Justinian', last_name='Kunzelmann', gender='Male', language='Dhivehi')
Personal_Info(ssn=104847144, first_name='Claudianus', last_name='Brixey', gender='Male', language='Afrikaans')
Personal_Info(ssn=105275541, first_name='Federico', last_name='Aggett', gender='Male', language='Chinese')
Personal_Info(ssn=105857486, first_name='Angelina', last_name='McAvey', gender='Female', language='Punjabi')
Personal_Info(ssn=105915022, first_name='Moselle', last_name='Apfel', gender='Female', language='Latvian')
Personal_Info(ssn=105917777, first_name='Audi', last_name='Roach', gender='Female', language='Estonian')
Personal_Info(ssn=10635193

In [28]:
for data in vehicles_iter:
    print(data)

Vehicles(ssn=100539824, vehicle_make='Oldsmobile', vehicle_model='Bravada', model_year=1993)
Vehicles(ssn=101714702, vehicle_make='Ford', vehicle_model='Mustang', model_year=1997)
Vehicles(ssn=101840356, vehicle_make='GMC', vehicle_model='Yukon', model_year=2005)
Vehicles(ssn=104220928, vehicle_make='Oldsmobile', vehicle_model='Intrigue', model_year=2000)
Vehicles(ssn=104847144, vehicle_make='Ford', vehicle_model='Crown Victoria', model_year=2008)
Vehicles(ssn=105275541, vehicle_make='Ford', vehicle_model='Mustang', model_year=2001)
Vehicles(ssn=105857486, vehicle_make='Chrysler', vehicle_model='300', model_year=2008)
Vehicles(ssn=105915022, vehicle_make='Isuzu', vehicle_model='Hombre Space', model_year=2000)
Vehicles(ssn=105917777, vehicle_make='Chevrolet', vehicle_model='Silverado 3500', model_year=2004)
Vehicles(ssn=106351938, vehicle_make='GMC', vehicle_model='Sonoma Club', model_year=1992)
Vehicles(ssn=106363293, vehicle_make='Volkswagen', vehicle_model='Touareg', model_year=2008)

Vehicles(ssn=670679679, vehicle_make='Infiniti', vehicle_model='Q', model_year=1993)
Vehicles(ssn=670893434, vehicle_make='Acura', vehicle_model='Legend', model_year=1988)
Vehicles(ssn=671161411, vehicle_make='Lexus', vehicle_model='HS', model_year=2012)
Vehicles(ssn=671719470, vehicle_make='Porsche', vehicle_model='924 S', model_year=1987)
Vehicles(ssn=671768792, vehicle_make='Audi', vehicle_model='Q7', model_year=2010)
Vehicles(ssn=672608782, vehicle_make='Mazda', vehicle_model='Miata MX-5', model_year=1996)
Vehicles(ssn=673835713, vehicle_make='Toyota', vehicle_model='Celica', model_year=2000)
Vehicles(ssn=674640219, vehicle_make='Mercury', vehicle_model='Sable', model_year=1992)
Vehicles(ssn=675310953, vehicle_make='Volvo', vehicle_model='S80', model_year=2010)
Vehicles(ssn=675414272, vehicle_make='Toyota', vehicle_model='T100', model_year=1997)
Vehicles(ssn=678604630, vehicle_make='GMC', vehicle_model='Sonoma Club Coupe', model_year=1993)
Vehicles(ssn=679225301, vehicle_make='Chev

In [29]:
for data in employment_iter:
    print(data)

Employment(employer='Stiedemann-Bailey', department='Research and Development', employee_id=290890771, ssn=100539824)
Employment(employer='Nicolas and Sons', department='Sales', employee_id=416841359, ssn=101714702)
Employment(employer='Connelly Group', department='Research and Development', employee_id=987952860, ssn=101840356)
Employment(employer='Upton LLC', department='Marketing', employee_id=569817552, ssn=104220928)
Employment(employer='Zemlak-Olson', department='Business Development', employee_id=462886707, ssn=104847144)
Employment(employer='Kohler, Bradtke and Davis', department='Support', employee_id=800975518, ssn=105275541)
Employment(employer='Roberts, Torphy and Dach', department='Human Resources', employee_id=774895332, ssn=105857486)
Employment(employer='Lind-Jast', department='Marketing', employee_id=796418731, ssn=105915022)
Employment(employer='Bashirian-Lueilwitz', department='Engineering', employee_id=443328799, ssn=105917777)
Employment(employer='Windler, Marks an

Employment(employer='Quitzon Group', department='Sales', employee_id=524061438, ssn=659434520)
Employment(employer='Grimes-Emard', department='Marketing', employee_id=761285894, ssn=667573442)
Employment(employer='Abshire-Hahn', department='Marketing', employee_id=940531814, ssn=667877090)
Employment(employer='Stark-Runte', department='Support', employee_id=626777890, ssn=669129323)
Employment(employer='Stamm-Kassulke', department='Services', employee_id=410303264, ssn=669838959)
Employment(employer='Bahringer, Jacobs and Schimmel', department='Marketing', employee_id=930656725, ssn=670567817)
Employment(employer='Ernser-Crooks', department='Sales', employee_id=214570883, ssn=670666403)
Employment(employer='Hoeger, Gleason and Steuber', department='Services', employee_id=255267392, ssn=670679679)
Employment(employer='Mann, Trantow and Gusikowski', department='Marketing', employee_id=258521955, ssn=670893434)
Employment(employer='Rath Inc', department='Sales', employee_id=262704393, ssn

In [30]:
for data in updated_status_iter:
    print(data)

Update_Status(ssn=100539824, last_updated=datetime.datetime(2017, 10, 7, 0, 14, 42), created=datetime.datetime(2016, 1, 24, 21, 19, 30))
Update_Status(ssn=101714702, last_updated=datetime.datetime(2017, 1, 23, 11, 23, 17), created=datetime.datetime(2016, 1, 27, 4, 32, 57))
Update_Status(ssn=101840356, last_updated=datetime.datetime(2017, 10, 4, 11, 21, 30), created=datetime.datetime(2016, 9, 21, 23, 4, 7))
Update_Status(ssn=104220928, last_updated=datetime.datetime(2017, 3, 28, 12, 38, 29), created=datetime.datetime(2016, 4, 15, 11, 37, 17))
Update_Status(ssn=104847144, last_updated=datetime.datetime(2018, 2, 19, 1, 34, 33), created=datetime.datetime(2016, 3, 15, 14, 7, 57))
Update_Status(ssn=105275541, last_updated=datetime.datetime(2017, 7, 24, 8, 58, 52), created=datetime.datetime(2016, 7, 23, 17, 58, 35))
Update_Status(ssn=105857486, last_updated=datetime.datetime(2018, 2, 14, 11, 32, 39), created=datetime.datetime(2016, 12, 15, 5, 46, 43))
Update_Status(ssn=105915022, last_updated

Update_Status(ssn=309388085, last_updated=datetime.datetime(2017, 10, 15, 2, 43, 14), created=datetime.datetime(2016, 5, 12, 11, 34, 16))
Update_Status(ssn=310602027, last_updated=datetime.datetime(2018, 3, 20, 12, 53, 17), created=datetime.datetime(2016, 10, 25, 3, 4, 20))
Update_Status(ssn=311469296, last_updated=datetime.datetime(2018, 2, 18, 15, 38, 24), created=datetime.datetime(2016, 8, 6, 19, 3, 48))
Update_Status(ssn=312387084, last_updated=datetime.datetime(2017, 8, 7, 14, 33, 15), created=datetime.datetime(2016, 12, 1, 17, 51, 41))
Update_Status(ssn=312593476, last_updated=datetime.datetime(2017, 6, 5, 16, 36, 40), created=datetime.datetime(2016, 6, 11, 17, 37, 47))
Update_Status(ssn=312773647, last_updated=datetime.datetime(2017, 4, 30, 2, 56, 29), created=datetime.datetime(2016, 1, 13, 20, 57, 51))
Update_Status(ssn=313027822, last_updated=datetime.datetime(2017, 10, 7, 16, 22, 13), created=datetime.datetime(2016, 12, 9, 3, 16, 22))
Update_Status(ssn=313527312, last_updated

Update_Status(ssn=458970499, last_updated=datetime.datetime(2018, 1, 9, 9, 39, 25), created=datetime.datetime(2016, 11, 24, 23, 17, 7))
Update_Status(ssn=459297955, last_updated=datetime.datetime(2017, 2, 20, 1, 40, 33), created=datetime.datetime(2016, 1, 30, 15, 41, 29))
Update_Status(ssn=460650321, last_updated=datetime.datetime(2017, 1, 21, 12, 2, 3), created=datetime.datetime(2016, 5, 24, 10, 31, 27))
Update_Status(ssn=461842780, last_updated=datetime.datetime(2017, 4, 22, 5, 6, 42), created=datetime.datetime(2016, 5, 4, 17, 39, 40))
Update_Status(ssn=461888139, last_updated=datetime.datetime(2017, 7, 17, 1, 5, 33), created=datetime.datetime(2016, 11, 27, 13, 35, 53))
Update_Status(ssn=464211055, last_updated=datetime.datetime(2017, 3, 8, 21, 0, 6), created=datetime.datetime(2016, 10, 22, 4, 13, 2))
Update_Status(ssn=464847886, last_updated=datetime.datetime(2017, 12, 14, 0, 31, 20), created=datetime.datetime(2016, 7, 17, 23, 20, 6))
Update_Status(ssn=465960362, last_updated=dateti

Update_Status(ssn=767556767, last_updated=datetime.datetime(2017, 4, 20, 11, 34, 8), created=datetime.datetime(2016, 6, 27, 19, 32, 3))
Update_Status(ssn=768545764, last_updated=datetime.datetime(2017, 8, 30, 1, 27, 30), created=datetime.datetime(2016, 6, 29, 17, 1, 52))
Update_Status(ssn=768748540, last_updated=datetime.datetime(2017, 11, 26, 21, 46, 34), created=datetime.datetime(2016, 10, 20, 21, 46, 50))
Update_Status(ssn=770618252, last_updated=datetime.datetime(2018, 2, 3, 13, 33, 14), created=datetime.datetime(2016, 4, 17, 6, 15, 58))
Update_Status(ssn=771206900, last_updated=datetime.datetime(2017, 11, 23, 12, 17, 15), created=datetime.datetime(2016, 8, 30, 12, 37, 4))
Update_Status(ssn=771710505, last_updated=datetime.datetime(2017, 7, 13, 12, 42, 39), created=datetime.datetime(2016, 3, 7, 13, 28, 27))
Update_Status(ssn=773219639, last_updated=datetime.datetime(2018, 3, 2, 8, 43, 41), created=datetime.datetime(2016, 2, 7, 11, 6, 8))
Update_Status(ssn=773537438, last_updated=da

In [31]:
personal_info_iter = file_to_iter('personal_info.csv')
vehicles_iter = file_to_iter('vehicles.csv')
employment_iter = file_to_iter('employment.csv')
updated_status_iter = file_to_iter('update_status.csv')

In [32]:
def single_record(*files):
    file_iters = (file_to_iter(file) for file in files)
    zipped_file_iters = zip(*file_iters)
    for data in zipped_file_iters:
        yield data

In [33]:
r = single_record('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv')

In [34]:
next(r)

(Personal_Info(ssn=100539824, first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic'),
 Vehicles(ssn=100539824, vehicle_make='Oldsmobile', vehicle_model='Bravada', model_year=1993),
 Employment(employer='Stiedemann-Bailey', department='Research and Development', employee_id=290890771, ssn=100539824),
 Update_Status(ssn=100539824, last_updated=datetime.datetime(2017, 10, 7, 0, 14, 42), created=datetime.datetime(2016, 1, 24, 21, 19, 30)))

In [35]:
next(r)[0]._fields,next(r)[1]._fields,next(r)[2]._fields,next(r)[3]._fields

(('ssn', 'first_name', 'last_name', 'gender', 'language'),
 ('ssn', 'vehicle_make', 'vehicle_model', 'model_year'),
 ('employer', 'department', 'employee_id', 'ssn'),
 ('ssn', 'last_updated', 'created'))

In [36]:
{*next(r)[0]._fields,*next(r)[1]._fields,*next(r)[2]._fields,*next(r)[3]._fields}

{'created',
 'department',
 'employee_id',
 'employer',
 'first_name',
 'gender',
 'language',
 'last_name',
 'last_updated',
 'model_year',
 'ssn',
 'vehicle_make',
 'vehicle_model'}

In [37]:
next(r)[0]._asdict(), next(r)[1]._asdict(), next(r)[2]._asdict(), next(r)[3]._asdict()

({'ssn': 106351938,
  'first_name': 'Mackenzie',
  'last_name': 'Nussey',
  'gender': 'Male',
  'language': 'Swedish'},
 {'ssn': 106363293,
  'vehicle_make': 'Volkswagen',
  'vehicle_model': 'Touareg',
  'model_year': 2008},
 {'employer': 'Lueilwitz LLC',
  'department': 'Marketing',
  'employee_id': 339146042,
  'ssn': 110843641},
 {'ssn': 111351034,
  'last_updated': datetime.datetime(2017, 3, 18, 14, 51, 4),
  'created': datetime.datetime(2016, 8, 21, 7, 36, 17)})

In [38]:
new = namedtuple('New', (*next(r)[0]._fields,*next(r)[1]._fields,*next(r)[2]._fields,*next(r)[3]._fields),
                 rename = True)

In [39]:
new

__main__.New

In [40]:
new(*next(r)[0]._asdict(), *next(r)[1]._asdict(), *next(r)[2]._asdict(), *next(r)[3]._asdict())

New(ssn='ssn', first_name='first_name', last_name='last_name', gender='gender', language='language', _5='ssn', vehicle_make='vehicle_make', vehicle_model='vehicle_model', model_year='model_year', employer='employer', department='department', employee_id='employee_id', _12='ssn', _13='ssn', last_updated='last_updated', created='created')

In [41]:
def find_fields(files):
    headers = list()
    for file in files:
        with open(file, 'r') as f:
            headers.append(next(f))
    return set(headers)

In [42]:
def single_record(*files):
    file_iters = (file_to_iter(file) for file in files)
    fields = find_fields(files)
    return fields
    zipped_file_iters = zip(*file_iters)
    Full_Data = namedtuple('Full_Data')
    #for data in zipped_file_iters:
        #yield data

In [43]:
single_record('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv')

{'employer,department,employee_id,ssn\n',
 'ssn,first_name,last_name,gender,language\n',
 'ssn,last_updated,created\n',
 'ssn,vehicle_make,vehicle_model,model_year\n'}

In [44]:
{**next(r)[0]._asdict(), **next(r)[1]._asdict(), **next(r)[2]._asdict(), **next(r)[3]._asdict()}.keys()

dict_keys(['ssn', 'first_name', 'last_name', 'gender', 'language', 'vehicle_make', 'vehicle_model', 'model_year', 'employer', 'department', 'employee_id', 'last_updated', 'created'])

In [45]:
help(set)

Help on class set in module builtins:

class set(object)
 |  set() -> new empty set object
 |  set(iterable) -> new set object
 |  
 |  Build an unordered collection of unique elements.
 |  
 |  Methods defined here:
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __iand__(self, value, /)
 |      Return self&=value.
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __ior__(self, value, /)
 |      Return self|=value.
 |  
 |  __isub__(self, value, /)
 |      Return self-=value.
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __ixor__(self, value, /)
 |      Re

# Goal 2

In [46]:
def single_record(*files):
    file_iters = (file_to_iter(file) for file in files)
    zipped_iters = zip(*file_iters)
    data1 = next(zipped_iters)
    data1_dict = { key : value for datum in data1 for key, value in datum._asdict().items()}
    fields =  data1_dict.keys()
    Full_Data = namedtuple('FulL_Data', fields)
    yield Full_Data(**data1_dict)
    for data in zipped_iters:
        yield Full_Data(**{ key:value for datum in data for key, value in datum._asdict().items()})

In [47]:
data = single_record('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv')

In [48]:
sum(1 for datum in data)

1000

In [49]:
help(datetime)

Help on class datetime in module datetime:

class datetime(date)
 |  datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
 |  
 |  The year, month and day arguments are required. tzinfo may be None, or an
 |  instance of a tzinfo subclass. The remaining arguments may be ints.
 |  
 |  Method resolution order:
 |      datetime
 |      date
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __hash__(self, /)
 |      Return hash(self).
 |  
 |  __le__(self, value, /)
 |      Return self<=value.
 |  
 |  __lt__(self, value, /)
 |      Return self<value.
 |  
 |  __ne__(self, value, /)
 |      Return self!=value.
 |  
 |  __radd__(self, value

In [50]:
date1 = datetime(2000, 6, 13)

In [51]:
date1

datetime.datetime(2000, 6, 13, 0, 0)

In [52]:
date2 = datetime(2004, 9, 10)

In [53]:
date2

datetime.datetime(2004, 9, 10, 0, 0)

In [54]:
date1 < date2

True

In [55]:
date2 < date1

False

# Goal3

In [56]:
last_stale_date = datetime(2017, 3, 1)

In [57]:
data = single_record('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv')

In [58]:
record = filter(lambda data : data.last_updated >= last_stale_date, data)

In [59]:
sum(1 for _ in record)

871

In [60]:
data = single_record('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv')

In [61]:
r = (datum for datum in data if datum.last_updated < last_stale_date)

In [62]:
for item in r:
    print(item, end='\n\n')

FulL_Data(ssn=101714702, first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao', vehicle_make='Ford', vehicle_model='Mustang', model_year=1997, employer='Nicolas and Sons', department='Sales', employee_id=416841359, last_updated=datetime.datetime(2017, 1, 23, 11, 23, 17), created=datetime.datetime(2016, 1, 27, 4, 32, 57))

FulL_Data(ssn=117779748, first_name='Lucien', last_name='Smalman', gender='Male', language='Hebrew', vehicle_make='Audi', vehicle_model='S6', model_year=2009, employer='Schmeler and Sons', department='Research and Development', employee_id=623275199, last_updated=datetime.datetime(2017, 2, 18, 23, 9, 25), created=datetime.datetime(2016, 2, 2, 15, 41, 42))

FulL_Data(ssn=123883381, first_name='Christiane', last_name='Hynes', gender='Female', language='Danish', vehicle_make='Ford', vehicle_model='Mustang', model_year=1989, employer='Mueller-Rath', department='Human Resources', employee_id=58069298, last_updated=datetime.datetime(2017, 2, 21, 16, 37,

FulL_Data(ssn=397878282, first_name='Kyrstin', last_name='Waldock', gender='Female', language='Belarusian', vehicle_make='Chevrolet', vehicle_model='Blazer', model_year=1998, employer='Jones Inc', department='Sales', employee_id=482299282, last_updated=datetime.datetime(2017, 1, 25, 21, 15, 13), created=datetime.datetime(2016, 6, 24, 8, 10, 55))

FulL_Data(ssn=411434217, first_name='Cull', last_name='Noddle', gender='Male', language='Spanish', vehicle_make='Buick', vehicle_model='Century', model_year=1986, employer='Senger, Schuppe and Mueller', department='Marketing', employee_id=270379629, last_updated=datetime.datetime(2017, 2, 19, 23, 2, 59), created=datetime.datetime(2016, 9, 20, 2, 3, 16))

FulL_Data(ssn=412590452, first_name='Seamus', last_name='Petrusch', gender='Male', language='Spanish', vehicle_make='Honda', vehicle_model='Prelude', model_year=1985, employer='VonRueden, Zboncak and Erdman', department='Legal', employee_id=624306075, last_updated=datetime.datetime(2017, 1, 3,

FulL_Data(ssn=878128265, first_name='Nance', last_name='Doble', gender='Female', language='Dhivehi', vehicle_make='Chrysler', vehicle_model='Concorde', model_year=1998, employer='Witting, Schuster and Barton', department='Research and Development', employee_id=544161357, last_updated=datetime.datetime(2017, 1, 11, 18, 36, 16), created=datetime.datetime(2016, 3, 14, 14, 30, 37))

FulL_Data(ssn=880224449, first_name='Zebadiah', last_name='Halliburton', gender='Male', language='Azeri', vehicle_make='Toyota', vehicle_model='Camry', model_year=2008, employer='Harber-Legros', department='Training', employee_id=599280036, last_updated=datetime.datetime(2017, 1, 13, 19, 50, 26), created=datetime.datetime(2016, 6, 11, 20, 35, 38))

FulL_Data(ssn=882495290, first_name='Tamarah', last_name='Stenson', gender='Female', language='Armenian', vehicle_make='GMC', vehicle_model='Savana 3500', model_year=2011, employer='Zulauf Inc', department='Accounting', employee_id=467067366, last_updated=datetime.da

In [102]:
def get_filtered_data(*files, filter_key = None):
    result = single_record(*files)
    yield from filter(filter_key, result)

In [105]:
data = get_filtered_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.ssn == 101714702)

In [106]:
for item in data:
    print(item)

FulL_Data(ssn=101714702, first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao', vehicle_make='Ford', vehicle_model='Mustang', model_year=1997, employer='Nicolas and Sons', department='Sales', employee_id=416841359, last_updated=datetime.datetime(2017, 1, 23, 11, 23, 17), created=datetime.datetime(2016, 1, 27, 4, 32, 57))


In [120]:
data = get_filtered_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.language == 'Tamil')

In [121]:
for item in data:
    print(item)
    print()

FulL_Data(ssn=212411496, first_name='Agnese', last_name='Lawes', gender='Female', language='Tamil', vehicle_make='Volkswagen', vehicle_model='Eurovan', model_year=1995, employer='Hills-Luettgen', department='Engineering', employee_id=496662355, last_updated=datetime.datetime(2017, 7, 7, 20, 29, 9), created=datetime.datetime(2016, 10, 27, 12, 47, 10))

FulL_Data(ssn=296904908, first_name='Holt', last_name='Angel', gender='Male', language='Tamil', vehicle_make='Jensen', vehicle_model='Interceptor', model_year=1967, employer='Mayert-Volkman', department='Research and Development', employee_id=117283397, last_updated=datetime.datetime(2017, 5, 8, 13, 53, 40), created=datetime.datetime(2016, 4, 12, 0, 23, 9))

FulL_Data(ssn=412798951, first_name='Giorgi', last_name='Sisnett', gender='Male', language='Tamil', vehicle_make='Volkswagen', vehicle_model='Golf', model_year=2011, employer='Hilll, Hickle and Koelpin', department='Services', employee_id=513888263, last_updated=datetime.datetime(2017

In [127]:
male_data = get_filtered_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.gender == 'Male')

In [128]:
result = sorted(male_data, key = lambda x : x.vehicle_make)

In [156]:
groups = itertools.groupby(result, lambda x: x.vehicle_make)

In [157]:
make_dict = {make:sum(1 for person in persons) for make, persons in groups}

In [164]:
a = sorted(make_dict.items(), key = lambda item : item[1], reverse=True)

In [166]:
total = 0
for key, value in a:
    total += value
print(total)

507


In [168]:
female_data = get_filtered_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.gender == 'Female')
result = sorted(female_data, key = lambda x : x.vehicle_make)
groups = itertools.groupby(result, lambda x: x.vehicle_make)
make_dict = {make:sum(1 for person in persons) for make, persons in groups}

In [170]:
b = sorted(make_dict.items(), key = lambda item : item[1], reverse=True)

In [172]:
total = 0
for key, value in b:
    total += value
print(total)

493


In [185]:
def get_filtered_grouped_data(*files, filter_key=None, group_by=None):
    data = sorted(get_filtered_data(*files, filter_key=filter_key), key=group_by)
    groups = itertools.groupby(data, group_by)
    yield from groups
    

In [186]:
data = get_filtered_grouped_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.gender == 'Female',
                                group_by = lambda x : x.vehicle_make)

In [183]:
for key, group in data:
    print(key, list(group))
    print()

Acura [FulL_Data(ssn=156916848, first_name='Carol', last_name='Cromar', gender='Female', language='Amharic', vehicle_make='Acura', vehicle_model='TL', model_year=2005, employer='King LLC', department='Research and Development', employee_id=394026042, last_updated=datetime.datetime(2017, 8, 30, 22, 29, 10), created=datetime.datetime(2016, 5, 7, 6, 45, 32)), FulL_Data(ssn=210914729, first_name='Tamarra', last_name='Minchella', gender='Female', language='Greek', vehicle_make='Acura', vehicle_model='Integra', model_year=1997, employer='Weber, Reynolds and Brakus', department='Training', employee_id=260100410, last_updated=datetime.datetime(2017, 7, 11, 8, 17, 40), created=datetime.datetime(2016, 12, 4, 0, 41, 8)), FulL_Data(ssn=298706018, first_name='Minny', last_name='Van der Baaren', gender='Female', language='Afrikaans', vehicle_make='Acura', vehicle_model='NSX', model_year=1997, employer='Sporer, Haley and Cartwright', department='Legal', employee_id=728129260, last_updated=datetime.da

In [218]:
def get_largest_group(groups):
    return sorted(groups, key=lambda group : sum(1 for _ in group[1]), reverse = True)

In [228]:
groups = get_filtered_grouped_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.gender == 'Female',
                                group_by = lambda x : x.vehicle_make)

In [229]:
for make, group in groups:
    print(f'\t\t\t\t\t\t------------------{make}-------------------')
    for person in group:
        print(person)
    print()

						------------------Acura-------------------
FulL_Data(ssn=156916848, first_name='Carol', last_name='Cromar', gender='Female', language='Amharic', vehicle_make='Acura', vehicle_model='TL', model_year=2005, employer='King LLC', department='Research and Development', employee_id=394026042, last_updated=datetime.datetime(2017, 8, 30, 22, 29, 10), created=datetime.datetime(2016, 5, 7, 6, 45, 32))
FulL_Data(ssn=210914729, first_name='Tamarra', last_name='Minchella', gender='Female', language='Greek', vehicle_make='Acura', vehicle_model='Integra', model_year=1997, employer='Weber, Reynolds and Brakus', department='Training', employee_id=260100410, last_updated=datetime.datetime(2017, 7, 11, 8, 17, 40), created=datetime.datetime(2016, 12, 4, 0, 41, 8))
FulL_Data(ssn=298706018, first_name='Minny', last_name='Van der Baaren', gender='Female', language='Afrikaans', vehicle_make='Acura', vehicle_model='NSX', model_year=1997, employer='Sporer, Haley and Cartwright', department='Legal', employe

In [230]:
groups = get_filtered_grouped_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.gender == 'Female',
                                group_by = lambda x : x.vehicle_make)

In [231]:
{make:sum(1 for _ in group) for make, group in groups}

{'Acura': 11,
 'Aston Martin': 2,
 'Audi': 14,
 'Austin': 1,
 'BMW': 13,
 'Bentley': 5,
 'Bugatti': 1,
 'Buick': 11,
 'Cadillac': 6,
 'Chevrolet': 48,
 'Chrysler': 8,
 'Dodge': 20,
 'Eagle': 1,
 'Ford': 48,
 'GMC': 23,
 'Geo': 1,
 'Honda': 10,
 'Hyundai': 4,
 'Infiniti': 9,
 'Isuzu': 3,
 'Jaguar': 3,
 'Jeep': 6,
 'Kia': 9,
 'Lamborghini': 3,
 'Land Rover': 9,
 'Lexus': 17,
 'Lincoln': 4,
 'Lotus': 7,
 'Mazda': 15,
 'Mercedes-Benz': 18,
 'Mercury': 9,
 'Mitsubishi': 25,
 'Morgan': 1,
 'Nissan': 12,
 'Oldsmobile': 8,
 'Panoz': 1,
 'Plymouth': 4,
 'Pontiac': 14,
 'Porsche': 5,
 'Rolls-Royce': 2,
 'Saab': 3,
 'Saturn': 3,
 'Scion': 3,
 'Smart': 1,
 'Subaru': 9,
 'Suzuki': 13,
 'Toyota': 24,
 'Volkswagen': 11,
 'Volvo': 15}

In [263]:
groups = get_filtered_grouped_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.gender == 'Female',
                                group_by = lambda x : x.vehicle_make)

In [264]:
sorted(groups, key = lambda group : len(list(x[1])))

[('Acura', <itertools._grouper at 0x4524988>),
 ('Aston Martin', <itertools._grouper at 0x7032868>),
 ('Audi', <itertools._grouper at 0x70327c0>),
 ('Austin', <itertools._grouper at 0x70326a0>),
 ('BMW', <itertools._grouper at 0x7032aa8>),
 ('Bentley', <itertools._grouper at 0x7032550>),
 ('Bugatti', <itertools._grouper at 0x7032640>),
 ('Buick', <itertools._grouper at 0x7032e80>),
 ('Cadillac', <itertools._grouper at 0x7032538>),
 ('Chevrolet', <itertools._grouper at 0x7032820>),
 ('Chrysler', <itertools._grouper at 0x6cc7f10>),
 ('Dodge', <itertools._grouper at 0x6cc7be0>),
 ('Eagle', <itertools._grouper at 0x6cc7e08>),
 ('Ford', <itertools._grouper at 0x6cc7d78>),
 ('GMC', <itertools._grouper at 0x6cc7e50>),
 ('Geo', <itertools._grouper at 0x6d17c28>),
 ('Honda', <itertools._grouper at 0x69db190>),
 ('Hyundai', <itertools._grouper at 0x69db178>),
 ('Infiniti', <itertools._grouper at 0x6f9df10>),
 ('Isuzu', <itertools._grouper at 0x6f9de80>),
 ('Jaguar', <itertools._grouper at 0x6f9d

In [289]:
def get_largest_group(groups):
    group_dict = {make : list(group) for make, group in groups}
    return sorted(group_dict.items(),key = lambda g : len(g[1]) , reverse=True)

In [290]:
groups = get_filtered_grouped_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.gender == 'Female',
                                group_by = lambda x : x.vehicle_make)

In [291]:
get_largest_group(groups)

[('Chevrolet',
  [FulL_Data(ssn=105917777, first_name='Audi', last_name='Roach', gender='Female', language='Estonian', vehicle_make='Chevrolet', vehicle_model='Silverado 3500', model_year=2004, employer='Bashirian-Lueilwitz', department='Engineering', employee_id=443328799, last_updated=datetime.datetime(2017, 5, 11, 1, 48, 32), created=datetime.datetime(2016, 5, 31, 0, 38, 13)),
   FulL_Data(ssn=114660984, first_name='Casandra', last_name='Juares', gender='Female', language='Lithuanian', vehicle_make='Chevrolet', vehicle_model='Camaro', model_year=1998, employer='Mayert, Rice and Schmitt', department='Product Management', employee_id=858596488, last_updated=datetime.datetime(2017, 6, 16, 17, 41, 33), created=datetime.datetime(2016, 5, 28, 20, 31, 25)),
   FulL_Data(ssn=119078817, first_name='Blondie', last_name='Powderham', gender='Female', language='Croatian', vehicle_make='Chevrolet', vehicle_model='Colorado', model_year=2005, employer='Terry LLC', department='Accounting', employee_

In [292]:
groups = get_filtered_grouped_data('personal_info.csv', 'vehicles.csv', 'employment.csv', 'update_status.csv',
                             filter_key = lambda x : x.gender == 'Female',
                                group_by = lambda x : x.vehicle_make)

In [293]:
length = lambda group : sum(1 for _ in group[1])
for group in groups:
    print(group[0])
    print(length(group))
    print()

Acura
11

Aston Martin
2

Audi
14

Austin
1

BMW
13

Bentley
5

Bugatti
1

Buick
11

Cadillac
6

Chevrolet
48

Chrysler
8

Dodge
20

Eagle
1

Ford
48

GMC
23

Geo
1

Honda
10

Hyundai
4

Infiniti
9

Isuzu
3

Jaguar
3

Jeep
6

Kia
9

Lamborghini
3

Land Rover
9

Lexus
17

Lincoln
4

Lotus
7

Mazda
15

Mercedes-Benz
18

Mercury
9

Mitsubishi
25

Morgan
1

Nissan
12

Oldsmobile
8

Panoz
1

Plymouth
4

Pontiac
14

Porsche
5

Rolls-Royce
2

Saab
3

Saturn
3

Scion
3

Smart
1

Subaru
9

Suzuki
13

Toyota
24

Volkswagen
11

Volvo
15

