In [86]:
from collections import namedtuple
import csv
from datetime import datetime

# Locations of the four source CSV files, relative to the working directory.
# Forward slashes are used for every path: Python accepts them on Windows as
# well as on POSIX systems, whereas a backslash-separated path only resolves
# on Windows.
employment_filepath = 'files/project_files/employment.csv'
personal_filepath = 'files/project_files/personal_info.csv'
update_filepath = 'files/project_files/update_status.csv'
vehicles_filepath = 'files/project_files/vehicles.csv'

def employment_csv_reader(filepath):
    """Lazily read the employment CSV, yielding one named tuple per data row.

    The first line of the file supplies the field names of the ``Employment``
    named tuple; every later line is yielded as an ``Employment`` instance
    whose values are left as strings.

    Args:
        filepath: Path of the CSV file to read.

    Yields:
        Employment: One record per data row, in file order. An empty file
        yields nothing.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being translated.
    with open(filepath, newline='') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        header = next(reader, None)
        if header is None:
            # No header line at all: finish cleanly. A bare next() here would
            # raise StopIteration inside the generator, which Python turns
            # into a RuntimeError.
            return
        Employment = namedtuple('Employment', header)
        for row in reader:
            yield Employment(*row)


def personal_info_csv_reader(filepath):
    """Lazily read the personal-info CSV, yielding one named tuple per data row.

    The first line of the file supplies the field names of the ``Personal``
    named tuple; every later line is yielded as a ``Personal`` instance whose
    values are left as strings.

    Args:
        filepath: Path of the CSV file to read.

    Yields:
        Personal: One record per data row, in file order. An empty file
        yields nothing.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being translated.
    with open(filepath, newline='') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        header = next(reader, None)
        if header is None:
            # Empty file: stop cleanly rather than letting StopIteration escape
            # from next() and surface as a RuntimeError.
            return
        Personal = namedtuple('Personal', header)
        for row in reader:
            yield Personal(*row)


def update_status_csv_reader(filepath):
    """Lazily read the update-status CSV, yielding one named tuple per data row.

    The first line of the file supplies the field names of the
    ``Update_status`` named tuple. In every data row the second and third
    columns are parsed from ``YYYY-MM-DDTHH:MM:SSZ`` text into ``date``
    objects (the time of day is discarded); the remaining values stay strings.

    Args:
        filepath: Path of the CSV file to read.

    Yields:
        Update_status: One record per data row, in file order. An empty file
        yields nothing.

    Raises:
        ValueError: If either timestamp column does not match the format.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being translated.
    with open(filepath, newline='') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        header = next(reader, None)
        if header is None:
            # Empty file: stop cleanly rather than letting StopIteration escape
            # from next() and surface as a RuntimeError.
            return
        Update_status = namedtuple('Update_status', header)
        for row in reader:
            for column in (1, 2):
                row[column] = datetime.strptime(row[column], timestamp_format).date()
            yield Update_status(*row)


def vehicles_csv_reader(filepath):
    """Lazily read the vehicles CSV, yielding one named tuple per data row.

    The first line of the file supplies the field names of the ``Vehicle``
    named tuple. In every data row the fourth column is converted to ``int``;
    the remaining values stay strings.

    Args:
        filepath: Path of the CSV file to read.

    Yields:
        Vehicle: One record per data row, in file order. An empty file
        yields nothing.

    Raises:
        ValueError: If the fourth column of a row is not an integer literal.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being translated.
    with open(filepath, newline='') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        header = next(reader, None)
        if header is None:
            # Empty file: stop cleanly rather than letting StopIteration escape
            # from next() and surface as a RuntimeError.
            return
        Vehicle = namedtuple('Vehicle', header)
        for row in reader:
            row[3] = int(row[3])
            yield Vehicle(*row)


# One lazy generator per source file. Calling a generator function does not
# open the file; that only happens when the first item is requested.
emp_reader, personal_reader, update_reader, vehicle_reader = (
    employment_csv_reader(employment_filepath),
    personal_info_csv_reader(personal_filepath),
    update_status_csv_reader(update_filepath),
    vehicles_csv_reader(vehicles_filepath),
)



In [91]:
# Peek at the field names through a throwaway reader, so that no data row is
# consumed from the shared `emp_reader` and the cell can be re-run safely.
next(employment_csv_reader(employment_filepath))._fields

('employer', 'department', 'employee_id', 'ssn')

In [45]:
import glob
import os
from collections import namedtuple
import csv
from datetime import datetime, date
from itertools import islice

# Locations of the four source CSV files, relative to the working directory.
# Forward slashes are used for every path: Python accepts them on Windows as
# well as on POSIX systems, whereas a backslash-separated path only resolves
# on Windows.
employment_filepath = 'files/project_files/employment.csv'
personal_filepath = 'files/project_files/personal_info.csv'
update_filepath = 'files/project_files/update_status.csv'
vehicles_filepath = 'files/project_files/vehicles.csv'

# The two tuples are parallel: filepaths[i] is the file meant for readers[i].
filepaths = (employment_filepath, personal_filepath, update_filepath, vehicles_filepath)
# These are the module-level reader functions, so they must already be defined
# when this line runs; otherwise it raises NameError.
readers = (employment_csv_reader, personal_info_csv_reader, update_status_csv_reader, vehicles_csv_reader)

def comb_csv_reader(paths=None, min_last_updated=date(2017, 3, 1)):
    """Merge the four source CSV files row by row into ``Combined`` records.

    Row *n* of every file is concatenated into one record. The employment
    file contributes all of its columns; the personal-info, update-status and
    vehicles files each contribute every column except their first. The two
    update-status timestamps are converted to ``date`` objects and the last
    vehicles column to ``int``. Records whose first update-status value is
    earlier than ``min_last_updated`` are skipped.

    Args:
        paths: Four file paths in the order employment, personal info, update
            status, vehicles. Defaults to the module-level ``filepaths``.
        min_last_updated: Earliest date (inclusive) a record may carry in its
            first update-status column and still be yielded.

    Yields:
        Combined: One named tuple per retained row. Iteration stops as soon
        as any one of the files runs out of rows; if any file is empty,
        nothing is yielded.

    Raises:
        ValueError: If ``paths`` does not contain exactly four entries, or a
            timestamp / integer column cannot be parsed.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    def _to_date(text):
        # Parse a 'YYYY-MM-DDTHH:MM:SSZ' string and keep only the date part.
        return datetime.strptime(text, timestamp_format).date()

    def _rows(filepath, drop_first, converters=()):
        # Yield the header and then each data row of one file, applying the
        # (column index, function) converters and optionally dropping column 0.
        start = 1 if drop_first else 0
        # newline='' leaves line-ending handling to the csv module.
        with open(filepath, newline='') as f:
            reader = csv.reader(f, delimiter=',', quotechar='"')
            header = next(reader, None)
            if header is None:
                return
            yield header[start:]
            for row in reader:
                # Indices refer to the full row, before column 0 is dropped.
                for index, convert in converters:
                    row[index] = convert(row[index])
                yield row[start:]

    if paths is None:
        paths = filepaths
    employment_path, personal_path, update_path, vehicles_path = paths

    # Use the readers defined in this function rather than any module-level
    # `readers` tuple, whose entries yield records in a different shape.
    sources = (
        _rows(employment_path, drop_first=False),
        _rows(personal_path, drop_first=True),
        _rows(update_path, drop_first=True, converters=((1, _to_date), (2, _to_date))),
        _rows(vehicles_path, drop_first=True, converters=((3, int),)),
    )
    try:
        headers = [next(source, None) for source in sources]
        if any(header is None for header in headers):
            return
        Combined = namedtuple('Combined', [name for header in headers for name in header])

        # The update-status values follow the employment and personal-info
        # values, so the first of them sits right after those two groups.
        last_updated_index = len(headers[0]) + len(headers[1])

        # zip stops at the shortest source, i.e. when any file is exhausted.
        for parts in zip(*sources):
            comb_vals = [value for part in parts for value in part]
            if comb_vals[last_updated_index] >= min_last_updated:
                yield Combined(*comb_vals)
    finally:
        # Closing each sub-generator unwinds its `with open(...)` block, so no
        # file handle outlives the combined stream.
        for source in sources:
            source.close()

# Drain the combined generator into a list so the records can be reused.
results = []
combo_reader = comb_csv_reader()
results.extend(combo_reader)


NameError: name 'employment_csv_reader' is not defined

In [9]:
from collections import namedtuple
import csv
from datetime import datetime, date
from itertools import chain, compress
import os

def filtered_csv_reader(key=None, directory='files/project_files/'):
    """Combine every CSV file under ``directory`` row by row and filter the result.

    The ``.csv`` files found under ``directory`` (searched recursively) are
    read in sorted path order. Row *n* of every file is concatenated, the
    columns flagged ``False`` in the fixed ``selector`` are dropped, the values
    at positions 8 and 9 of the remaining row are parsed from
    ``YYYY-MM-DDTHH:MM:SSZ`` text into ``date`` objects, and the last value is
    converted to ``int``. Each row is wrapped in a ``Combined`` named tuple
    whose field names come from the files' header lines.

    Args:
        key: Predicate applied to each ``Combined`` record; only records for
            which it returns a true value are yielded. ``None`` keeps every
            truthy record, which is all of them for a non-empty named tuple.
        directory: Directory to scan for CSV files.

    Yields:
        Combined: The records accepted by ``key``, in file order. Iteration
        stops as soon as any file runs out of rows; nothing is yielded when
        no CSV file is found or one of them is empty.

    Raises:
        ValueError: If a timestamp or the final integer column cannot be parsed.
    """
    def _csv_reader(filepath):
        # Yield every row of one file, header line included.
        # newline='' leaves line-ending handling to the csv module.
        with open(filepath, newline='') as f:
            yield from csv.reader(f, delimiter=',', quotechar='"')

    def comb_csv_reader():
        # os.walk gives no ordering guarantee, but `selector` is positional,
        # so the files are sorted to make the column layout deterministic.
        # os.path.join is used because only the top-level root is guaranteed
        # to end with a path separator.
        filepaths = sorted(
            os.path.join(root, file)
            for root, _, files in os.walk(directory)
            for file in files
            if file.lower().endswith('.csv')
        )
        readers = [_csv_reader(filepath) for filepath in filepaths]

        # Column mask over the concatenated row: positions 4, 9 and 12 are dropped.
        # NOTE(review): presumably these are the repeated key column of the
        # 2nd-4th files - confirm against the CSV headers.
        selector = (True, True, True, True, False, True, True, True, True, False, True, True, False, True, True, True)
        try:
            headers = [next(reader, None) for reader in readers]
            if not headers or any(header is None for header in headers):
                return
            colnames = list(compress(chain.from_iterable(headers), selector))
            Combined = namedtuple('Combined', colnames)

            # zip ends the loop when any file is exhausted, so no exception
            # has to be caught to detect the end of the data.
            for parts in zip(*readers):
                comb_vals = list(compress(chain.from_iterable(parts), selector))
                comb_vals[8:10] = [
                    datetime.strptime(comb_vals[i], "%Y-%m-%dT%H:%M:%SZ").date()
                    for i in range(8, 10)
                ]
                comb_vals[-1] = int(comb_vals[-1])
                yield Combined(*comb_vals)
        finally:
            # Closing each sub-generator unwinds its `with open(...)` block.
            for reader in readers:
                reader.close()

    inter_reader = comb_csv_reader()
    yield from filter(key, inter_reader)

# Collect every combined record last updated on or after 1 March 2017.
results = []
temp_reader = filtered_csv_reader(key=lambda rec: rec.last_updated >= date(2017, 3, 1))
results.extend(temp_reader)


In [10]:
# Number of combined records currently held in `results`.
len(results)

871

In [16]:
from itertools import groupby

# groupby only merges adjacent rows, so order the records by (gender, make) first.
results.sort(key=lambda rec: (rec.gender, rec.vehicle_make))

# Group on a "<gender> <make>" label and count the members of each group.
make_groups = groupby(results, key=lambda rec: ' '.join((rec.gender, rec.vehicle_make)))
make_counts = ((label, len(list(members))) for label, members in make_groups)

# Descending by (gender, count). The sort is stable, so labels with equal
# counts keep the alphabetical order produced by the grouping above.
counts_list = sorted(
    make_counts,
    key=lambda pair: (pair[0].partition(' ')[0], pair[1]),
    reverse=True,
)
counts_list

[('Male Ford', 40),
 ('Male Chevrolet', 30),
 ('Male GMC', 28),
 ('Male Mitsubishi', 28),
 ('Male Dodge', 22),
 ('Male Toyota', 21),
 ('Male Mercedes-Benz', 19),
 ('Male Volkswagen', 16),
 ('Male Audi', 14),
 ('Male Buick', 13),
 ('Male Mazda', 13),
 ('Male BMW', 12),
 ('Male Mercury', 11),
 ('Male Pontiac', 11),
 ('Male Volvo', 10),
 ('Male Cadillac', 9),
 ('Male Honda', 9),
 ('Male Hyundai', 8),
 ('Male Saab', 8),
 ('Male Subaru', 8),
 ('Male Acura', 7),
 ('Male Infiniti', 7),
 ('Male Jeep', 7),
 ('Male Lexus', 6),
 ('Male Nissan', 6),
 ('Male Kia', 5),
 ('Male Lincoln', 5),
 ('Male Lotus', 5),
 ('Male Oldsmobile', 5),
 ('Male Jaguar', 4),
 ('Male Lamborghini', 4),
 ('Male Plymouth', 4),
 ('Male Porsche', 4),
 ('Male Aston Martin', 3),
 ('Male Bentley', 3),
 ('Male Chrysler', 3),
 ('Male Isuzu', 3),
 ('Male Land Rover', 3),
 ('Male Maserati', 3),
 ('Male Saturn', 3),
 ('Male Geo', 2),
 ('Male Maybach', 2),
 ('Male Panoz', 2),
 ('Male Suzuki', 2),
 ('Male Aptera', 1),
 ('Male Austin',

In [34]:
from itertools import groupby, tee

# groupby only merges adjacent rows, so order the records by (gender, make) first.
results.sort(key=lambda rec: (rec.gender, rec.vehicle_make))

# Two independent grouping passes over the sorted records, one per gender.
group1 = groupby(results, key=lambda rec: (rec.gender, rec.vehicle_make))
group2 = groupby(results, key=lambda rec: (rec.gender, rec.vehicle_make))

# ((gender, make), count) for each female group. Every group is counted
# before groupby advances, because advancing invalidates the previous group.
group_f = (pair for pair in group1 if pair[0][0] == 'Female')
f_list = [(label, len(list(members))) for label, members in group_f]

# The same summary for the male groups.
group_m = (pair for pair in group2 if pair[0][0] == 'Male')
m_list = [(label, len(list(members))) for label, members in group_m]

In [64]:
# Build the filtered record stream once and split it into two independent iterators.
temp_reader = filtered_csv_reader(key=lambda rec: rec.last_updated >= date(2017, 3, 1))
data_1, data_2 = tee(temp_reader)

# Male records, ordered by make so that groupby sees each make as one run.
data_m = filter(lambda rec: rec.gender == 'Male', data_1)
sorted_data_m = sorted(data_m, key=lambda rec: rec.vehicle_make)
groups_m = groupby(sorted_data_m, key=lambda rec: rec.vehicle_make)

# Female records, prepared the same way from the second iterator.
data_f = filter(lambda rec: rec.gender == 'Female', data_2)
sorted_data_f = sorted(data_f, key=lambda rec: rec.vehicle_make)
groups_f = groupby(sorted_data_f, key=lambda rec: rec.vehicle_make)

In [65]:
# Number of records per vehicle make. `groups_m` is a one-shot iterator, so
# evaluating this expression a second time produces an empty dict.
{make: len(list(members)) for make, members in groups_m}

{'Acura': 7,
 'Aptera': 1,
 'Aston Martin': 3,
 'Audi': 14,
 'Austin': 1,
 'BMW': 12,
 'Bentley': 3,
 'Buick': 13,
 'Cadillac': 9,
 'Chevrolet': 30,
 'Chrysler': 3,
 'Corbin': 1,
 'Daewoo': 1,
 'Dodge': 22,
 'Eagle': 1,
 'Ford': 40,
 'GMC': 28,
 'Geo': 2,
 'Honda': 9,
 'Hyundai': 8,
 'Infiniti': 7,
 'Isuzu': 3,
 'Jaguar': 4,
 'Jeep': 7,
 'Jensen': 1,
 'Kia': 5,
 'Lamborghini': 4,
 'Land Rover': 3,
 'Lexus': 6,
 'Lincoln': 5,
 'Lotus': 5,
 'Maserati': 3,
 'Maybach': 2,
 'Mazda': 13,
 'Mercedes-Benz': 19,
 'Mercury': 11,
 'Mitsubishi': 28,
 'Nissan': 6,
 'Oldsmobile': 5,
 'Panoz': 2,
 'Plymouth': 4,
 'Pontiac': 11,
 'Porsche': 4,
 'Rolls-Royce': 1,
 'Saab': 8,
 'Saturn': 3,
 'Scion': 1,
 'Smart': 1,
 'Subaru': 8,
 'Suzuki': 2,
 'Toyota': 21,
 'Volkswagen': 16,
 'Volvo': 10}

In [None]:
def group_data(filter_key, group_key, gender):
    """Group the filtered records of one gender by ``group_key``.

    Args:
        filter_key: Predicate passed to ``filtered_csv_reader`` as ``key``.
        group_key: Key function used both to sort the rows and to group them.
        gender: Value a row's ``gender`` attribute must equal to be kept.

    NOTE(review): within the lines reviewed here ``groups`` is built but not
    returned - confirm what the function is meant to hand back.
    """
    temp_reader = filtered_csv_reader(key=filter_key)
    # Iterate this call's own reader rather than a notebook-level iterator,
    # so each call sees the full data set regardless of earlier cells.
    data = (row for row in temp_reader if row.gender == gender)
    # groupby only merges adjacent rows, hence the sort on the same key.
    sorted_data = sorted(data, key=group_key)
    groups = groupby(sorted_data, key=group_key)