### Read text files

In [46]:
import bz2
import logging
import re
import pandas as pd
from datetime import datetime
from pprint import pprint

In [65]:
# Compiled once and reused for every line. Groups: (1) the integer after
# "of ", (2) the non-space token after "started at ", (3) the dollar amount
# after "paid $". The amount keeps *all* fractional digits (and tolerates a
# whole-dollar value) so cents are not truncated to a single digit.
_RIDE_RE = re.compile(
    r'of (\d+).*started at ([^ ]+).*paid \$(\d+(?:\.\d+)?)')


def parse_line(line):
    """Parse one log line into a ride record.

    Args:
        line: A single line of text from the log.

    Returns:
        A dict with keys ``'count'`` (int), ``'start'``
        (``datetime.datetime``, parsed with ``pd.to_datetime``) and
        ``'amount'`` (float), or ``None`` if the line does not match the
        expected pattern.
    """
    match = _RIDE_RE.search(line)
    if not match:
        return None

    return {
        'count': int(match.group(1)),
        # Convert the pandas Timestamp to a plain datetime for callers.
        'start': pd.to_datetime(match.group(2)).to_pydatetime(),
        'amount': float(match.group(3)),
    }
    

In [66]:
def iter_rides(file_name):
    """Yield parsed ride records from a bz2-compressed text log.

    The file is opened in text mode with ``bz2.open`` and each line is
    passed to ``parse_line``. Lines that ``parse_line`` cannot parse
    (it returns ``None``) are skipped, and a warning that includes the
    1-based line number is logged, so consumers only ever receive dicts.

    Args:
        file_name: Path to the bz2-compressed log file.

    Yields:
        The dict returned by ``parse_line`` for each parsable line.
    """
    with bz2.open(file_name, 'rt') as fp:
        for lnum, line in enumerate(fp, 1):
            record = parse_line(line)
            if record is None:
                # Report the bad line instead of handing None to callers.
                logging.warning(
                    '%s:%d: cannot parse line: %r', file_name, lnum, line)
                continue
            yield record

In [67]:
if __name__ == '__main__':
    # Stream the rides from the compressed log and pretty-print each one.
    for ride in iter_rides('taxi.log.bz2'):
        pprint(ride)

{'amount': 20.5,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 7, 10, 55)}
{'amount': 13.8,
 'count': 5,
 'start': datetime.datetime(2018, 10, 31, 16, 38, 25)}
{'amount': 11.3,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 20, 23, 41)}
{'amount': 5.8,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 22, 44, 24)}
{'amount': 13.5,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 23, 22, 18)}
{'amount': 24.3,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 23, 27, 39)}
{'amount': 11.6,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 23, 40, 55)}
{'amount': 11.3,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 23, 45, 53)}
{'amount': 6.8,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 23, 46, 42)}
{'amount': 17.8,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 23, 47, 30)}
{'amount': 18.8,
 'count': 1,
 'start': datetime.datetime(2018, 10, 31, 23, 47, 46)}
{'amount': 8.8,
 'count': 2,
 'start': datetime.datetime(2018, 10, 3

{'amount': 13.8, 'count': 3, 'start': datetime.datetime(2018, 11, 1, 1, 3, 51)}
{'amount': 13.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 3, 54)}
{'amount': 8.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 3, 55)}
{'amount': 11.1, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 1, 3, 58)}
{'amount': 11.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 4, 10)}
{'amount': 8.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 4, 24)}
{'amount': 24.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 4, 25)}
{'amount': 9.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 4, 36)}
{'amount': 17.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 4, 37)}
{'amount': 20.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 4, 45)}
{'amount': 9.9, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 4, 51)}
{'amount': 14.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 1, 4, 59)}
{'amount': 22.8, 'count': 1, 'start': dateti

{'amount': 34.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 3, 52)}
{'amount': 8.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 3, 57)}
{'amount': 12.3, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 2, 4, 4)}
{'amount': 13.0, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 5)}
{'amount': 9.9, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 8)}
{'amount': 12.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 11)}
{'amount': 29.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 12)}
{'amount': 13.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 15)}
{'amount': 14.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 20)}
{'amount': 22.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 23)}
{'amount': 49.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 25)}
{'amount': 6.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 2, 4, 46)}
{'amount': 33.5, 'count': 3, 'start': datetime

{'amount': 9.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 52, 17)}
{'amount': 10.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 52, 18)}
{'amount': 9.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 52, 25)}
{'amount': 38.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 52, 30)}
{'amount': 5.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 52, 55)}
{'amount': 7.3, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 3, 53, 14)}
{'amount': 12.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 53, 28)}
{'amount': 20.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 53, 34)}
{'amount': 31.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 53, 36)}
{'amount': 11.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 53, 49)}
{'amount': 6.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 3, 54, 13)}
{'amount': 9.3, 'count': 0, 'start': datetime.datetime(2018, 11, 1, 3, 54, 20)}
{'amount': 5.3, 'count': 1, 'start

{'amount': 18.1, 'count': 5, 'start': datetime.datetime(2018, 11, 1, 6, 25, 43)}
{'amount': 4.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 25, 44)}
{'amount': 11.1, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 25, 48)}
{'amount': 7.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 25, 49)}
{'amount': 4.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 25, 49)}
{'amount': 11.1, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 25, 50)}
{'amount': 5.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 25, 51)}
{'amount': 12.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 25, 51)}
{'amount': 7.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 26, 7)}
{'amount': 13.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 26, 14)}
{'amount': 6.0, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 26, 16)}
{'amount': 7.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 6, 26, 28)}
{'amount': 19.5, 'count': 1, 'start'

{'amount': 8.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 21, 34)}
{'amount': 14.0, 'count': 5, 'start': datetime.datetime(2018, 11, 1, 7, 21, 36)}
{'amount': 17.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 21, 36)}
{'amount': 13.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 21, 38)}
{'amount': 13.3, 'count': 6, 'start': datetime.datetime(2018, 11, 1, 7, 21, 38)}
{'amount': 6.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 21, 39)}
{'amount': 12.9, 'count': 3, 'start': datetime.datetime(2018, 11, 1, 7, 21, 51)}
{'amount': 8.0, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 21, 59)}
{'amount': 6.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 22, 1)}
{'amount': 12.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 22, 2)}
{'amount': 6.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 22, 4)}
{'amount': 19.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 22, 7)}
{'amount': 70.2, 'count': 5, 'start':

{'amount': 29.1, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 57, 54)}
{'amount': 9.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 57, 57)}
{'amount': 9.9, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 7, 57, 58)}
{'amount': 10.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 57, 59)}
{'amount': 15.3, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 7, 57, 59)}
{'amount': 14.1, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 7, 58)}
{'amount': 3.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 58)}
{'amount': 24.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 58, 1)}
{'amount': 29.3, 'count': 5, 'start': datetime.datetime(2018, 11, 1, 7, 58, 3)}
{'amount': 9.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 7, 58, 8)}
{'amount': 6.3, 'count': 6, 'start': datetime.datetime(2018, 11, 1, 7, 58, 9)}
{'amount': 16.3, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 7, 58, 13)}
{'amount': 14.1, 'count': 1, 'start': datetim

{'amount': 8.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 8, 34, 42)}
{'amount': 23.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 8, 34, 43)}
{'amount': 7.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 8, 34, 48)}
{'amount': 15.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 8, 34, 54)}
{'amount': 19.8, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 8, 34, 54)}
{'amount': 10.1, 'count': 5, 'start': datetime.datetime(2018, 11, 1, 8, 35, 3)}
{'amount': 5.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 8, 35, 3)}
{'amount': 8.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 8, 35, 5)}
{'amount': 42.0, 'count': 5, 'start': datetime.datetime(2018, 11, 1, 8, 35, 6)}
{'amount': 5.1, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 8, 35, 8)}
{'amount': 14.1, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 8, 35, 9)}
{'amount': 10.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 8, 35, 11)}
{'amount': 17.1, 'count': 1, 'start': d

{'amount': 9.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 22)}
{'amount': 13.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 24)}
{'amount': 10.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 25)}
{'amount': 13.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 26)}
{'amount': 6.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 28)}
{'amount': 12.0, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 31)}
{'amount': 8.3, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 9, 12, 31)}
{'amount': 5.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 35)}
{'amount': 27.9, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 36)}
{'amount': 15.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 37)}
{'amount': 10.5, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 9, 12, 38)}
{'amount': 15.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 12, 40)}
{'amount': 10.5, 'count': 1, 'st

{'amount': 13.6, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 13)}
{'amount': 44.4, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 14)}
{'amount': 24.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 18)}
{'amount': 12.7, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 22)}
{'amount': 12.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 22)}
{'amount': 8.1, 'count': 0, 'start': datetime.datetime(2018, 11, 1, 9, 51, 30)}
{'amount': 14.1, 'count': 2, 'start': datetime.datetime(2018, 11, 1, 9, 51, 33)}
{'amount': 6.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 34)}
{'amount': 7.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 36)}
{'amount': 15.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 41)}
{'amount': 5.0, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 46)}
{'amount': 18.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 9, 51, 47)}
{'amount': 15.9, 'count': 1, 'st

 'start': datetime.datetime(2018, 11, 1, 10, 18, 26)}
{'amount': 16.6,
 'count': 6,
 'start': datetime.datetime(2018, 11, 1, 10, 18, 27)}
{'amount': 10.8,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 18, 27)}
{'amount': 23.7,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 18, 30)}
{'amount': 7.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 10, 18, 30)}
{'amount': 57.3,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 18, 31)}
{'amount': 7.3, 'count': 5, 'start': datetime.datetime(2018, 11, 1, 10, 18, 33)}
{'amount': 27.3,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 18, 35)}
{'amount': 5.1, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 10, 18, 35)}
{'amount': 10.8,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 18, 35)}
{'amount': 14.8,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 18, 37)}
{'amount': 15.8,
 'count': 5,
 'start': datetime.datetime(2018, 11, 1, 10, 18, 37)}
{'amount': 6.3, 'count': 5, 'st

{'amount': 10.8,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 31, 33)}
{'amount': 15.0,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 31, 34)}
{'amount': 8.8, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 10, 31, 36)}
{'amount': 12.8,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 31, 38)}
{'amount': 10.8,
 'count': 2,
 'start': datetime.datetime(2018, 11, 1, 10, 31, 40)}
{'amount': 21.7,
 'count': 2,
 'start': datetime.datetime(2018, 11, 1, 10, 31, 41)}
{'amount': 29.1,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 31, 41)}
{'amount': 13.0,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 31, 46)}
{'amount': 7.5, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 10, 31, 53)}
{'amount': 15.8,
 'count': 4,
 'start': datetime.datetime(2018, 11, 1, 10, 31, 58)}
{'amount': 22.3,
 'count': 6,
 'start': datetime.datetime(2018, 11, 1, 10, 32, 12)}
{'amount': 9.9, 'count': 5, 'start': datetime.datetime(2018, 11, 1, 10, 32, 13)}
{

 'start': datetime.datetime(2018, 11, 1, 10, 49, 22)}
{'amount': 14.3,
 'count': 2,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 23)}
{'amount': 11.7,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 24)}
{'amount': 20.3,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 24)}
{'amount': 8.3, 'count': 1, 'start': datetime.datetime(2018, 11, 1, 10, 49, 28)}
{'amount': 24.1,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 34)}
{'amount': 13.8,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 36)}
{'amount': 14.8,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 37)}
{'amount': 26.8,
 'count': 2,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 39)}
{'amount': 13.5,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 39)}
{'amount': 48.5,
 'count': 1,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 41)}
{'amount': 24.3,
 'count': 3,
 'start': datetime.datetime(2018, 11, 1, 10, 49, 43)}
{'amount': 13.8,
 'count'