In [1]:
import csv


# Open with newline='' so the csv module handles line endings itself; without
# it, quoted fields containing line breaks may be parsed incorrectly.
with open('mpg.csv', newline='') as csvfile:
    # DictReader uses the header row as keys, giving one dictionary per data row.
    mpg = list(csv.DictReader(csvfile))

mpg[:3]  # The first three dictionaries in our list.

[OrderedDict([('', '1'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'auto(l5)'),
              ('drv', 'f'),
              ('cty', '18'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '2'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'manual(m5)'),
              ('drv', 'f'),
              ('cty', '21'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '3'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '2'),
              ('year', '2008'),
              ('cyl', '4'),
              ('trans', 'manual(m6)'),
              ('drv',

csv.DictReader has read in each row of our csv file as a dictionary. len shows that our list consists of 234 dictionaries.

In [2]:
len(mpg)  # number of row dictionaries in the list

234

In [8]:
type(mpg[0])  # check what kind of object a single row is

collections.OrderedDict

In [9]:
mpg[0]['cty']  # look up the 'cty' column of the first row

'18'

In [3]:
mpg[0].keys()  # the keys of a row are the column names of our csv

odict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

In [4]:
# Average cty fuel economy across all cars. All values in the dictionaries
# are strings, so each one has to be converted to float before summing.
cty_all = [float(row['cty']) for row in mpg]
sum(cty_all) / len(cty_all)

16.858974358974358

In [5]:
# Similarly, the average hwy fuel economy across all cars.
hwy_all = [float(row['hwy']) for row in mpg]
sum(hwy_all) / len(hwy_all)

23.44017094017094

In [6]:
# A set keeps only the unique values, giving the distinct cylinder counts
# that the cars in our dataset have.
cylinders = {row['cyl'] for row in mpg}
cylinders

{'4', '5', '6', '8'}

Here's a more complex example where we group the cars by number of cylinders and find the average cty mpg for each group.

In [30]:
# Average cty mpg for each cylinder count, as a list of
# ('cylinder', 'avg mpg') tuples.
CtyMpgByCyl = []

for c in cylinders:  # iterate over all the cylinder levels
    # Keep only the rows whose cylinder level matches; the cty values are
    # strings, so convert each one to float before averaging.
    cty_values = [float(d['cty']) for d in mpg if d['cyl'] == c]
    CtyMpgByCyl.append((c, sum(cty_values) / len(cty_values)))

# The cylinder labels are strings, so sort on their numeric value: a plain
# string sort would place a label such as '10' or '12' before '4'.
CtyMpgByCyl.sort(key=lambda y: int(y[0]))
CtyMpgByCyl

[('4', 21.012345679012345),
 ('5', 20.5),
 ('6', 16.21518987341772),
 ('8', 12.571428571428571)]

In [32]:
# Collect the distinct values of the 'class' column to see which
# class types appear in our dataset.
vehicleclass = {row['class'] for row in mpg}
vehicleclass

{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}

In [103]:
# Average hwy mpg for each class of vehicle in our dataset, as a list of
# ('class', 'avg mpg') tuples ordered from lowest to highest average.
HwyMpgByClass = []

for vclass in vehicleclass:  # one pass over the rows per vehicle class
    # Keep only the rows whose class matches; the hwy values are strings,
    # so convert each one to float before averaging.
    hwy_values = [float(row['hwy']) for row in mpg if row['class'] == vclass]
    HwyMpgByClass.append((vclass, sum(hwy_values) / len(hwy_values)))

HwyMpgByClass.sort(key=lambda pair: pair[1])  # order by the average hwy mpg
HwyMpgByClass

[('pickup', 16.87878787878788),
 ('suv', 18.129032258064516),
 ('minivan', 22.363636363636363),
 ('2seater', 24.8),
 ('midsize', 27.29268292682927),
 ('subcompact', 28.142857142857142),
 ('compact', 28.29787234042553)]