Introductory Python code examples  
Lee Spector, lspector@amherst.edu  
August, 2020

In [None]:
%matplotlib inline

from matplotlib import pyplot as plt 
import numpy as np

plt.rcdefaults() # set the matplotlib runtime configuration (rc) to its defaults

In [None]:
# This gives us access to the library that will read a csv file:

import csv 

In [None]:
# This reads a data file, which should be in the same folder
# as this notebook, and puts it in the variable called 
# records as a list of lists, where each sub-list is one
# row of the data.
#
# newline='' is what the csv module documentation asks for when
# opening a file for csv.reader: the reader then does its own
# line-ending handling, so quoted fields that contain line breaks
# are parsed correctly:

with open('populationbycountry19802010millions.csv', newline='') as f:
    records = list(csv.reader(f))

In [None]:
# This shows us the full data set:

records

In [None]:
# This shows us how many rows there are:

len(records)

In [None]:
# This shows us how many items are in the first row:

len(records[0])

In [None]:
# This gives us the first row, starting at the second item
# and going to the end:

records[0][1:]

In [None]:
def try_float(s):
    """Return s converted to a float, or s itself if it is not numeric.

    The conversion is attempted with float(s). If float() raises
    ValueError (for example because s is a string that does not spell
    a number), the original value is handed back unchanged.
    """
    try:
        return float(s)
    except ValueError:
        # Not a number: keep the original value as it is.
        return s

In [None]:
# A string that spells a number is converted to the corresponding float:

try_float("123.4")

In [None]:
# This shows us the type of the value we get back for a numeric string:

type(try_float("123.4"))

In [None]:
# A string that float() cannot convert is returned unchanged:

try_float("Alphabet soup")

In [None]:
# Here we run try_float over every item of every row, so items that
# spell numbers become floats and everything else is left as it was:

records = [list(map(try_float, row)) for row in records]

In [None]:
# This shows us the data set again, after the conversion with try_float:

records

In [None]:
# Here we plot the first row (without its first item) on the x axis
# against the second row (without its first item) on the y axis.
# The xticks call rotates the x-axis tick labels by 90 degrees:

plt.xticks(rotation=90)
plt.plot(records[0][1:], records[1][1:])

In [None]:
# This gives us a list of just the rows for which
# the first item is 'Mexico':

[row for row in records if row[0] == 'Mexico']

In [None]:
# This gives us just the actual row that starts with 'Mexico':

[row for row in records if row[0] == 'Mexico'][0]

In [None]:
# Here's the same thing for 'Sierra Leone':

[row for row in records if row[0] == 'Sierra Leone'][0]

In [None]:
# Here we plot the first row (the years) on the x axis and the
# population of Sierra Leone on the y axis:

plt.plot(records[0][1:], [row for row in records if row[0] == 'Sierra Leone'][0][1:])

In [None]:
# Not sure why, but an explicit call to plt.show() is necessary now...

plt.plot(records[0][1:], [row for row in records if row[0] == 'Sierra Leone'][0][1:])
plt.show()

In [None]:
# To ignore, rather than choking on, problematic characters, you can use
# code like this, which was adapted from:
# http://stackoverflow.com/questions/12468179/
#   unicodedecodeerror-utf8-codec-cant-decode-byte-0x9c

import codecs
# The built-in open() accepts the same encoding and errors arguments,
# so the codecs wrapper is not needed here (codecs.open() is deprecated
# since Python 3.14). errors='ignore' drops bytes that are not valid
# UTF-8 instead of raising UnicodeDecodeError, and newline='' lets the
# csv module handle line endings itself.
#
# Note that this rebinds records to freshly read rows, in which every
# item is a string again.
with open('populationbycountry19802010millions.csv',
          "r", encoding='utf-8', errors='ignore', newline='') as f:
    records = list(csv.reader(f))