In [52]:
import numpy as np
from scipy.interpolate import CubicSpline, KroghInterpolator, BarycentricInterpolator
from bokeh.plotting import figure, output_notebook, show
from bokeh.layouts import gridplot
output_notebook()
import requests
import bs4
import re
import pandas as pd

### Problem #1

In [53]:
x = [16, 12, 6, 2, 4, 7, 11, 15, 14, 7]
y = [4, 1, 0, 4, 10, 15, 18, 17, 12, 8]

In [54]:
def spline_interpolation(x, y, bc, num_points):
    assert(len(x) == len(y))
    cs_x, cs_y = [CubicSpline(range(0, len(arr)), arr, bc_type=bc) for arr in (x, y)]
    return [cs(np.linspace(0, len(x), num_points)) for cs in (cs_x, cs_y)]

In [55]:
boundary_conditions = ["not-a-knot", "clamped", "natural", ((2,1),(2,1)), ((2,2),(2,1)), ((2,1),(2,2))]
spline_interpolations = [spline_interpolation(x, y, bc, 100) for bc in boundary_conditions]

In [56]:
def make_figure(x, y, title):
    if (isinstance(title, tuple)):
        muL, muR = list(zip(*title))[1]
        title = f"muL={muL}, muR={muR}"
    fig = figure(width=250, plot_height=250, title=title)
    fig.line(x, y)
    return fig

figs = [make_figure(tup[0], tup[1], title) for tup, title in zip(spline_interpolations, boundary_conditions)]
fig = gridplot(np.reshape(figs, (2, 3)).tolist())
show(fig)

### Problem 2

In [57]:
url = "https://en.wikipedia.org/wiki/History_of_United_States_postage_rates"
source = requests.get(url).content
bs_tree = bs4.BeautifulSoup(source, "lxml")
rows = bs_tree.find("table").findAll("tr")

In [58]:
years_and_prices = [row.findAll("td")[0:2] for row in rows[1:]]
years_and_prices = [(re.search(r"\d{4}", year.text).group(), re.match(r".\d+", price.text).group()) for year, price in years_and_prices]
years_and_prices = [(int(year), float(price)) for year, price in years_and_prices]
years_and_prices = {price: year for year, price in sorted(years_and_prices, key=lambda tup: tup[1])}

In [59]:
prices, years = zip(*years_and_prices.items())

In [60]:
def coef(x, y):
    np.array(x).astype(float)
    np.array(y).astype(float)
    n = len(x)
    a = []
    for i in range(n):
        a.append(y[i])

    for j in range(1, n):
        for i in range(n-1, j-1, -1):
            a[i] = float(a[i]-a[i-1])/float(x[i]-x[i-j])
    return np.array(a)

In [61]:
titles = ["cubic spline", "newton polynomial", "least squares polynomial"]
spline = CubicSpline(prices, years, bc_type="not-a-knot")
newton_poly = np.poly1d(coef(prices, years))
least_squares_poly = np.poly1d(np.polyfit(prices, years, 5))
df = pd.DataFrame(index=titles, columns=["year when cost = 0.5"])
df.iloc[:,0] = [spline(0.5), newton_poly(0.5), least_squares_poly(0.5)]
df

Unnamed: 0,year when cost = 0.5
cubic spline,2007.5835955360908
newton polynomial,2.45567e+23
least squares polynomial,2022.77


In [62]:
x = np.linspace(0, 0.51, 1000)
ys = [spline(x), newton_poly(x), least_squares_poly(x)]

In [63]:
def make_figure(x, y, title):
    fig = figure(width=250, plot_height=250, title=title)
    fig.line(x, y)
    return fig

plots = [make_figure(x, y, title) for y, title in zip(ys, titles)]
fig = gridplot([plots])
show(fig)

### Problem #3

In [64]:
url = "https://en.wikipedia.org/wiki/United_States_Census"
source = requests.get(url).content
bs_tree = bs4.BeautifulSoup(source, "lxml")
rows = bs_tree.findAll("table")[1].findAll("tr")[1:]

In [65]:
def choose_indexes(arr, indexes):
    return [arr[i] for i in indexes]

years_and_populations = [choose_indexes(row.findAll("td"), [1,3]) for row in rows]
years_and_populations = [(int(year.text), int(population.text.replace(",",""))) for year, population in years_and_populations]
years_and_populations

[(1790, 3929326),
 (1800, 5308483),
 (1810, 7239881),
 (1820, 9638453),
 (1830, 12866020),
 (1840, 17069453),
 (1850, 23191876),
 (1860, 31443321),
 (1870, 39818449),
 (1880, 50189209),
 (1890, 62947714),
 (1900, 76212168),
 (1910, 92228496),
 (1920, 106021537),
 (1930, 122775046),
 (1940, 132164569),
 (1950, 150697361),
 (1960, 179323175),
 (1970, 203302031),
 (1980, 226545805),
 (1990, 248709873),
 (2000, 281421906),
 (2010, 308745538)]

In [66]:
years, populations = zip(*years_and_populations)
spline = CubicSpline(years, populations, bc_type="natural")

In [67]:
x = np.arange(np.min(years), np.max(years)+1)
y = spline(x)

In [74]:
plot = figure(width=350, plot_height=350, title="United States Census")
plot.line(x, y)
pass

In [75]:
def compute_error(x, y, index):
    x_arr, y_arr = np.array(x), np.array(y)
    assert(x_arr.shape == y_arr.shape)
    mask = np.ones(len(x),dtype=bool)
    mask[index] = 0
    spline = CubicSpline(x_arr[mask], y_arr[mask], bc_type="natural")
    validation_sample = spline(x_arr[index])
    return np.abs((y[index] - validation_sample)/y[index])

errors = [compute_error(years, populations, i) for i in range(0, len(years))]

In [76]:
error_plot = figure(width=350, plot_height=350, title="Interpolation error")
error_plot.line(years, errors)
show(gridplot([[plot, error_plot]]))

The error is higher at the beginning endpoint but is overall quite low.