In [1]:
import numpy as np
import pandas as pd
import os
import csv
from bokeh.plotting import figure, show, output_file
from bokeh.charts import BoxPlot

### Retrieve the data and put it in a usable format

In [2]:
# Get the NOAA data filenames from the current working directory.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# order in which the files are read reproducible from run to run.
files = sorted(f for f in os.listdir() if f.endswith('.asc'))

In [3]:
# Create a dataframe of all temperature readings from NOAA data.
# Each file contributes one frame: its first three columns are read as
# year, month and anomaly, and every row is tagged with a coordinate label
# built from the 4th and 5th dot-separated tokens of the filename.
if not files:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate".
    raise ValueError("No '.asc' data files were found; nothing to load")

df = dict()
for i, filename in enumerate(files):
    tokens = filename.split('.')
    coordinate = tokens[3] + "_" + tokens[4]
    x = np.genfromtxt(filename)
    df[i] = pd.DataFrame(x[:, 0:3], columns=['year', 'month', 'anomaly'])
    df[i]['coordinate'] = coordinate

frames = list(df.values())
# ignore_index gives the combined frame a unique 0..N-1 index instead of
# repeating every file's own row labels. year/month are dropped from the
# combined frame only; the per-file frames kept in `df` still carry them.
temp = pd.concat(frames, ignore_index=True).drop(['year', 'month'], axis=1)


### Briefly inspect the data as a sanity check

In [4]:
# Preview the first rows of the combined frame.
temp.head()

Unnamed: 0,anomaly,coordinate
0,-0.178744,00N_30N
1,-0.37088,00N_30N
2,-0.105133,00N_30N
3,0.057406,00N_30N
4,-0.253853,00N_30N


In [5]:
# Summary statistics of the numeric column(s), to spot implausible values.
temp.describe()

Unnamed: 0,anomaly
count,21372.0
mean,-0.20306
std,0.492666
min,-10.062963
25%,-0.471704
50%,-0.251047
75%,0.042325
max,5.865385


### Visualize

In [6]:
# Create a boxplot of the anomaly values, one box per coordinate label.
# NOTE(review): bokeh.charts is a legacy API that newer Bokeh releases no
# longer ship - confirm the pinned Bokeh version before re-running this cell.
title = "Global Annual Temperature Anomaly (1880 - 2016)"
box_plot = BoxPlot(temp, label=['coordinate'], values='anomaly', legend=False,
                   whisker_color='#335c81', color='#8797b2', outlier_line_color='#698f3f',
                   outlier_fill_color='#698f3f', title=title)

# The coordinate labels (e.g. "00N_30N") pair two latitude bounds, so the
# x axis shows latitude bands, not latitude/longitude points.
box_plot.xaxis.axis_label = "Latitude Band"
box_plot.yaxis.axis_label = "Difference from 1971-2000 Average (in deg. C)"

# Direct the rendered chart to a standalone HTML file, then display it.
output_file("Global_Temperature_Anomaly.html")

show(box_plot)