# Climate Change Data Analysis

The goal of this project is to explore land temperature data in different areas over time, to assess the severity of climate change.

In [1]:
import pickle
import os
import sys
import pandas as pd
import numpy as np
import copy

import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

%reload_ext autoreload
%autoreload 2

In [3]:
# Load data

# Hand the path straight to pandas and pin the encoding: the City column holds
# non-ASCII names, so decoding must not depend on the platform's default
# locale (which is what a plain open(..., 'r') would use on Python 3).
city_data = pd.read_csv('data/GlobalLandTemperaturesByCity.csv', encoding='utf-8')

## Basic Data Exploration

First we'll investigate the various variables in the data and do some cleaning.

In [4]:
# Preview the first rows to see which columns the file provides.
city_data.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [5]:
# (rows, columns) of the loaded frame.
city_data.shape

(8599212, 7)

In [12]:
# Column dtypes: 'object' columns are still raw strings and need parsing.
city_data.dtypes

dt                                object
AverageTemperature               float64
AverageTemperatureUncertainty    float64
City                              object
Country                           object
Latitude                          object
Longitude                         object
dtype: object

The data is pretty self-explanatory: we have average temperatures with uncertainty for cities over time. We can convert the `dt` column into datetime objects and parse the latitude and longitude strings into signed numbers (strip the direction letter and multiply by -1 for S and W).

In [23]:
# Parse the 'dt' strings (year-month-day) into datetimes and keep them in a
# separate 'time' column so the original text column stays available.
observation_dates = pd.to_datetime(city_data['dt'], format='%Y-%m-%d')
city_data['time'] = observation_dates

In [33]:
# Scratch check: splitting on a hemisphere letter that is not present leaves
# the string intact, returned as a one-element list.
'34.56S'.split('N')

['34.56S']

In [None]:
def parse_lat_lon(val, t):
    """Convert a coordinate string such as '34.56S' into a signed float.

    Parameters
    ----------
    val : str
        Magnitude in degrees followed by a one-letter hemisphere suffix:
        'N' or 'S' for a latitude, 'E' or 'W' for a longitude.
    t : str
        Which kind of coordinate ``val`` is: 'lat' or 'lon'.

    Returns
    -------
    float
        The magnitude, positive for N/E and negative for S/W.

    Raises
    ------
    ValueError
        If ``t`` is neither 'lat' nor 'lon', if the suffix does not belong to
        that coordinate type, or if the magnitude is not a valid number.
    """
    # Sign applied for each hemisphere letter, per coordinate type.
    signs_by_type = {
        'lat': {'N': 1.0, 'S': -1.0},
        'lon': {'E': 1.0, 'W': -1.0},
    }
    if t not in signs_by_type:
        raise ValueError("t must be 'lat' or 'lon', got {!r}".format(t))

    text = val.strip()
    # Slicing (rather than indexing) keeps an empty string from raising
    # IndexError; it falls through to the ValueError below instead.
    hemisphere = text[-1:].upper()
    signs = signs_by_type[t]
    if hemisphere not in signs:
        raise ValueError(
            "{!r} does not end in a valid hemisphere letter for {!r}".format(val, t)
        )

    return signs[hemisphere] * float(text[:-1])

In [29]:
# Boolean mask that is True for every row whose latitude string has no 'N' in
# it; the bare .loc expression on the last line is what the cell displays.
lacks_north_suffix = np.array(
    ['N' not in latitude for latitude in city_data['Latitude']]
)
city_data.loc[lacks_north_suffix, :]

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,time
78638,1841-01-01,21.432,3.286,Adelaide,Australia,34.56S,138.16E,1841-01-01 00:00:00
78639,1841-02-01,22.087,2.458,Adelaide,Australia,34.56S,138.16E,1841-02-01 00:00:00
78640,1841-03-01,18.859,3.547,Adelaide,Australia,34.56S,138.16E,1841-03-01 00:00:00
78641,1841-04-01,15.033,1.884,Adelaide,Australia,34.56S,138.16E,1841-04-01 00:00:00
78642,1841-05-01,12.864,1.481,Adelaide,Australia,34.56S,138.16E,1841-05-01 00:00:00
78643,1841-06-01,10.896,1.479,Adelaide,Australia,34.56S,138.16E,1841-06-01 00:00:00
78644,1841-07-01,10.172,1.782,Adelaide,Australia,34.56S,138.16E,1841-07-01 00:00:00
78645,1841-08-01,10.842,1.485,Adelaide,Australia,34.56S,138.16E,1841-08-01 00:00:00
78646,1841-09-01,12.147,1.502,Adelaide,Australia,34.56S,138.16E,1841-09-01 00:00:00
78647,1841-10-01,14.930,1.607,Adelaide,Australia,34.56S,138.16E,1841-10-01 00:00:00


In [None]:
Looking at the number of cities and countries, we get the following:

In [8]:
# Sorted array of the distinct values in the 'City' column.
cities = np.unique(city_data['City'])

In [13]:
# Sorted array of the distinct values in the 'Country' column.
countries = np.unique(city_data['Country'])

In [11]:
# Spot check: a non-empty result means 'Toronto' is among the city names.
cities[cities == 'Toronto']

array(['Toronto'], dtype=object)

In [15]:
# Report how many distinct cities and countries the dataset covers.
# Calling print(...) with a single argument produces the same text under
# both Python 2 and Python 3, unlike the Python 2-only print statement.
print('Number of unique cities: ' + str(len(cities)))
print('Number of unique countries: ' + str(len(countries)))

Number of unique cities: 3448
Number of unique countries: 159


We'll look at the distribution of cities on a map:

In [32]:
#unique_city_data = city_data.groupby('City').first()

# Single placeholder marker. Its coordinates match the '34.56S' / '138.16E'
# strings in the data, written here as signed numbers: scattergeo needs
# numeric degrees, with southern latitudes (and western longitudes) negative.
# The raw string '34.56S' is not a usable latitude.
# NOTE(review): 'text' and 'size' look like leftovers from another analysis;
# replace this list with one trace built from the parsed city coordinates.
mpis = [{'lat': [-34.56],
  'lon': [138.16],
  'marker': {'color': 'rgb(0,116,217)',
   'line': {'color': 'rgb(40,40,40)', 'width': 0.5},
   'size': 38.7,
   'sizemode': 'diameter'},
  'text': '0.387',
  'type': 'scattergeo'},
]


# World map in a natural-earth projection with light grey land and white
# country/subunit borders.
# NOTE(review): the title 'MPI' does not describe a map of cities - confirm
# and rename once the real city trace is in place.
layout = go.Layout(
    title = 'MPI',
    showlegend = True,
    geo = dict(
            scope='world',
            projection=dict( type = 'natural earth'),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),)

# validate=False is kept from the original call: the figure is passed to the
# renderer without plotly's client-side schema validation.
fig =  go.Figure(layout=layout, data=mpis)
iplot( fig, validate=False)