# Import temperature data

In [1]:
# 
# Import libraries and custom modules
# 
import urllib.request, urllib.parse, urllib.error
import pandas as pd
import re
import Wrangler as Wr
## Wr.extractLines
## Wr.multipleReplace
## Wr.makeDirFile


# Download the Berkeley Earth global land-and-ocean temperature file, pull out
# the first data table it contains, and export that table as a CSV file.

DATA_URL = 'http://berkeleyearth.lbl.gov/auto/Global/Land_and_Ocean_complete.txt'
REQUEST_TIMEOUT = 60  # seconds; without a timeout urlopen may block indefinitely

# Zero-based positions of the metadata lines inside the downloaded file.
# NOTE(review): these are fixed offsets into the published file; if the
# publisher changes the header layout they must be re-checked.
CENTER_LINE = 41       # line holding "<label>: <mean> +/- <uncertainty>"
MAIN_HEADER_LINE = 74  # line holding the main (group) column headers
SUB_HEADER_LINE = 75   # line holding the comma-separated sub headers

# Access URL, then request and read temperature data file
with urllib.request.urlopen(DATA_URL, timeout=REQUEST_TIMEOUT) as fhand:
    # Decode each raw line and strip surrounding whitespace
    file = [line.decode().strip() for line in fhand]

# Index of the first blank line; the surface-air temperature table starts on
# the line right after it.
c = next((idx for idx, line in enumerate(file) if not line), None)
if c is None:
    raise ValueError('No blank line found before the temperature data table')

# Offset (relative to c + 1) of the blank line that ends the table. If the
# table runs to the end of the file, keep every remaining line.
c2 = next(
    (idx for idx, line in enumerate(file[c + 1:]) if not line),
    len(file) - (c + 1),
)

# Extract set: Global Average Temperature Anomaly with Sea Ice Temperature
# Inferred from Air Temperatures
lst = Wr.extractLines(file[c + 1:c + 1 + c2])

# Parse main headers and sub headers into respective lists
h1 = Wr.multipleReplace({'%': '', '-y': 'Y'}, file[MAIN_HEADER_LINE]).split()
# Raw string so that \s is a regex escape rather than an invalid string escape
h2 = re.sub(r'[%\s.]+', '', file[SUB_HEADER_LINE]).split(',')

# Consolidate main and sub headers into a single header list. The first two
# sub headers are used as-is; after that every pair of sub headers is prefixed
# with the main header of its group.
h_list = [
    h1[num // 2 - 1] + sub if num > 1 else sub
    for num, sub in enumerate(h2)
]

# Extract the reference mean air temperature used as center of the anomalies,
# together with its uncertainty (text after the first ':' split on '+/-')
center_parts = re.sub(r'^.+?:', '', file[CENTER_LINE]).split('+/-')
centerMean = float(center_parts[0].strip())
centerUnc = float(center_parts[1].strip())

# Convert list to DataFrame
df = pd.DataFrame(lst, columns=h_list)

# Add center mean and uncertainty to df (same value on every row)
df['centerMean'] = centerMean
df['centerUnc'] = centerUnc

# Create Datetime, pinned to the first day of each month
df['Date'] = pd.to_datetime(df[['Year', 'Month']].assign(DAY=1))

# Set data columns (everything after the first two headers) to numeric;
# values that cannot be parsed become NaN
df[h_list[2:]] = df[h_list[2:]].apply(pd.to_numeric, errors='coerce')

# Create a Month name column
df['strMonth'] = df.Date.dt.month_name()

# Create Country variable and fill with Global (Use for Ed Hawkins Stripes)
df['Country'] = 'Global'

# Export csv file
fullname = Wr.makeDirFile('Global_Temperature')
# to_csv returns None when given a path; the name is kept for later cells
export_csv = df.to_csv(fullname, index=False, header=True)