In [1]:
import statistics
import numpy as np
import json
from scipy import spatial

# Loads one weather file, skipping any line that does not have the expected field count.
def load_data(file_name, reverse_ll=False):
    """Read a whitespace-delimited weather file into a dict of monthly averages.

    Each line is split on whitespace and accepted only when it has exactly
    371 fields. For accepted lines, fields 5 onward (366 values) are parsed
    as floats and reduced to twelve monthly averages by ``get_monthly``.

    Args:
        file_name: Path of the text file to read.
        reverse_ll: When False the key is ``"<field 2> <field 3>"``; when
            True the two fields are swapped. (Presumably these are the
            latitude/longitude columns -- confirm against the file format.)

    Returns:
        dict mapping the two-field key string to the list returned by
        ``get_monthly`` for that line. A later line with the same key
        overwrites an earlier one.

    Side effects:
        Prints ``ERROR: <zero-based line index>`` for every rejected line.
    """
    stations = {}
    with open(file_name, "r") as handle:
        for line_number, line in enumerate(handle):
            fields = line.split()
            if len(fields) != 371:
                print("ERROR: {}".format(line_number))
                continue
            if reverse_ll:
                first, second = fields[3], fields[2]
            else:
                first, second = fields[2], fields[3]
            daily_values = [float(token) for token in fields[5:]]
            stations[first + " " + second] = get_monthly(daily_values)
    return stations

# Collapses a year of daily readings (366 slots, leap-year layout) into 12 monthly means.
def get_monthly(daily_weather):
    """Return twelve monthly averages for a list of daily readings.

    The list is cut into consecutive slices of 31, 29, 31, ... days. Within
    each slice only readings strictly between -60 and 100 are kept; anything
    else is discarded before averaging.

    Args:
        daily_weather: Sequence of numeric daily readings, January first.

    Returns:
        A list of 12 entries: the mean of the retained readings for each
        month, or None for a month with no retained reading (including
        months that fall beyond the end of a short input).
    """
    days_per_month = (31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    monthly_means = []
    month_start = 0
    for days in days_per_month:
        month_end = month_start + days
        usable = [reading for reading in daily_weather[month_start:month_end]
                  if -60 < reading < 100]
        monthly_means.append(statistics.mean(usable) if usable else None)
        month_start = month_end
    return monthly_means

# Returns the element-wise weighted average of several equal-length lists
# (used for interpolating between weather stations).
# Example: [2, 2, 2] and [8, 8, 8] with equal weights give [5, 5, 5].
def average_lists(lists, weights):
    """Element-wise weighted mean of equal-length lists, ignoring None entries.

    For every position j the result is
    ``sum(lists[i][j] * weights[i]) / sum(weights[i])`` taken over the lists
    whose entry at j is not None. Dividing by the weight total (rather than by
    the number of contributing lists) keeps the result a true weighted mean
    for any weights; when every weight is 1 the two are the same.

    Args:
        lists: Sequence of lists, all the same length as ``lists[0]``.
            Entries are numbers or None (missing).
        weights: One weight per list, indexed in parallel with ``lists``.

    Returns:
        A list the same length as ``lists[0]``. A position is None when every
        list has None there. If the contributing weights sum to zero, the
        plain (unweighted) mean of the available values is returned for that
        position. An empty ``lists`` yields ``[]``.
    """
    if not lists:
        return []

    size = len(lists[0])
    weighted_totals = [0.0] * size
    weight_sums = [0.0] * size
    plain_totals = [0.0] * size
    counts = [0] * size

    for i, values in enumerate(lists):
        weight = weights[i]
        for j in range(size):
            value = values[j]
            # Compare against None explicitly: 0.0 is a valid reading and
            # must not be dropped the way a truthiness test would drop it.
            if value is None:
                continue
            weighted_totals[j] += value * weight
            weight_sums[j] += weight
            plain_totals[j] += value
            counts[j] += 1

    averages = []
    for j in range(size):
        if counts[j] == 0:
            averages.append(None)
        elif weight_sums[j]:
            averages.append(weighted_totals[j] / weight_sums[j])
        else:
            # All contributing weights are zero, so they carry no ranking
            # information; fall back to the plain mean instead of dividing by 0.
            averages.append(plain_totals[j] / counts[j])
    return averages

def key_to_cord(key):
    """Parse a whitespace-separated key string into a list of floats.

    Example: ``"50.0 190.5"`` becomes ``[50.0, 190.5]``.
    """
    return [float(part) for part in key.split()]

def cord_to_key(cord):
    """Build a key string from the first two components of a coordinate.

    The two components are converted with ``str`` and joined by one space,
    e.g. ``[50, 190]`` becomes ``"50 190"``.
    """
    first, second = cord[0], cord[1]
    return " ".join((str(first), str(second)))

# Gets the distance between two vectors (used for the distance between a weather station and a target).
def distance(c1, c2):
    """Return the Euclidean (L2) norm of the difference between two vectors.

    Both arguments are converted to NumPy arrays and subtracted element-wise.
    """
    offset = np.asarray(c1) - np.asarray(c2)
    return np.linalg.norm(offset)

# Interpolates one year of station data for a list of targets, e.g. [[50, 150], [25, 150]].
def interpolate_year(data, targets, max_range = 3.0):
    """Estimate monthly values at each target from stations within range.

    Station coordinates are parsed from the keys of ``data`` and indexed in a
    KD-tree. For every target, the stations within ``max_range`` (Euclidean
    distance in the key's coordinate units) are combined with
    ``average_lists``.

    Weighting: each station gets a linear-falloff weight
    ``1 - distance / max_range``, so a station at the target counts fully and
    one at the edge of the search radius counts for nothing. Weighting by
    ``distance / max_range`` directly would favour the farthest stations and
    scale a lone station's values by its distance. The weights are rescaled to
    sum to the number of stations in range, so the result is a proper weighted
    mean whether the averaging step divides by the station count or by the
    weight total.

    Args:
        data: dict mapping ``"<x> <y>"`` key strings to 12-entry monthly lists.
        targets: Iterable of ``[x, y]`` coordinates in the same order as the keys.
        max_range: Search radius around each target.

    Returns:
        dict mapping ``cord_to_key(target)`` to a 12-entry list. Targets with
        no station in range (or when ``data`` is empty) map to ``[None] * 12``.
    """
    months = 12

    # A KD-tree cannot be built from zero points; with no stations every
    # target is simply "no data".
    if not data:
        return {cord_to_key(target): [None] * months for target in targets}

    vals = list(data.values())
    cords = [key_to_cord(key) for key in data]
    station_tree = spatial.KDTree(np.array(cords))

    interpolated_vals = {}
    for target in targets:
        in_range = station_tree.query_ball_point(target, r=max_range)
        if not in_range:
            interpolated_vals[cord_to_key(target)] = [None] * months
            continue

        station_vals = [vals[i] for i in in_range]

        # Closeness is 1 at the target and falls linearly to 0 at max_range.
        if max_range > 0:
            closeness = [max(0.0, 1.0 - distance(target, cords[i]) / max_range)
                         for i in in_range]
        else:
            closeness = [0.0] * len(in_range)

        total = sum(closeness)
        if total > 0:
            scale = len(closeness) / total
            weights = [c * scale for c in closeness]
        else:
            # Every station sits exactly on the search boundary (or the radius
            # is zero): no basis for ranking them, so weight them equally.
            weights = [1.0] * len(closeness)

        interpolated_vals[cord_to_key(target)] = average_lists(station_vals, weights)
    return interpolated_vals

# Runs the per-year interpolation for every year in [start, stop).
def interpolate_range(start, stop, file_prefix, targets):
    """Interpolate each year's file for the given targets.

    For every year ``y`` in ``range(start, stop)`` the file
    ``file_prefix + str(y) + ".txt"`` is loaded with ``load_data`` and passed
    to ``interpolate_year``. When ``file_prefix`` contains ``"Prnational"``
    (the rain files) the loader is told to swap the two coordinate columns.

    Args:
        start: First year (inclusive).
        stop: Last year (exclusive).
        file_prefix: Path prefix placed in front of the year number.
        targets: Target coordinates forwarded to ``interpolate_year``.

    Returns:
        dict mapping each year (int) to that year's ``interpolate_year`` result.

    Side effects:
        Prints a progress line per year.
    """
    results_by_year = {}
    for year in range(start, stop):
        print("Interpolating year: {}".format(year))
        path = file_prefix + str(year) + ".txt"
        # Rain files are flagged by name and need the coordinate order reversed.
        swap_columns = "Prnational" in file_prefix
        station_data = load_data(path, True) if swap_columns else load_data(path)
        results_by_year[year] = interpolate_year(station_data, targets)
    return results_by_year

def example_usage():
    """Demonstrate the pipeline: interpolate Tmax for 1950-1959 and save it.

    Reads ``cords.json``, interpolates the ``Tmax/X<year>.txt`` files for a
    single hard-coded target ``[50, 190]``, and writes the result to
    ``tout.json``.
    """
    # json has no ``read`` function; ``json.load`` parses an open file object.
    # Context managers make sure both files are closed (and the output flushed).
    with open('cords.json', 'r') as cords_file:
        # Loaded only to show how the target file is read; the call below
        # uses a single hard-coded target instead.
        cords = json.load(cords_file)
    interpolated_range = interpolate_range(1950, 1960, "Tmax/X", [[50, 190]])
    with open("tout.json", "w") as out_file:
        json.dump(interpolated_range, out_file)

In [None]:
# Smoke test: interpolate a single year (1950) of precipitation for every
# target in cords.json and write the result to disk.
# Files are opened with context managers so the handles are closed and the
# output is fully flushed.
with open('cords.json', 'r') as cords_file:
    cords = json.load(cords_file)['cords']
interpolated_range = interpolate_range(1950, 1951, "P/Prnational", cords)
with open("test_percipitation.json", "w") as out_file:
    json.dump(interpolated_range, out_file)

In [2]:
# Maps each variable's directory name to the rest of its file prefix, so the
# full prefix is e.g. "Tmax/X" or "P/Prnational".
restPrefix = {'Tmax':'/X',
             'Tmin': '/N',
             'P':'/Prnational'}
with open('cords.json', 'r') as cords_file:
    cords = json.load(cords_file)['cords']
# NOTE: startDate/endDate are not referenced by the loop below, which uses
# its own literal decade bounds.
startDate = 1950
endDate = 2012
# Process one decade at a time, writing one JSON file per decade and variable.
for i in range(1950, 2010, 10):
    for j in ['Tmax', 'Tmin', 'P']:
        print("Interpolating {} {}".format(i, j))
        interpolated_range = interpolate_range(i, i+10, j+restPrefix[j], cords)
        # Also build a yearly average per target from the monthly values.
        interpolated_range_average = {}
        for key, value in interpolated_range.items():
            year_averages = {}
            for keyj, valuej in value.items():
                # Average only the months that have data. Filtering None
                # explicitly replaces a bare ``except`` that would also have
                # hidden unrelated errors (and KeyboardInterrupt).
                present = [v for v in valuej if v is not None]
                if present:
                    year_averages[keyj] = [float(np.mean(present))]
                else:
                    year_averages[keyj] = [None]
            interpolated_range_average[key] = year_averages
        # Context managers close the handles so each file is flushed to disk.
        with open('wdata/'+j+ str(i)+'.json',"w") as out_file:
            json.dump(interpolated_range, out_file)
        with open('wdata/'+j+ str(i)+'_avg.json',"w") as avg_file:
            json.dump(interpolated_range_average, avg_file)

Interpolating 1950 Tmax
Interpolating year: 1950
Interpolating year: 1951
Interpolating year: 1952
Interpolating year: 1953
Interpolating year: 1954
Interpolating year: 1955
Interpolating year: 1956
Interpolating year: 1957
Interpolating year: 1958
Interpolating year: 1959




Interpolating 1950 Tmin
Interpolating year: 1950
Interpolating year: 1951
Interpolating year: 1952
Interpolating year: 1953
Interpolating year: 1954
Interpolating year: 1955
Interpolating year: 1956
Interpolating year: 1957
Interpolating year: 1958
Interpolating year: 1959
Interpolating 1950 P
Interpolating year: 1950
ERROR: 225
ERROR: 245
ERROR: 256
ERROR: 257
ERROR: 259
ERROR: 376
ERROR: 601
ERROR: 656
ERROR: 888
ERROR: 910
ERROR: 945
ERROR: 967
ERROR: 1014
ERROR: 1088
ERROR: 1099
ERROR: 1109
Interpolating year: 1951
ERROR: 171
ERROR: 194
ERROR: 224
ERROR: 254
ERROR: 267
ERROR: 268
ERROR: 270
ERROR: 272
ERROR: 398
ERROR: 435
ERROR: 613
ERROR: 625
ERROR: 627
ERROR: 657
ERROR: 675
ERROR: 685
ERROR: 751
ERROR: 1002
ERROR: 1026
ERROR: 1037
ERROR: 1065
ERROR: 1069
ERROR: 1082
ERROR: 1083
ERROR: 1090
ERROR: 1094
ERROR: 1096
ERROR: 1141
ERROR: 1142
ERROR: 1187
ERROR: 1216
ERROR: 1220
ERROR: 1227
Interpolating year: 1952
ERROR: 200
ERROR: 230
ERROR: 257
ERROR: 274
ERROR: 277
ERROR: 414
ERROR

Interpolating year: 1997
Interpolating year: 1998
Interpolating year: 1999
Interpolating 2000 Tmax
Interpolating year: 2000
Interpolating year: 2001
Interpolating year: 2002
Interpolating year: 2003
Interpolating year: 2004
Interpolating year: 2005
Interpolating year: 2006
Interpolating year: 2007
Interpolating year: 2008
Interpolating year: 2009
Interpolating 2000 Tmin
Interpolating year: 2000
Interpolating year: 2001
Interpolating year: 2002
Interpolating year: 2003
Interpolating year: 2004
Interpolating year: 2005
Interpolating year: 2006
Interpolating year: 2007
Interpolating year: 2008
Interpolating year: 2009
Interpolating 2000 P
Interpolating year: 2000
Interpolating year: 2001
Interpolating year: 2002
Interpolating year: 2003
Interpolating year: 2004
Interpolating year: 2005
Interpolating year: 2006
Interpolating year: 2007
Interpolating year: 2008
Interpolating year: 2009


In [None]:
# Number of target coordinates read from the 'cords' entry of cords.json.
len(cords)

In [None]:
# Load the 1990 Tmax file on its own for inspection (keys are not reversed).
t1990 = load_data('Tmax/X1990.txt')


In [None]:
# Length of each station's monthly list in the 1990 Tmax data.
[len(monthly) for monthly in t1990.values()]

In [None]:
# Sanity check: a single list with weight 1 should come back with the same values.
average_lists([[1, 1, 1]], [1])

In [None]:
# Display whatever the most recent cell assigned to interpolated_range
# (depends on which cell was executed last).
interpolated_range

In [None]:
# Spot check: interpolate Tmax for the single year 1990 at one target coordinate.
interpolate_range(1990, 1991, "Tmax/X", [[-129.58, 32.49]])