In [1]:
import requests
import json
from pandas.io.json import json_normalize
import pandas as pd

import re
import time
from time import strptime
import datetime

import sys
sys.stdout = open('currents_output.log', 'w')

### Investigating currents 

In [2]:
with open('current_station_info.json', 'r') as station_info_file:
    currents_station_info = json.load(station_info_file)
    
with open('current_station_intervals.json', 'r') as station_dates_file:
    currents_station_intervals = json.load(station_dates_file)
    
    

In [3]:
for key, date_list in currents_station_info.items():
    if any(isinstance(el, list) for el in date_list):
        for i, dates in enumerate(date_list):
            for j, date in enumerate(dates[:2]):
                if date == '':
                    currents_station_info[key][0][1] = datetime.datetime.now()
                    continue
                split_date = re.findall(r"[\w']+", date)
                currents_station_info[key][i][j] = datetime.datetime(year = int(split_date[0]), 
                                month = int(split_date[1]),
                                day = int(split_date[2]), 
                                hour = int(split_date[3]), 
                                minute = int(split_date[4]))   
    else:
        currents_station_info[key] = []
        tmp_list = []
        for date in date_list[:2]:
            split_date = re.findall(r"[\w']+", date)
            tmp_list.append(datetime.datetime(year = int(split_date[2]), 
                            month = int(strptime(split_date[0],'%b').tm_mon),
                            day = int(split_date[1]), 
                            hour = int(split_date[3]),
                            minute = int(split_date[4])))
        tmp_list = tmp_list + date_list[2:]
        currents_station_info[key].append(tmp_list) #put the list inside of another list
    

In [4]:
def retrieveLifetimeData(station_id, date_lists):
    lifetime_data = []
    for date_list in date_lists:
        begin_date = date_list[0]
        begin_date += datetime.timedelta(minutes=begin_date.minute % 6)
        
        end_date = date_list[1]
        end_date -= datetime.timedelta(minutes=end_date.minute % 6)
        
        date = begin_date
        month = datetime.timedelta(days=31)
        first_loop = True
        while 1:
            end_loop = False
            next_date = date + month
            
            if next_date > end_date: 
                next_date = end_date
                end_loop = True
                print('this is where it ends')
                
            url = 'https://tidesandcurrents.noaa.gov/api/datagetter?'
            params = {
                'begin_date': '{:02d}/{:02d}/{} {:02d}:{:02d}'.format(date.month, date.day, date.year, date.hour, date.minute),
                'end_date':'{:02d}/{:02d}/{} {:02d}:{:02d}'.format(next_date.month, next_date.day, next_date.year, next_date.hour, next_date.minute),
                'station':station_id,
                'product':'currents',
                'units':'metric',
                'time_zone':'gmt',
                'application':'web_services',
                'format':'json'    
            }
            
            bin_list = []
            
            i=1
            while 1:
                params['bin'] = i
                resp = requests.get(url=url, params=params)
                try:
                    bin_list.append(pd.DataFrame(resp.json()['data']))
                except:
                    break
                bin_list[i-1].drop('b', axis=1, inplace=True)
                bin_list[i-1].set_index('t', inplace=True)
                bin_list[i-1].rename(columns = lambda x : '{}.{}.'.format(station_id, i) + x, inplace = True)
                i += 1
            try:    
                monthly_data = pd.concat(bin_list, axis=1)
                lifetime_data.append(monthly_data)
            except ValueError:
                print('Lost data for {}  -  {}'.format(date, next_date))
                pass
            
            date = next_date
            if end_loop:
                break
    print('Done with {}'.format(station_id))            
    return pd.concat(lifetime_data)

In [5]:
all_of_the_data = []
total = len(currents_station_info.keys())
counter = 0
for station_id, available_dates in currents_station_info.items():
    counter += 1
    print('{} of {}'.format(counter, total))
    print('{}:{}'.format(station_id, available_dates))
    all_of_the_data.append(retrieveLifetimeData(station_id, available_dates))

imachampion = pd.concat(all_of_the_data, axis=1)
imachampion.to_pickle('currents.pkl')


1 of 767
s10010:[[datetime.datetime(2016, 8, 26, 16, 0), datetime.datetime(2017, 1, 28, 22, 22, 55, 102939), "37° 48.058' N", "122° 20.865' W"]]
this is where it ends
Done with s10010
2 of 767
SFB1208:[[datetime.datetime(2012, 5, 20, 19, 44), datetime.datetime(2012, 7, 11, 21, 32), "37° 47.849' N", "122° 22.432' W"]]
this is where it ends
Done with SFB1208
3 of 767
lm0101:[[datetime.datetime(2015, 2, 6, 21, 30), datetime.datetime(2017, 1, 28, 22, 22, 55, 103323), "29° 55.346' N", "90° 4.268' W"], [datetime.datetime(2012, 7, 3, 17, 30), datetime.datetime(2015, 2, 4, 21, 0), "29° 55.346' N", "90° 4.268' W"], [datetime.datetime(2009, 8, 20, 14, 52), datetime.datetime(2012, 7, 3, 17, 0), "29° 55.346' N", "90° 4.268' W"]]
Lost data for 2016-09-17 21:30:00  -  2016-10-18 21:30:00
Lost data for 2016-10-18 21:30:00  -  2016-11-18 21:30:00
Lost data for 2016-11-18 21:30:00  -  2016-12-19 21:30:00
Lost data for 2016-12-19 21:30:00  -  2017-01-19 21:30:00
this is where it ends
this is where it en

ValueError: No objects to concatenate