<h1>Surfcast.ca</h1>
<h3>A Goodfellow Analytics Creation</h3>
<h5>In partnership with Griffin Global</h5>

In [1]:
# Reset Notebook
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [2]:
# Import python packages
from bs4 import BeautifulSoup
from urllib import request
import requests
from urllib.request import urlopen
from datetime import datetime, timedelta
from pytz import timezone
import pytz
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dateutil import parser
from dateutil import tz
import scipy.interpolate
import os
from sqlalchemy import create_engine # database connection

# Matplotlib inline
%matplotlib inline

# Hide ipython notebook warnings
import warnings
warnings.filterwarnings('ignore')

<h1>Step 1</h1>
<h3>Get HTML Database as List of Files</h3>

In [3]:
# Set URL path to NOAA 'gridded fields' database
url = 'http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridded_fields/'

In [4]:
# Set URL path to NOAA 'gridded fields' map files
url_map = 'http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridded_fields/map_files/'

In [5]:
# File types of interest [wave, wind, surface current, surface temperature]
#extension_list = ['wav', 'wnd', 'cur', 'swt', 'ice', 'o', 'e', 's', 'm', 'h']
extension_list = ['wav', 'wnd', 'cur', 'swt', 'ice', 'o', 'e', 's', 'm', 'h']

In [6]:
# Define NOAA database class
class NoaaDB:
    
    """
    Class: NoaaDB
        - This class converts the NCAST|FCAST FTP database list into a pandas DataFrame
        
        - The gridded fields filename format is:

          LYYYYDDDHH.N.EXT

          L    = lake letter (s=Superior, m=Michigan, h=Huron, e=Erie, o=Ontario)
          YYYY = year at start of simulation (GMT)
          DDD  = Day Of Year at start of simulation (GMT)
          HH   = hr at start of simulation (GMT)
          N    = Site Number
    """

    # Initialize object
    def __init__(self, url, extension_list):
        
        # Set object attributes
        self.url = url                            # FTP 'gridded data' database URL
        self.url_ncast = self.url  + 'NCAST/'     # FTP NCAST database URL  
        self.url_fcast = self.url  + 'FCAST/'     # FTP FCAST database URL   
        self.extension_list = extension_list      # List of file type extensions   
        self.html_ncast = {}                      # HTML from FCAST page
        self.html_fcast = {}                      # HTML from FCAST page
        self.html_obj_ncast = {}                  # FCAST HTML Object
        self.html_obj_fcast = {}                  # FCAST HTML Object
        
        # Current UTC time as GMT
        self.current_datetime_GMT = datetime.utcnow().replace(tzinfo=tz.gettz('GMT'))
        
        # Database dataframe
        self.df = pd.DataFrame(index=[], columns=['filename', 'file_extension', 
                                                  'filetype', 'lake', 'file_datetime', 
                                                  'current_datetime','forecast_type', 'file_url'])                 
        
    # Get NCAST database 
    def getNcast(self):
        
        # Get DataFrame row count
        df_rows = self.df.shape[0]-1
        
        # Get HTML from database page
        self.html_ncast   = requests.get(self.url_ncast)
        
        # Create BeautifulSoup object
        self.html_obj_ncast   = BeautifulSoup(self.html_ncast.content)
        
        # Set database list as dataframe 
        for link in self.html_obj_ncast.findAll('a', href=True):
            if (
                link.contents[0].split('.')[-1] in self.extension_list and 
                link.contents[0][0] in self.extension_list
               ):
                
                df_rows += 1  # row count                                                 
                
                filename = link.contents[0]                                         # file name
                file_extension = link.contents[0].split('.')[-1]                    # file extension
                
                # file datetime as GMT
                file_datetime = datetime.strptime(link.contents[0].split('.')[0]
                                                  [1:len(link.contents[0].split('.')[0])], "%Y%j%H") # file datetime (GMT)
                file_datetime = file_datetime.replace(tzinfo=tz.gettz('GMT'))
                
                # Set file type
                if file_extension == 'wav':           # Wave
                    filetype = 'WAVES'
                elif file_extension == 'wnd':         # Wind
                    filetype = 'WINDS'
                elif file_extension == 'cur':         # Surface Current
                    filetype = 'SURFACE CURRENTS'
                elif file_extension == 'swt':         # Surface Temperature
                    filetype = 'SURFACE TEMPS'
                elif file_extension == 'ice':         # Ice Conditions
                    filetype = 'ICE PARAMS'
                
                # Set great lake
                if filename[0] == 'e':       # Lake Erie
                    lake = 'erie'
                elif filename[0] == 'h':     # Lake Huron
                    lake = 'huron'
                elif filename[0] == 'o':     # Lake Ontario
                    lake = 'ontario'
                elif filename[0] == 's':     # Lake Superior
                    lake = 'superior'
                elif filename[0] == 'm':     # Lake Michigan
                    lake = 'michigan'            
                
                # save to dataframe
                self.df.loc[df_rows] = [filename, file_extension, filetype, 
                                        lake, file_datetime, self.current_datetime_GMT, 
                                        'NCAST', self.url_ncast]                                   
                          
    # Get FCAST database 
    def getFcast(self):
        
        # Get DataFrame row count
        df_rows = self.df.shape[0]-1
        
        # Get HTML from database page
        self.html_fcast   = requests.get(self.url_fcast)
        
        # Create BeautifulSoup object
        self.html_obj_fcast   = BeautifulSoup(self.html_fcast.content)
        
        # Set database list as dataframe 
        for link in self.html_obj_fcast.findAll('a', href=True):
            if (
                link.contents[0].split('.')[-1] in self.extension_list and 
                link.contents[0][0] in self.extension_list
               ):
                
                df_rows += 1  # row count                                                 
                
                filename = link.contents[0]                                         # file name
                file_extension = link.contents[0].split('.')[-1]                    # file extension
                
                # file datetime as GMT
                file_datetime = datetime.strptime(link.contents[0].split('.')[0]
                                                  [1:len(link.contents[0].split('.')[0])], "%Y%j%H")  # file datetime (GMT)
                file_datetime = file_datetime.replace(tzinfo=tz.gettz('GMT'))
                
                # Set file type
                if file_extension == 'wav':           # Wave
                    filetype = 'WAVES'
                elif file_extension == 'wnd':         # Wind
                    filetype = 'WINDS'
                elif file_extension == 'cur':         # Surface Current
                    filetype = 'SURFACE CURRENTS'
                elif file_extension == 'swt':         # Surface Temperature
                    filetype = 'SURFACE TEMPS'
                elif file_extension == 'ice':         # Ice Conditions
                    filetype = 'ICE PARAMS'
                
                # Set great lake
                if filename[0] == 'e':       # Lake Erie
                    lake = 'erie'
                elif filename[0] == 'h':     # Lake Huron
                    lake = 'huron'
                elif filename[0] == 'o':     # Lake Ontario
                    lake = 'ontario'
                elif filename[0] == 's':     # Lake Superior
                    lake = 'superior'
                elif filename[0] == 'm':     # Lake Michigan
                    lake = 'michigan'       
                
                # save to dataframe
                self.df.loc[df_rows] = [filename, file_extension, filetype, 
                                        lake, file_datetime, self.current_datetime_GMT, 
                                        'FCAST', self.url_fcast]

In [7]:
# Create NoaaDB object
noaa_files = NoaaDB(url, extension_list)

In [8]:
# Get NCAST files 
noaa_files.getNcast()

In [9]:
# Get FCAST files 
noaa_files.getFcast()

In [10]:
# Show Dataframe of NoaaDB files 
noaa_files.df.head()

Unnamed: 0,filename,file_extension,filetype,lake,file_datetime,current_datetime,forecast_type,file_url
0,e201611706.0.ice,ice,ICE PARAMS,erie,2016-04-26 06:00:00,2016-06-08 00:54:35.509431,NCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
1,e201611712.0.ice,ice,ICE PARAMS,erie,2016-04-26 12:00:00,2016-06-08 00:54:35.509431,NCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
2,e201611718.0.ice,ice,ICE PARAMS,erie,2016-04-26 18:00:00,2016-06-08 00:54:35.509431,NCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
3,e201611800.0.ice,ice,ICE PARAMS,erie,2016-04-27 00:00:00,2016-06-08 00:54:35.509431,NCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
4,e201611806.0.ice,ice,ICE PARAMS,erie,2016-04-27 06:00:00,2016-06-08 00:54:35.509431,NCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...


In [11]:
# Show Dataframe of NoaaDB files 
noaa_files.df.tail()

Unnamed: 0,filename,file_extension,filetype,lake,file_datetime,current_datetime,forecast_type,file_url
1107,s201615900.0.wnd,wnd,WINDS,superior,2016-06-07 00:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
1108,s201615912.0.cur,cur,SURFACE CURRENTS,superior,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
1109,s201615912.0.swt,swt,SURFACE TEMPS,superior,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
1110,s201615912.0.wav,wav,WAVES,superior,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
1111,s201615912.0.wnd,wnd,WINDS,superior,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...


<h1>Step 2</h1>
<h3>Find Most Recently Uploaded Files</h3>

In [12]:
class NoaaData():
    
    """
    NOAA database file class
    """
    
    def __init__(self, noaa_files, url_map):
        
        """
        Initialize object
        """
        
        # Set object attributes
        self.noaa_files = noaa_files              # user input NoaaDB object (Pandas Dataframe of all files in Noaa DB)
        self.df_ncast = {}                        # downloaded NCAST text files as DataFrame
        self.df_fcast = {}                        # downloaded FCAST text files as DataFrame
        self.df = {}                              # conbined NCAST and FCAST DataFrames of most recent 120 hr forecast
        self.url_map = url_map                    # url containing map files
        
        # Set NOAA File attributes
        self.attributes = {
            'wave':        ['grid_number', 'wave_height', 'wave_direction', 'wave_period'],  
            'wind':        ['grid_number', 'wind_speed', 'wind_direction'],
            'temperature': ['grid_number', 'surface_temperature'],
            'current':     ['grid_number', 'current_speed', 'current_direction'],
            'ice':         ['grid_number', 'ice_concentration', 'ice_thickness', 'ice_speed', 'ice_direction']
        }

        # Set map path
        self.map_path = r'C:\Users\Sebastian\Projects\Websites\Surfcast\GetData\GridFiles'
        
        # Get Newest NCAST Files in database and save as DataFrame
        maxtime = noaa_files.df[(noaa_files.df.forecast_type == 'NCAST')]['file_datetime'].max()
        self.newest_files_ncast = noaa_files.df[(noaa_files.df.forecast_type == 'NCAST') &
                                                (noaa_files.df.file_datetime == maxtime)].reset_index(drop=True)
        
        # Get Newest FCAST Files in database and save as DataFrame
        maxtime = noaa_files.df[(noaa_files.df.forecast_type == 'FCAST')]['file_datetime'].max()
        self.newest_files_fcast = noaa_files.df[(noaa_files.df.forecast_type == 'FCAST') & 
                                                (noaa_files.df.file_datetime == maxtime)].reset_index(drop=True)

        
        
        
    def df_setup(self):
        
        """
        Set up empty DataFrames to hold text file data
        """
 
        # Get list of unique NCAST file types and lakes
        filetype_ncast = self.newest_files_ncast.filetype.unique()
        
        # Setup NCAST DataFrame
        self.df_ncast  = pd.DataFrame(columns=['year', 'day', 'hour', 'datetime',                  
                                               'grid_number', 'latitude', 'longitude', 
                                               'map', 'lake'])   
        
        if any('WAVES' in s for s in filetype_ncast):                         # wave
            for col in self.attributes['wave']:
                self.df_ncast[col] = pd.Series(index=self.df_ncast.index) 
                
        if any('WINDS' in s for s in filetype_ncast):                         # wind              
            for col in self.attributes['wind']:
                self.df_ncast[col] = pd.Series(index=self.df_ncast.index)
                
        if any('SURFACE TEMPS' in s for s in filetype_ncast):                 # temperature
            for col in self.attributes['temperature']:
                self.df_ncast[col] = pd.Series(index=self.df_ncast.index)
                
        if any('SURFACE CURRENTS' in s for s in filetype_ncast):              # current
            for col in self.attributes['current']:
                self.df_ncast[col] = pd.Series(index=self.df_ncast.index)
                
        if any('ICE PARAMS' in s for s in filetype_ncast):                    # ice
            for col in self.attributes['ice']:
                self.df_ncast[col] = pd.Series(index=self.df_ncast.index)

        # Get list of unique FCAST file types and lakes
        filetype_fcast = self.newest_files_fcast.filetype.unique()

        # Setup FCAST DataFrame
        self.df_fcast  = pd.DataFrame(columns=['year', 'day', 'hour', 'datetime',              
                                               'grid_number', 'latitude', 'longitude', 
                                               'map', 'lake'])   
        
        if any('WAVES' in s for s in filetype_fcast):                         # wave
            for col in self.attributes['wave']:
                self.df_fcast[col] = pd.Series(index=self.df_fcast.index) 
                
        if any('WINDS' in s for s in filetype_fcast):                         # wind              
            for col in self.attributes['wind']:
                self.df_fcast[col] = pd.Series(index=self.df_fcast.index)
                
        if any('SURFACE TEMPS' in s for s in filetype_fcast):                 # temperature
            for col in self.attributes['temperature']:
                self.df_fcast[col] = pd.Series(index=self.df_fcast.index)
                
        if any('SURFACE CURRENTS' in s for s in filetype_fcast):              # current
            for col in self.attributes['current']:
                self.df_fcast[col] = pd.Series(index=self.df_fcast.index)
                
        if any('ICE PARAMS' in s for s in filetype_fcast):                    # ice
            for col in self.attributes['ice']:
                self.df_fcast[col] = pd.Series(index=self.df_fcast.index)

                
   

    @classmethod
    def getHeaderValue(cls, val):
        
        """
        Define function to get and update text file header column
        """

        global headerString

        if 'dat' in val and val != headerString:
            headerString = val
            return headerString
        else:
            return headerString
        
        
        
        
    @classmethod
    def getMapFile(cls, val, lake):
        
        """
        Define function to format map string
        """

        map = val.split('/')[-1]
        map = map.split('.')[0] + '.' + 'map'

        if lake == 'superior':
            map = 'superior' + map.split('sup')[1]

        return map

    
    
    
    @classmethod
    def getAttributeList(cls, fileType, attributes):
        
        """
        Define function to get list of attributes based on filetype
        """

        if  fileType == 'WAVES':
            return attributes['wave']
        elif fileType == 'WINDS':
            return attributes['wind']
        elif fileType == 'SURFACE TEMPS':
            return attributes['temperature']
        elif fileType == 'SURFACE CURRENTS':
            return attributes['current']
        elif fileType == 'ICE PARAMS':
            return attributes['ice']
        
        
                
                        
    def df_fill(self):
        
        """
        Fill NCAST and FCAST DataFrames with text file data
        """
        
        # -------------------------------------------------------------------------------------------------------------------- #
        # NCAST 
        # -------------------------------------------------------------------------------------------------------------------- #
        
        # Loop through lakes
        for lake in self.newest_files_ncast.lake.unique():

            # Get DataFrame of attribute files to downhole
            df_lake = self.newest_files_ncast[(self.newest_files_ncast.lake == lake)]
            
            # Set attribute counter
            attributeCount = 0
            
            # Loop through atributes
            for df_index in df_lake.index:
                
                # Update attribute counter
                attributeCount += 1
                
                # Get list of attributes based on filetype
                cols = self.getAttributeList(self.newest_files_ncast.filetype[df_index], self.attributes)

                # Download lake attribute text file and save as DataFrame
                dfFile = pd.read_table(
                    self.newest_files_ncast.file_url[df_index] + self.newest_files_ncast.filename[df_index], 
                    header=None,
                    names=['data']
                )

                # Set global variable text file header
                global headerString
                headerString = dfFile.ix[0, 'data']
       
                # Get text file header as new DataFrame column   
                dfFile['header'] = dfFile['data'].map(lambda x: self.getHeaderValue(x))

                # Add Lake column
                dfFile['lake'] = lake
                
                # Add CAST Type
                dfFile['forecast_type'] = 'NCAST'

                # Remove header rows
                dfFile['data'] = dfFile['data'].map(lambda x: np.nan if 'dat' in x else x)
                dfFile = dfFile.dropna()

                # Extract date and map information from header and set as DataFrame columns
                dfFile[['year', 'day', 'hour', 'map']] = dfFile['header'].str.split(return_type='frame').ix[:, 0:3]

                # Parse attribute column as set as DataFrame columns
                dfFile[cols] = dfFile['data'].str.split(return_type='frame')

                # Formate map string column
                dfFile['map'] = dfFile['map'].map(lambda x: self.getMapFile(x, lake))
                
                # Add Datetime Object
                dfFile['datetime'] = dfFile.apply(lambda x: 
                                                  datetime.strptime(x['year'] + x['day'] + x['hour'], "%Y%j%H"), 
                                                  axis=1)

                # Drop useless columns 
                dfFile = dfFile.drop('data', axis=1).drop('header', axis=1)
                
                # Merge lake specific attribute DataFrames
                if attributeCount == 1:
                    dfLake = dfFile
                else:
                    dfLake = pd.merge(dfLake, dfFile[cols + ['datetime']])
                
            # Get map grid locations
            mapFile = pd.read_table(self.url_map + dfFile.ix[dfFile.index[0], 'map'], header=None, 
                                    sep=r"\s*", names=['sequence number', 'fortran column','fortran row',
                                                       'latitude', 'longitude', 'depth'])
                
            # Add latitude and longitude
            multiple = int(dfLake.shape[0] / mapFile.shape[0])

            dfLake['latitude'] =  pd.concat([mapFile] * multiple, ignore_index=True)['latitude'] 
            dfLake['longitude'] =  pd.concat([mapFile] * multiple, ignore_index=True)['longitude']

            # Append lake DataFrames
            self.df_ncast = self.df_ncast.append(dfLake, ignore_index=True)
            
        # -------------------------------------------------------------------------------------------------------------------- #
        # FCAST 
        # -------------------------------------------------------------------------------------------------------------------- #
        
        # Loop through lakes
        for lake in self.newest_files_fcast.lake.unique():
            
            # Get DataFrame of attribute files to downhole
            df_lake = self.newest_files_fcast[(self.newest_files_fcast.lake == lake)]

            # Set attribute counter
            attributeCount = 0

            # Loop through atributes
            for df_index in df_lake.index:

                # Update attribute counter
                attributeCount += 1

                # Get list of attributes based on filetype
                cols = self.getAttributeList(self.newest_files_fcast.filetype[df_index], self.attributes)

                # Download lake attribute text file and save as DataFrame
                dfFile = pd.read_table(
                    self.newest_files_fcast.file_url[df_index] + self.newest_files_fcast.filename[df_index], 
                    header=None,
                    names=['data']
                )

                # Set global variable text file header
                global headerString
                headerString = dfFile.ix[0, 'data']

                # Get text file header as new DataFrame column   
                dfFile['header'] = dfFile['data'].map(lambda x: self.getHeaderValue(x))

                # Add Lake column
                dfFile['lake'] = lake
                
                # Add CAST Type
                dfFile['forecast_type'] = 'FCAST'

                # Remove header rows
                dfFile['data'] = dfFile['data'].map(lambda x: np.nan if 'dat' in x else x)
                dfFile = dfFile.dropna()

                # Extract date and map information from header and set as DataFrame columns
                dfFile[['year', 'day', 'hour', 'map']] = dfFile['header'].str.split(return_type='frame').ix[:, 0:3]

                # Parse attribute column as set as DataFrame columns
                dfFile[cols] = dfFile['data'].str.split(return_type='frame')

                # Formate map string column
                dfFile['map'] = dfFile['map'].map(lambda x: self.getMapFile(x, lake))

                # Add Datetime Object
                dfFile['datetime'] = dfFile.apply(lambda x: 
                                                  datetime.strptime(x['year'] + x['day'] + x['hour'], "%Y%j%H"), 
                                                  axis=1)

                # Drop useless columns 
                dfFile = dfFile.drop('data', axis=1).drop('header', axis=1)

                # Merge lake specific attribute DataFrames
                if attributeCount == 1:
                    dfLake = dfFile
                else:
                    dfLake = pd.merge(dfLake, dfFile[cols + ['datetime']])
                
            # Get map grid locations
            mapFile = pd.read_table(self.url_map + dfFile.ix[dfFile.index[0], 'map'], header=None, 
                                    sep=r"\s*", names=['sequence number', 'fortran column','fortran row',
                                                       'latitude', 'longitude', 'depth'])
            
            # Add latitude and longitude
            multiple = int(dfLake.shape[0] / mapFile.shape[0])

            dfLake['latitude'] =  pd.concat([mapFile] * multiple, ignore_index=True)['latitude'] 
            dfLake['longitude'] =  pd.concat([mapFile] * multiple, ignore_index=True)['longitude']
            
            # Append lake DataFrames
            self.df_fcast = self.df_fcast.append(dfLake, ignore_index=True)
            
        # -------------------------------------------------------------------------------------------------------------------- #
        # Merge NCAST and FCAST 
        # -------------------------------------------------------------------------------------------------------------------- #
        
        # Add NCAST
        self.df = self.df_ncast 
        
        # Add FCAST where time > maximum NCAST time
        self.df = self.df.append(self.df_fcast[self.df_fcast['datetime'] > self.df_ncast['datetime'].max()])

In [13]:
# Get all newest files
noaaData = NoaaData(noaa_files, url_map)

In [15]:
# Show newest FCAST files
noaaData.newest_files_fcast.head()

Unnamed: 0,filename,file_extension,filetype,lake,file_datetime,current_datetime,forecast_type,file_url
0,e201615912.0.cur,cur,SURFACE CURRENTS,erie,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
1,e201615912.0.swt,swt,SURFACE TEMPS,erie,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
2,e201615912.0.wav,wav,WAVES,erie,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
3,e201615912.0.wnd,wnd,WINDS,erie,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...
4,h201615912.0.cur,cur,SURFACE CURRENTS,huron,2016-06-07 12:00:00,2016-06-08 00:54:35.509431,FCAST,http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridde...


In [18]:
noaaData['newest_files_fcast']

TypeError: 'NoaaData' object is not subscriptable

<h1>Step 3</h1>
<h3>Save Data From Newest Files As DataFrame</h3>

In [16]:
# Get newest files
noaaData.df_setup()

# View DataFrame
noaaData.df_ncast.head()

Unnamed: 0,year,day,hour,datetime,grid_number,latitude,longitude,map,lake,wave_height,wave_direction,wave_period,wind_speed,wind_direction,surface_temperature,current_speed,current_direction


In [17]:
# Get data
noaaData.df_fill()

KeyboardInterrupt: 

In [None]:
# View NCAST DataFrame
noaaData.df_ncast.tail()

In [None]:
# View FCAST DataFrame
noaaData.df_fcast.tail()

In [None]:
# View merged DataFrame
noaaData.df.tail()

In [None]:
# Save DataFrame to CSV
# noaaData.df.to_csv('C:\\Users\\sgoodfellow\\Documents\\Sebastian\\Projects\\Websites\\Surfcast\\GetData\\Test.csv')

<h1>Step 4</h1>
<h3>Load Surf Spots</h3>
<h5>Thanks Grif!</h5>

In [None]:
# Set surf spots file path and file name
path = r'C:\Users\sgoodfellow\Documents\Sebastian\Projects\Websites\Surfcast\GetData\SurfSpots'
file = 'SurfSpots.csv'

In [None]:
# Load surf spot data as DataFrame
surf_spots = pd.read_csv(os.path.join(path, file))

In [None]:
# Show surf spot data
surf_spots.sort('lake')

<h1>Step 5</h1>
<h3>Plot Raw Data</h3>

In [None]:
# Plot grid points
plt.figure(figsize=(15, 10))

for lake in noaaData.df_ncast.lake.unique():
    
    df = noaaData.df_ncast[(noaaData.df_ncast.lake == lake) & 
                               (noaaData.df_ncast.datetime == noaaData.df_ncast.datetime.unique()[0])] 
    
    plt.plot(-df.longitude, df.latitude, marker='.', linestyle='none', ms=2)

plt.plot(surf_spots.longitude, surf_spots.latitude, color='k', marker='o', linestyle='none', ms=5)    
    
plt.tick_params(labelsize=14)

plt.xlabel('Longitude', fontsize=20)
plt.ylabel('Latitude', fontsize=20)

plt.grid('on')

In [None]:
# Plot grid points as 3D scatter
attribute = 'wave_height'

plt.figure(figsize=(10, 6))
    
df = noaaData.df_ncast[(noaaData.df_ncast.datetime == noaaData.df_ncast.datetime.unique()[0])] 

sc = plt.scatter(-df.longitude, df.latitude, c = df[attribute], edgecolors='none')

cb = plt.colorbar(sc)
cb.set_label(attribute.replace('_', ' ').title(), fontsize=20)
plt.tick_params(labelsize=14)

plt.xlabel('Longitude', fontsize=20)
plt.ylabel('Latitude', fontsize=20)

plt.grid('on')

<h1>Step 6</h1>
<h3>Create SQL Database</h3>

In [None]:
# Initializes database with filename 311_8M.db in current directory
#surfcast_sql_db = create_engine(r'sqlite:///C:\Users\Sebastian\Projects\Websites\Surfcast\GetData\Surfcast.db')

<h1>Step 7</h1>
<h3>Update SQL Database</h3>

In [None]:
noaa_files.df.to_sql('data', surfcast_sql_db, if_exists='append')

In [None]:
df = pd.read_sql_query('SELECT lake FROM data', surfcast_sql_db)
df.head()

In [None]:
data  = pd.DataFrame(columns=['year', 'day', 'hour', 'datetime', 'grid_number', 'latitude', 'longitude', 'map', 'lake',
                              'grid_number', 'wave_height', 'wave_direction', 'wave_period',
                              'wind_speed', 'wind_direction',
                              'surface_temperature',
                              'currect_speed', 'current_direction',
                              'ice_concentration', 'ice_thickness', 'ice_speed', 'ice_direction']) 

data.head()

In [None]:
# NOAA attributes
wave =        ['grid_number', 'wave_height', 'wave_direction', 'wave_period']
wind =        ['grid_number', 'wind_speed', 'wind_direction']
temperature = ['grid_number', 'surface_temperature']
current =     ['grid_number', 'currect_speed', 'current_direction']
ice =         ['grid_number', 'ice_concentration', 'ice_thickness', 'ice_speed', 'ice_direction']

lake = 'superisor'

# Load text load
df = pd.read_table('http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridded_fields/FCAST/h201615012.0.wav', header=None)

# Name text column
df.columns = ['data']

# Set global variable text file header
headerString = df.ix[0, 'data']

# Define function to get text file header column
def getHeaderValue(val):
    
    global headerString
    
    if 'dat' in val and val != headerString:
        headerString = val
        return headerString
    else:
        return headerString

# Get text file header column    
df['header'] = df['data'].map(lambda x: getHeaderValue(x))

# Add Lake column
df['lake'] = lake

# Remove header row
df['data'] = df['data'].map(lambda x: np.nan if 'dat' in x else x)
df = df.dropna()

df['header'].str.split(return_type='frame')

# Get Year column
df[['year', 'day', 'hour', 'map', 'type', 'point']] = df['header'].str.split(return_type='frame')

# Get wave data
df[['grid_number', 'wave_height', 'wave_direction', 'wave_period']] = df['data'].str.split(return_type='frame')

# Define function to format file
def getMapFile(val, lake):
    
    map = val.split('/')[-1]
    map = map.split('.')[0] + '.' + 'map'
    
    if lake == 'superior':
        map = 'superior' + map.split('sup')[1]
        
    return map

# format file
df['map'] = df['map'].map(lambda x: getMapFile(x, lake))

# Add Datetime Object
df['datetime'] = df.apply(lambda x: datetime.strptime(x['year'] + x['day'] + x['hour'], "%Y%j%H"), axis=1)

# Drop 
df = df.drop('data', axis=1).drop('header', axis=1)

df.head()

In [None]:
yup = df['header'].str.split(return_type='frame')
yup.head()

In [None]:
yup[[4, 5]].apply(lambda x : x[0] + ' ' + x[1], axis=1)

In [None]:
yup[[4, 5]]

In [None]:
pd.concat([data, df], join='outer', axis = 1)

data.head()

In [None]:
wave =        ['grid_number', 'wave_height', 'wave_direction', 'wave_period']

In [None]:
wave + ['hi']

In [None]:
wave

In [None]:
dun = {
    'wave': ['wave_height', 'wave_direction', 'wave_period'],  
    'wind': ['wind_speed', 'wind_direction'],
    'temperature': ['surface_temperature'],
    'current': ['currect_speed', 'current_direction'],
    'ice': ['ice_concentration', 'ice_thickness', 'ice_speed', 'ice_direction']
}

In [None]:
dun['wave']

In [None]:
mapTest = pd.read_table(url_map + 'erie2km.map', header=None, sep=r"\s*", names=['sequence number',
                                                                                 'fortran column',
                                                                                 'fortran row',
                                                                                 'latitude',
                                                                                 'longitude',
                                                                                 'depth'])
mapTest.head()