<h1>Surfcast.com</h1>
<h5>A BSC Creation</h5>

In [11]:
# Reset Notebook
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [12]:
# Import python packages
from bs4 import BeautifulSoup
from urllib import request
import requests
from datetime import datetime
import matplotlib
import numpy as np
import matplotlib.pyplot as plt

# Matplotlib inline
%matplotlib inline

In [13]:
# Set URL path to NOAA database
url = "http://www.glerl.noaa.gov/ftp/EMF/glcfs/gridded_fields/FCAST/"

In [14]:
# Get HTML from database page
html = requests.get(url)

In [19]:
# Create BeautifulSoup object
html_obj = BeautifulSoup(html.content)

In [20]:
# File types of interest [wave, wind, surface current, surface temperature]
file_types = ['wav', 'wnd', 'cur', 'swt']

In [16]:
# Create empty dictionary
files = []

In [21]:
# Define NOAA file class
class NoaaFile:
    
    """
    Class: NoaaFile
        - This class decribes FCAST|NCAST database files.
    """

    # Initialize object
    def __init__(self, **kwargs):
        
        # Set object attributes
        self.filename = kwargs.get('filename')                  # File name
        self.file_extension = kwargs.get('file_extension')      # File extension
        self.filedate = kwargs.get('filedate')                  # File upload date
        self.filetime = kwargs.get('filetime')                  # File upload time
        self.filesize = kwargs.get('filesize')                  # File size (MB)
        
        # Convert data and time strings to a date|time object
        self.file_datetime = datetime.strptime(self.filedate + '-' + self.filetime, '%d-%b-%Y-%H:%M')

        # Set file type
        if self.file_extension == 'wav':           # Wave
            self.filetype = 'WAVES'
        elif self.file_extension == 'wnd':         # Wind
            self.filetype = 'WINDS'
        elif self.file_extension == 'cur':         # Surface Current
            self.filetype = 'SURFACE CURRENTS'
        elif self.file_extension == 'swt':         # Surface Temperature
            self.filetype = 'SURFACE TEMPS'
            
        # Set great lake
        if self.filename[0] == 'e':       # Lake Erie
            self.lake = 'erie'
        elif self.filename[0] == 'h':     # Lake Huron
            self.lake = 'huron'
        elif self.filename[0] == 'o':     # Lake Ontario
            self.lake = 'ontario'
        elif self.filename[0] == 's':     # Lake Superior
            self.lake = 'superior'
        elif self.filename[0] == 'm':     # Lake Michigan
            self.lake = 'michigan'

In [22]:
# Create list of NoaaFile objects for all current database entires
for link in html_obj.findAll('tr'):
    if len(link.contents) == 5 and link.contents[1].text.split('.')[-1] in file_types:
        files.append(NoaaFile(filename=link.contents[1].text,
                              file_extension=link.contents[1].text.split('.')[-1],
                              filedate=link.contents[2].text.split(' ')[0],
                              filetime=link.contents[2].text.split(' ')[1],
                              filesize=link.contents[3].text[0:len(link.contents[3].text)-1]))

In [252]:
# Define file download funtion
def download_file(url, files, lake, attribute):
    
    """
    Function: download_file
        - This function will download the most recently uploaded file to the NOAA database that 
          corresponds to the lake and attribute input by the user.
          
    Inputs:
        - url:        Database URL (NCAST or FCAST)
        - files:      List of all NOAA file objects currently in database
        - lake:       Lake of interest [ontario, michigan, erie, superior, huron] 
        - attribute:  Data measurement [winds, waves, surface currents, surface temperatures] 
        
    Outputs:
        - file:       The corresponding text file
    """
    
    # Find most up to date FORECAST
    date_time = files[0].file_datetime
    for file in files:
        if file.file_datetime > date_time and file.lake == lake and file.filetype == attribute.upper():
            date_time = file.file_datetime
            filename = file.filename

    # Join url and filename
    file_url = str(url + filename)

    # Send file request to server and download
    response = request.urlopen(file_url)

    # Parse test file by line breaks
    file = str(response.read()).split('\\n')

    return file

In [251]:
# Download most up to date FORECAST file
file = download_file(url, files, 'ontario', 'waves')

In [34]:
# Define Lake data class
class LakeDataFile(NoaaFile):
    
    # Initialize object
    def __init__(self, **kwargs):
        
        # Set object attributes
        self.textfile = []                  # Raw text file
        self.lake data
        
    # Download text file
    def download_file(self, url, files, lake, attribute):
        
        """
        Function: download_file
            - This function will download the most recently uploaded file to the NOAA database that 
              corresponds to the lake and attribute input by the user.

        Inputs:
            - url:        Database URL (NCAST or FCAST)
            - files:      List of all NOAA file objects currently in database
            - lake:       Lake of interest [ontario, michigan, erie, superior, huron] 
            - attribute:  Data measurement [winds, waves, surface currents, surface temperatures] 

        Outputs:
            - file:       The corresponding text file
        """

        # Find most up to date FORECAST
        date_time = files[0].file_datetime
        for file in files:
            if file.file_datetime > date_time and file.lake == lake and file.filetype == attribute.upper():
                date_time = file.file_datetime
                filename = file.filename

        # Join url and filename
        file_url = str(url + filename)

        # Send file request to server and download
        response = request.urlopen(file_url)

        # Parse test file by line breaks
        self.textfile = str(response.read()).split('\\n')
        
    
#         # Extract header information  
#         for row in range(len(file)):
#             if files[file_num].filetype in file[row]:
#                 # Year, day, hour
#                 if file[row][0] == 'b':  
#                     self.year = file[row][2:6]  # year
#                     self.day = file[row][7:10]  # day of the year
#                     print(self.year)
#                     print(self.day)
#                     print('')
#                 else: 
#                     self.year = file[row][0:4]  # year
#                     self.day = file[row][5:8]  # day of the year
#                     print(self.year)
#                     print(self.day)
#                     print('')

In [43]:
dummy = LakeData()

In [46]:
dummy.textfile

[]

In [49]:
dummy.download_file(url, files, 'ontario', 'waves')

In [50]:
dummy.textfile

["b'2015 354 13     /glcfs/bathy/ontario5km.dat    WAVES                   746",
 '     1   0.243   96  2.0',
 '     2   0.360   95  2.6',
 '     3   0.467   94  3.0',
 '     4   0.544   94  3.4',
 '     5   0.129  143  1.8',
 '     6   0.189  141  2.1',
 '     7   0.210  141  2.2',
 '     8   0.263  107  2.1',
 '     9   0.377   97  2.6',
 '    10   0.476   94  2.9',
 '    11   0.565   92  3.2',
 '    12   0.644   91  3.5',
 '    13   0.714   91  3.7',
 '    14   0.721  176  4.2',
 '    15   0.231   61  1.9',
 '    16   0.303   63  2.2',
 '    17   0.346   63  2.4',
 '    18   0.371   71  2.5',
 '    19   0.405   92  2.6',
 '    20   0.435   99  2.7',
 '    21   0.501   97  3.0',
 '    22   0.576   95  3.2',
 '    23   0.649   94  3.4',
 '    24   0.728   92  3.7',
 '    25   0.792   92  3.9',
 '    26   0.848   92  4.1',
 '    27   0.895   91  4.2',
 '    28   0.761  145  4.3',
 '    29   1.015  140  4.9',
 '    30   1.107  134  5.0',
 '    31   1.190  131  5.2',
 '    32   1.259  12