In [3]:
%matplotlib inline
import numpy as np
import matplotlib.tri as Tri
import matplotlib.pyplot as plt
#from mpl_toolkits.basemap import Basemap
from netCDF4 import Dataset as NetCDFFile 
import netCDF4
from netCDF4 import num2date, date2num, date2index
import datetime as dt
import pandas as pd
from StringIO import StringIO
import xarray as xr

In [4]:
import re

def extractURLsatellite(fileURL, satName):
    """
    Function to extract the URLs for a specific satellite from the IMOS URLs list
    
    Parameters:
    -----------
    
    - fileURL : IMOS URLs list as a txt file
    - satName : name of the satellite such as JASON-2 JASON-3 
    
    Ouputs:
    ------
    
    - getFiles : list of URLs for the desired satellite
    
    """
    
    getFiles = []
    
    with open(fileURL) as f:
        for line in f:
            if re.search(r"%s"%satName, line):
                changeURL = re.sub('http://data.aodn.org.au', 'http://thredds.aodn.org.au/thredds/dodsC', line)
                getFiles.append(changeURL)
                
    return getFiles

In [5]:
jason2URL = extractURLsatellite( fileURL = 'IMOSURLs.txt', satName = 'JASON-2')

jason3URL = extractURLsatellite( fileURL = 'IMOSURLs.txt', satName = 'JASON-3')

saralURL = extractURLsatellite( fileURL = 'IMOSURLs.txt', satName = 'SARAL')

sentinel3aURL = extractURLsatellite( fileURL = 'IMOSURLs.txt', satName = 'SENTINEL-3A')

cryosat2URL = extractURLsatellite( fileURL = 'IMOSURLs.txt', satName = 'CRYOSAT-2')

enviURL = extractURLsatellite(fileURL = 'IMOSURLs.txt', satName = 'ENVISAT')

geosatURL = extractURLsatellite(fileURL = 'IMOSURLs.txt', satName = 'GEOSAT')

ersURL = extractURLsatellite(fileURL = 'IMOSURLs.txt', satName = 'ERS-2')

gfoURL = extractURLsatellite(fileURL = 'IMOSURLs.txt', satName = 'GFO')

topURL = extractURLsatellite(fileURL = 'IMOSURLs.txt', satName = 'TOPEX')

In [6]:
allURL = []
allURL.append(jason2URL)
allURL.append(jason3URL)
allURL.append(saralURL)
allURL.append(sentinel3aURL)
allURL.append(cryosat2URL)
allURL.append(enviURL)
allURL.append(geosatURL)
allURL.append(ersURL)
allURL.append(gfoURL)
allURL.append(topURL)

boxLat = []
boxLon = []
boxWh = []
boxT = []
boxQ = []

In [8]:
latmin = -14.0
latmax = -13.0  
lonmin = 145.0
lonmax = 146.0 

if latmin>latmax:
    print('Error wrong definition of min and max lat!!!')

if lonmin>lonmax:
    print('Error wrong definition of min and max lon!!!')



start_date = dt.datetime(1985,1,1)
end_date = dt.datetime(2019,2,21)

#start_date = dt.datetime(2010,1,1)
#end_date = dt.datetime(2010,12,31)


In [15]:
for u in range(len(allURL)):
    urlON = allURL[u]
    for k in range (len(urlON)):
        ncs = NetCDFFile(urlON[k])
        lats = ncs.variables['LATITUDE'][:]
        lons = ncs.variables['LONGITUDE'][:]   
        if u == 2:
            wh = ncs.variables['SWH_KA_CAL'][:]
            qc = ncs.variables['SWH_KA_quality_control'][:]
        
        else:
            wh = ncs.variables['SWH_KU_CAL'][:]
            qc = ncs.variables['SWH_KU_quality_control'][:]
            
        # Get desired time interval  
        time_var = ncs.variables['TIME']
        tt = ncs.variables['TIME'][:]
        timing = netCDF4.num2date(tt,time_var.units)
        
        #data in correct time, quality flag and lat/lon
        for p in range(len(timing)):
            #print(u,' - ',k,'/',len(urlON),' - ',p,'/',len(timing))
            if timing[p] >= start_date and timing[p] <= end_date:
                if lats[p]>latmin and lats[p]<latmax and lons[p]>lonmin and lons[p]<lonmax:
                    if np.where(np.logical_and(0<wh[p]<10,qc[p]==1)):
                        boxLat.append(lats[p])
                        boxLon.append(lons[p])
                        boxWh.append(wh[p])
                        boxT.append(tt[p])
                        boxQ.append(qc[p])

TypeError: can't compare datetime.datetime to NoneType

In [10]:
for k in range(len(boxLat)):
    if k == 0:
        lat = boxLat[k]
        lon = boxLon[k]
        wh = boxWh[k]
        tt = boxT[k]
        qc = boxQ[k]
    else:
        lat = np.append(lat,boxLat[k])
        lon = np.append(lon,boxLon[k])
        wh = np.append(wh,boxWh[k])
        tt = np.append(tt,boxT[k])
        qc = np.append(qc,boxQ[k])

In [11]:
import pandas as pd
import numpy as np

In [12]:
df = pd.DataFrame(
    {'lat':lat.flatten(),
     'lon':lon.flatten(),
     'wh':wh.flatten(),
     'tt':tt.flatten(),
     'qc':qc.flatten()
    })
nameCSV = 'melville_file.csv'
df.to_csv(str(nameCSV),columns=['lat', 'lon', 'wh', 'tt', 'qc'], sep=' ', index=False ,header=1)

In [13]:
#data = pd.read_csv(str('help'), sep=r'\s+', engine='c', header=None, na_filter=False, \
                                  # dtype=np.float, low_memory=False)
nameCSV = 'melville_file.csv'
data= pd.read_csv(str(nameCSV), sep=r'\s+', engine='c', header=0, na_filter=False, \
                               dtype=np.float, low_memory=False)

lat = data.values[:,0]
lon = data.values[:,1]
wh = data.values[:,2]
tt = data.values[:,3]
qc = data.values[:,4]

In [14]:
len(lat)

36666

In [None]:
df = pd.DataFrame(
    {'lat':lat.flatten(),
     'lon':lon.flatten(),
     'wh':wh.flatten(),
     'tt':tt.flatten(),
     'qc':qc.flatten()
    })

nameCSV = 'test'
df.to_csv(str(nameCSV)+'.csv',columns=['lat', 'lon', 'wh', 'tt', 'qc'], sep=' ', index=False ,header=1)

In [None]:
#df

In [None]:
data = pd.read_csv(str('nameCSV'), sep=r'\s+', engine='c', header=0, na_filter=False, \
                   dtype=np.float, low_memory=False)

 # xyz = pandas.read_csv(str(inDEM), sep=r'\s+', engine='c', header=None, na_filter=False, \
                                   #dtype=numpy.float, low_memory=False)

lat = data.values[:,0]
lon = data.values[:,1]
wh = data.values[:,2]
tt = data.values[:,3]
qc = data.values[:,4]

In [None]:
data = pd.read_csv(str('foreverfile'), sep=r'\s+', engine='c', header=0, na_filter=False, \
                                   dtype=np.float, low_memory=False)
lat = data.values[:,0]
lon = data.values[:,1]
wh = data.values[:,2]
tt = data.values[:,3]
qc = data.values[:,4]

In [None]:
lat

In [None]:
for k in range(len(wh)):
    print wh[k]
    if wh[k]>20:
        print wh[k]

In [None]:
max(wh)
netCDF4.num2date,time_var.units

In [None]:
print len(wh)
print len(lat)
print len(lon)

In [None]:
days = [] #daystart
for k in range(len(wh)):
       
    t1 = netCDF4.num2date(wh[k],time_var.units)
    if k == 0: #get day 0. Then else: every day after that
        days.append(0)
        dd = netCDF4.num2date(wh[k],time_var.units)
        it = 0
    else:
        if t1.day != dd.day: #if day 1 is not equal to day 2, then append
            #print dd.day,t1.day
            days.append(k)
            it += 1
            dd = netCDF4.num2date(wh[k],time_var.units)
       

In [None]:
wh_ = wh[days]
qc_ = qc[days]
lon_ = lon[days]
tt_ = tt[days]
lat_=lat[days]

print len(whpos), 'whpos'
print len(qc_),'qc_'
print len(lon_),'lon_'
print len(tt_),'tt_'
print len(lat_),'lat_'

In [None]:
print 'new days'

print 'mean whpos', np.mean(wh_)
print 'max whpos',max(wh_)
print 'min whpos', min (wh_)

print 'min lon',min(lon_)
print 'min lon',min(lon_)
print 'max lon',max(lon_)

print 'min lat', min(lat_)
print 'max lat',max(lat_)

print min(netCDF4.num2date(tt_,time_var.units))
print max (netCDF4.num2date(tt_,time_var.units))