# Build Full Environmental Dataset
---
### Carter J. Humphreys
Email: [chumphre@oswego.edu](mailto:chumphre@oswego.edu) | GitHub:[@HumphreysCarter](https://github.com/HumphreysCarter) | Website: [carterhumphreys.com](http://carterhumphreys.com/)

In [136]:
import os
import pandas as pd
import math
import numpy as np

from datetime import datetime, timedelta

## Set Data Paths

In [137]:
# Root folder that holds every input data set
dataDir = '../data'

# One sub-folder per data source, all directly under dataDir
bandDataPath = dataDir + '/BAND_POSITION'
waterTempPath = dataDir + '/WATER_TEMP'
iceCoverPath = dataDir + '/ICE_COVER'
enviormentDataPath = dataDir + '/BUFKIT'

# First and last event number to process (both inclusive)
eventStart, eventEnd = 1, 36

## Methods

In [138]:
def find_nthIndex(fullString, find, n):
    """Return the index of the n-th occurrence of `find` in `fullString`.

    Occurrences are counted from 1 and never overlap (each search resumes
    right after the previous match). Returns -1 when `fullString` holds fewer
    than n occurrences. For n <= 1 the result is that of a plain `str.find`.
    """
    position = fullString.find(find)
    step = len(find)
    for _ in range(n - 1):
        if position < 0:
            # Ran out of occurrences before reaching the n-th one
            break
        position = fullString.find(find, position + step)
    return position

# Rounds to the nearest hour: minutes 0-29 round down, minutes 30-59 round up
def roundTimeToNearestHour(t):
    """Return `t` rounded to the nearest whole hour.

    Seconds and microseconds are discarded; only the minute field decides the
    direction. Rounding up may roll over into the next day, month or year.
    """
    top_of_hour = t.replace(minute=0, second=0, microsecond=0)
    carry = timedelta(hours=1) if t.minute >= 30 else timedelta(0)
    return top_of_hour + carry

def degreesToRadians(degrees):
    """Convert an angle from degrees to radians."""
    radians_per_degree = math.pi / 180
    return degrees * radians_per_degree

def radiansToDegrees(radians):
    """Convert an angle from radians to degrees."""
    degrees_per_radian = 180 / math.pi
    return radians * degrees_per_radian

# Returns the bearing from point 1 towards point 2, in degrees
def bearingBetweenPoints(lat1, lon1, lat2, lon2):
    """Return the bearing from (lat1, lon1) towards (lat2, lon2) in degrees.

    All four arguments are in degrees. The result is the raw atan2 angle
    converted to degrees, so it lies in [-180, 180] with 0 pointing north and
    90 pointing east; callers that need 0-360 must add 360 to negative values.
    """
    phi1, lam1, phi2, lam2 = (degreesToRadians(v) for v in (lat1, lon1, lat2, lon2))

    # East and north components of the direction towards point 2
    east = math.sin(lam2 - lam1) * math.cos(phi2)
    north = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(lam2 - lam1)

    return radiansToDegrees(math.atan2(east, north))

# Returns distance between two lat/lon points in KM
def distanceBetweenPoints(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two lat/lon points in km.

    Uses the spherical law of cosines. All four arguments are in degrees.
    The central angle is converted to nautical miles (60 per degree of arc)
    and then to kilometres.
    """
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    dLon = math.radians(lon1 - lon2)

    cosAngle = math.sin(phi1) * math.sin(phi2) + math.cos(phi1) * math.cos(phi2) * math.cos(dLon)

    # Floating-point rounding can push the cosine a hair outside [-1, 1] for
    # identical (or antipodal) points, which would make math.acos raise
    # ValueError; clamp it back into the valid domain first.
    cosAngle = max(-1.0, min(1.0, cosAngle))

    arcDegrees = math.degrees(math.acos(cosAngle))
    return (arcDegrees * 60) * 1.852  # 1 degree of arc = 60 nm, 1 nm = 1.852 km

# Returns azimuthal angle (in degrees) and distance (in km)
def getAzRng(originLat, originLon, lat, lon):
    """Return the azimuth and range of (lat, lon) as seen from the origin.

    The result is a dict with 'Azimuth' (degrees, negative bearings shifted up
    by 360 so the value is non-negative) and 'Range' (kilometres).
    """
    bearing = bearingBetweenPoints(originLat, originLon, lat, lon)
    azimuth = bearing + 360 if bearing < 0 else bearing
    distance = distanceBetweenPoints(originLat, originLon, lat, lon)

    return {'Azimuth': azimuth, 'Range': distance}

## Band Position Data

In [139]:
# Load band position data into dataframe

# Reference point for the azimuth/range columns (labelled "LO1" in the output)
lo1Lat=43.62
lo1Lon=-77.41

data=[]
# eventId rather than `id`, so the builtin id() is not clobbered for the rest of the session
for eventId in range(eventStart, eventEnd+1):
    eventFolder=f'Ontario_LES_Event{str(eventId).zfill(2)}'
    eventDir=f'{bandDataPath}/{eventFolder}'

    for fileName in os.listdir(eventDir):
        band_df = pd.read_csv(f'{eventDir}/{fileName}')

        # The timestamp is everything after the second underscore in the file name
        stamp=fileName[find_nthIndex(fileName, '_', 2)+1:].replace('.csv','')
        obsTime=roundTimeToNearestHour(datetime.strptime(stamp, '%Y%m%d_%H%M%S'))

        # Band start/end are the first/last rows; columns 0 and 1 are read as
        # latitude and longitude, column 4 of the first row as the intensity
        latStart=band_df.iloc[0, 0]
        lonStart=band_df.iloc[0, 1]
        latStop=band_df.iloc[-1, 0]
        lonStop=band_df.iloc[-1, 1]
        dbz=band_df.iloc[0, 4]

        # Midpoint is the mean of all points. Only the two needed columns are
        # averaged so that non-numeric columns cannot break the calculation.
        latMid=band_df['Latitude'].mean()
        lonMid=band_df['Longitude'].mean()

        AzRng=getAzRng(lo1Lat, lo1Lon, latStart, lonStart)

        data.append([obsTime, latStart, lonStart, latMid, lonMid, latStop, lonStop, dbz, AzRng['Azimuth'], AzRng['Range']])

posData=pd.DataFrame(data, columns = ['time [UTC]', 'BandStart_Latitude', 'BandStart_Longitude', 'BandMidpoint_Latitude', 'BandMidpoint_Longitude', 'BandEnd_Latitude', 'BandEnd_Longitude', 'BandIntensity [dBZ]', 'BandAz_LO1 [deg]', 'BandRng_LO1 [km]'])
posData=posData.sort_values('time [UTC]')
posData=posData.drop_duplicates()
posData

Unnamed: 0,time [UTC],BandStart_Latitude,BandStart_Longitude,BandMidpoint_Latitude,BandMidpoint_Longitude,BandEnd_Latitude,BandEnd_Longitude,BandIntensity [dBZ],BandAz_LO1 [deg],BandRng_LO1 [km]
14,2015-10-17 10:00:00,43.535946,-76.223320,43.353896,-75.329963,43.234745,-74.595901,-9999.0,95.175078,95.981749
24,2015-10-17 11:00:00,43.532036,-76.217194,43.334646,-75.373511,43.143089,-74.490379,4.0,95.400996,96.518679
32,2015-10-17 12:00:00,43.521149,-76.232010,43.346326,-75.618544,43.195889,-74.990135,1.5,96.200389,95.472330
6,2015-10-17 13:00:00,43.518513,-76.217072,43.339364,-75.832468,43.176296,-75.413177,5.0,96.285512,96.702867
31,2015-10-17 14:00:00,43.565903,-76.202736,43.259891,-75.604233,42.989628,-75.002983,6.5,93.124002,97.344800
...,...,...,...,...,...,...,...,...,...,...
1145,2019-04-01 06:00:00,43.302155,-76.797066,43.181426,-75.945325,42.964691,-75.063576,17.0,125.332050,60.756589
1144,2019-04-01 07:00:00,43.548927,-76.197563,43.250276,-75.728180,42.978962,-75.235283,8.0,94.208553,97.908125
1147,2019-04-01 08:00:00,43.553471,-76.185211,43.228888,-75.635044,42.981030,-75.192307,8.5,93.866284,98.856365
1143,2019-04-01 09:00:00,43.320927,-76.710991,43.121541,-76.320671,42.869659,-75.870171,12.5,120.280638,65.436882


## Environment Data

In [140]:
# Load environment data into dataframe
eventData=[]
# eventId rather than `id`, so the builtin id() is not clobbered for the rest of the session
for eventId in range(eventStart, eventEnd+1):
    # Accumulates this event's files, joined on their common timestamps
    event_df=pd.DataFrame()
    eventFolder=f'Ontario_LES_Event{str(eventId).zfill(2)}'
    eventDir=f'{enviormentDataPath}/{eventFolder}'

    for dataFile in os.listdir(eventDir):
        try:
            df=pd.read_csv(f'{eventDir}/{dataFile}', parse_dates=['time [UTC]'], infer_datetime_format=True)
            df=df.drop(['model', 'station'], axis=1)

            if len(event_df) == 0:
                event_df=df
            else:
                # Inner join keeps only the hours present in every file;
                # clashing column names get pandas' _x/_y suffixes
                event_df=pd.merge(event_df, df, on='time [UTC]', how='inner')
        except Exception as err:
            # Best effort: report the file and the reason, then carry on with
            # the remaining files. Catching Exception (not a bare except) lets
            # KeyboardInterrupt still stop the loop.
            print(f'{dataFile}: {err!r}')

    eventData.append(event_df)

env_df=pd.concat(eventData)
env_df=env_df.sort_values('time [UTC]')
env_df=env_df.drop_duplicates()
env_df

Unnamed: 0,time [UTC],z_925mb [m]_x,T_925mb [degC]_x,RH_925mb [%]_x,u_925mb [kt]_x,v_925mb [kt]_x,z_850mb [m]_x,T_850mb [degC]_x,RH_850mb [%]_x,u_850mb [kt]_x,...,z_700mb [m]_y,T_700mb [degC]_y,RH_700mb [%]_y,u_700mb [kt]_y,v_700mb [kt]_y,z_500mb [m]_y,T_500mb [degC]_y,RH_500mb [%]_y,u_500mb [kt]_y,v_500mb [kt]_y
0,2015-10-17 10:00:00,803.81,-2.83,74.37,6.59,-15.85,1467.96,-7.52,73.32,12.35,...,2923.61,-13.54,15.11,26.70,-7.37,5401.54,-29.67,25.71,38.48,-1.77
1,2015-10-17 11:00:00,804.51,-1.72,68.59,7.45,-15.29,1470.55,-7.60,99.12,15.62,...,2927.03,-14.12,14.21,27.32,-9.45,5399.77,-30.21,25.77,37.47,-3.50
2,2015-10-17 12:00:00,810.72,-1.56,64.15,7.17,-16.18,1476.62,-7.45,74.68,14.95,...,2927.59,-14.74,23.99,20.99,-11.98,5396.19,-30.40,24.93,37.17,-5.82
3,2015-10-17 13:00:00,816.24,-1.11,58.38,7.55,-17.10,1483.09,-7.00,58.03,12.84,...,2934.41,-15.24,33.86,17.20,-14.21,5399.68,-30.47,20.28,37.19,-4.76
4,2015-10-17 14:00:00,817.79,-1.43,66.92,6.55,-15.72,1484.12,-7.00,61.74,10.13,...,2937.42,-15.73,38.57,19.43,-12.18,5401.98,-30.02,15.75,37.58,-2.74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,2019-04-01 06:00:00,749.57,-6.41,91.26,23.41,-3.79,1403.42,-12.05,88.87,24.87,...,2872.08,-21.48,76.45,22.90,-3.56,5286.53,-33.67,10.18,31.66,14.61
10,2019-04-01 07:00:00,756.90,-7.09,86.11,25.09,-5.27,1409.37,-12.29,84.99,26.45,...,2879.69,-21.78,78.21,24.82,-2.49,5288.65,-34.46,11.02,33.55,9.89
11,2019-04-01 08:00:00,753.46,-7.76,90.77,24.95,-6.80,1404.78,-12.38,83.38,28.97,...,2873.55,-22.18,78.29,27.00,-4.13,5278.53,-34.90,12.60,34.36,4.09
12,2019-04-01 09:00:00,758.64,-8.12,90.23,23.68,-5.62,1408.81,-12.91,84.44,25.52,...,2875.96,-22.04,71.96,29.91,-5.31,5280.02,-35.27,14.08,36.31,-0.16


## Ice Cover Data

In [141]:
# Lake Ontario ice cover: one row per julian day, one column per year
ice_ont_df = (
    pd.read_csv(f'{iceCoverPath}/ont.csv')
    .drop(columns=['date', 'jday.1', 'date.1'])  # keep only 'jday' and the value columns
    .sort_values('jday')
)
ice_ont_df

Unnamed: 0,jday,1973,1974,1975,1976,1977,1978,1979,1980,1981,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,Average
48,1.0,2.7,5.1,0.0,2.8,19.7,5.7,2.2,0.0,28.2,...,1.65,0.65,0.68,4.95,0.5,0.10,0.85,14.67,0.87,3.33
49,2.0,2.7,7.1,1.1,4.0,20.7,5.7,2.2,0.7,31.7,...,1.32,0.66,0.66,6.24,0.6,0.10,0.73,19.10,0.86,3.85
50,3.0,5.6,9.5,0.9,5.1,21.6,5.7,2.1,0.9,34.7,...,1.63,0.34,0.71,19.33,0.7,0.10,0.90,22.63,1.07,4.59
51,4.0,7.7,11.5,0.8,6.3,22.7,5.7,2.1,1.0,38.2,...,1.17,0.49,1.05,14.13,0.8,0.10,0.90,11.89,1.09,4.74
52,5.0,10.5,13.8,0.6,6.5,23.7,6.3,2.0,1.0,41.1,...,1.16,0.90,1.22,9.72,0.8,0.62,1.19,11.78,0.96,5.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43,361.0,2.6,0.0,0.0,1.8,12.2,5.0,0.7,0.0,12.4,...,1.05,0.00,0.67,1.83,0.1,0.00,0.92,2.44,1.04,1.87
44,362.0,2.9,0.0,0.0,1.7,13.9,5.6,1.0,0.0,15.8,...,0.99,0.00,0.58,1.27,0.1,0.00,0.92,4.80,0.85,2.11
45,363.0,2.9,0.0,0.0,1.6,15.5,5.7,1.3,0.0,18.8,...,1.01,0.28,0.62,1.59,0.1,0.00,0.86,9.62,0.84,2.46
46,364.0,2.8,0.0,0.0,1.5,17.2,5.7,1.6,0.0,22.3,...,3.25,0.64,0.76,2.01,0.2,0.31,0.52,11.77,0.84,2.79


In [142]:
# Lake Erie ice cover: one row per julian day, one column per year
ice_eri_df = (
    pd.read_csv(f'{iceCoverPath}/eri.csv')
    .drop(columns=['date', 'jday.1', 'date.1'])  # keep only 'jday' and the value columns
    .sort_values('jday')
)
ice_eri_df

Unnamed: 0,jday,1973,1974,1975,1976,1977,1978,1979,1980,1981,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,Average
48,1.0,2.7,10.2,0.0,14.8,84.2,44.1,5.0,0.0,61.5,...,26.94,0.00,0.72,24.67,0.7,0.00,1.52,39.56,0.0,16.57
49,2.0,3.4,14.6,1.1,14.9,86.5,46.7,15.6,10.7,66.9,...,18.47,0.00,0.47,34.98,1.4,0.00,1.55,36.02,0.0,18.12
50,3.0,6.7,19.4,1.2,14.9,89.0,49.3,23.0,10.4,71.9,...,12.92,0.00,4.86,55.69,2.6,0.00,1.78,46.79,0.0,20.22
51,4.0,9.5,23.8,1.2,15.0,91.3,51.6,30.5,10.0,77.4,...,16.57,0.41,6.30,57.96,0.7,0.00,4.17,59.38,0.0,21.95
52,5.0,12.8,28.6,1.4,15.1,93.3,56.9,38.1,9.6,82.5,...,11.91,0.28,9.68,39.70,3.6,0.49,4.57,77.62,0.0,23.36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43,361.0,0.0,0.0,0.0,11.7,63.2,28.8,1.8,0.0,35.2,...,18.76,0.00,0.39,19.13,0.0,0.00,7.13,14.44,0.0,10.75
44,362.0,0.0,0.0,0.0,12.6,68.8,32.4,2.5,0.0,40.7,...,20.39,0.00,0.34,9.70,0.0,0.00,5.98,18.43,0.0,11.88
45,363.0,0.7,0.0,0.0,13.5,74.4,36.2,3.1,0.0,45.7,...,19.84,0.00,0.36,9.24,0.0,0.00,6.79,26.38,0.0,13.12
46,364.0,1.4,0.0,0.0,14.5,80.0,38.8,3.8,0.0,51.2,...,29.81,0.00,0.48,8.95,0.0,0.00,2.42,30.05,0.0,14.32


In [143]:
# Lake Huron ice cover: one row per julian day, one column per year
ice_hur_df = (
    pd.read_csv(f'{iceCoverPath}/hur.csv')
    .drop(columns=['date', 'jday.1', 'date.1'])  # keep only 'jday' and the value columns
    .sort_values('jday')
)
ice_hur_df

Unnamed: 0,jday,1973,1974,1975,1976,1977,1978,1979,1980,1981,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,Average
48,1.0,19.3,11.7,0.0,16.8,44.0,21.4,15.0,0.0,40.2,...,10.61,3.97,5.23,31.65,7.9,0.35,3.86,30.69,8.69,12.46
49,2.0,21.1,14.7,2.0,16.2,45.5,21.9,20.2,8.5,43.0,...,10.57,2.54,5.75,35.30,12.7,0.61,3.78,31.46,10.27,13.54
50,3.0,22.5,18.2,2.2,15.7,47.4,22.4,23.7,9.2,45.7,...,7.24,8.09,7.83,44.05,13.9,0.80,5.44,33.50,9.91,14.57
51,4.0,23.7,21.1,2.4,15.1,49.0,22.5,27.4,9.3,48.5,...,7.24,7.38,6.76,44.60,12.9,1.56,5.82,30.16,7.32,15.30
52,5.0,25.1,24.7,2.7,14.4,50.7,23.5,31.0,9.5,51.1,...,9.48,7.30,7.59,29.63,16.7,3.94,6.22,37.69,6.46,16.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43,361.0,13.6,0.0,0.0,21.4,35.6,17.5,5.1,0.0,26.7,...,9.21,1.16,2.19,23.20,0.9,0.00,3.83,16.39,5.06,8.93
44,362.0,11.7,0.0,0.0,20.5,37.0,19.1,7.1,0.0,29.6,...,11.88,1.16,2.78,17.30,1.1,0.02,3.38,21.74,4.24,9.21
45,363.0,13.8,0.0,0.0,19.7,38.4,20.0,9.0,0.0,32.2,...,14.28,3.04,4.90,18.31,1.5,0.29,4.46,24.20,4.03,9.92
46,364.0,15.6,0.0,0.0,18.0,39.7,20.5,11.1,0.0,35.0,...,12.13,3.98,4.37,19.67,2.6,0.38,3.60,25.02,6.54,10.50


## Water Temperature Data

In [144]:
# Load water temperature data into dataframe
waterTempFrames = []
for dataFile in os.listdir(f'{waterTempPath}/'):
    # Lines 0-6 and 8-9 are skipped, which leaves line 7 as the header row.
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    tmp_df = pd.read_csv(f'{waterTempPath}/{dataFile}', skiprows=[0,1,2,3,4,5,6,8,9], sep=r'\s+')
    waterTempFrames.append(tmp_df)

# One concat replaces the per-file DataFrame.append (removed in pandas 2.0) and
# the emptiness check that inspected an unrelated dataframe. Each file keeps
# its own row index, as before.
waterTemp_df = pd.concat(waterTempFrames)

# Build a real date from the year and day-of-year columns
dates = [
    datetime.strptime(f'{year}_{day}', '%Y_%j')
    for year, day in zip(waterTemp_df['Year'].values, waterTemp_df['Day'].values)
]

waterTemp_df.insert(0, 'Date', dates)
waterTemp_df=waterTemp_df.drop(['Year', 'Day'], axis=1)
waterTemp_df=waterTemp_df.rename(columns={'Sup.':'Superior Lake', 'Mich.':'Lake Michigan', 'Huron':'Lake Huron', 'Erie':'Lake Erie', 'Ont.':'Lake Ontario', 'St.Clr':'Lake Saint Clair'})
waterTemp_df=waterTemp_df.sort_values('Date')
waterTemp_df

Unnamed: 0,Date,Superior Lake,Lake Michigan,Lake Huron,Lake Erie,Lake Ontario,Lake Saint Clair
0,2015-01-01,2.59,3.53,3.26,3.33,4.43,2.32
1,2015-01-02,2.46,3.50,3.17,3.31,4.32,1.80
2,2015-01-03,2.36,3.49,3.05,3.26,4.28,0.92
3,2015-01-04,2.18,3.36,2.68,3.28,4.23,1.74
4,2015-01-05,2.06,3.16,2.42,3.01,4.04,0.22
...,...,...,...,...,...,...,...
360,2019-12-27,3.36,4.47,3.73,4.09,4.54,2.48
361,2019-12-28,3.37,4.36,3.69,4.05,4.50,2.66
362,2019-12-29,3.36,4.32,3.65,4.03,4.48,2.77
363,2019-12-30,3.34,4.28,3.62,4.01,4.47,2.85


In [145]:
# Combine water temperature and ice cover into environment dataframe
env_df.insert(1, 'WaterTemp_Ontario', np.nan)
env_df.insert(2, 'IceCover_Ontario', np.nan)
env_df.insert(3, 'IceCover_Huron', np.nan)
env_df.insert(4, 'IceCover_Erie', np.nan)

# Lake Ontario water temperature keyed by calendar date; setdefault keeps the
# first reading if a date occurs more than once
waterTempByDate = {}
for stamp, temp in zip(waterTemp_df['Date'].tolist(), waterTemp_df['Lake Ontario'].tolist()):
    waterTempByDate.setdefault(stamp.date(), temp)

# Each lake is looked up through its own 'jday' column, instead of sharing one
# index taken from a dataframe that this notebook never defines
iceTables = {
    'IceCover_Ontario': ice_ont_df,
    'IceCover_Erie': ice_eri_df,
    'IceCover_Huron': ice_hur_df,
}
iceRowByJday = {}
for column, table in iceTables.items():
    rowByJday = {}
    for row, jday in enumerate(table['jday'].tolist()):
        rowByJday.setdefault(jday, row)
    iceRowByJday[column] = rowByJday

# Values are collected in row order and assigned positionally at the end,
# because env_df's index labels repeat across events
filled = {column: [] for column in ['WaterTemp_Ontario', *iceTables]}
for timestamp in env_df['time [UTC]']:
    envDate = timestamp.date()

    if envDate not in waterTempByDate:
        # No water temperature for this date: all four columns stay NaN
        for values in filled.values():
            values.append(np.nan)
        continue

    filled['WaterTemp_Ontario'].append(waterTempByDate[envDate])

    year = envDate.strftime('%Y')       # ice tables have one column per year
    jday = int(envDate.strftime('%j'))  # day of year, matched against 'jday'
    for column, table in iceTables.items():
        row = iceRowByJday[column].get(jday)
        # Days that are absent from the ice table are recorded as 0.0 ice cover
        filled[column].append(0.0 if row is None else table[year].iloc[row])

for column, values in filled.items():
    env_df[column] = values

env_df

Unnamed: 0,time [UTC],WaterTemp_Ontario,IceCover_Ontario,IceCover_Huron,IceCover_Erie,z_925mb [m]_x,T_925mb [degC]_x,RH_925mb [%]_x,u_925mb [kt]_x,v_925mb [kt]_x,...,z_700mb [m]_y,T_700mb [degC]_y,RH_700mb [%]_y,u_700mb [kt]_y,v_700mb [kt]_y,z_500mb [m]_y,T_500mb [degC]_y,RH_500mb [%]_y,u_500mb [kt]_y,v_500mb [kt]_y
0,2015-10-17 10:00:00,13.26,0.00,0.00,0.00,803.81,-2.83,74.37,6.59,-15.85,...,2923.61,-13.54,15.11,26.70,-7.37,5401.54,-29.67,25.71,38.48,-1.77
1,2015-10-17 11:00:00,13.26,0.00,0.00,0.00,804.51,-1.72,68.59,7.45,-15.29,...,2927.03,-14.12,14.21,27.32,-9.45,5399.77,-30.21,25.77,37.47,-3.50
2,2015-10-17 12:00:00,13.26,0.00,0.00,0.00,810.72,-1.56,64.15,7.17,-16.18,...,2927.59,-14.74,23.99,20.99,-11.98,5396.19,-30.40,24.93,37.17,-5.82
3,2015-10-17 13:00:00,13.26,0.00,0.00,0.00,816.24,-1.11,58.38,7.55,-17.10,...,2934.41,-15.24,33.86,17.20,-14.21,5399.68,-30.47,20.28,37.19,-4.76
4,2015-10-17 14:00:00,13.26,0.00,0.00,0.00,817.79,-1.43,66.92,6.55,-15.72,...,2937.42,-15.73,38.57,19.43,-12.18,5401.98,-30.02,15.75,37.58,-2.74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,2019-04-01 06:00:00,2.23,4.63,19.43,7.79,749.57,-6.41,91.26,23.41,-3.79,...,2872.08,-21.48,76.45,22.90,-3.56,5286.53,-33.67,10.18,31.66,14.61
10,2019-04-01 07:00:00,2.23,4.63,19.43,7.79,756.90,-7.09,86.11,25.09,-5.27,...,2879.69,-21.78,78.21,24.82,-2.49,5288.65,-34.46,11.02,33.55,9.89
11,2019-04-01 08:00:00,2.23,4.63,19.43,7.79,753.46,-7.76,90.77,24.95,-6.80,...,2873.55,-22.18,78.29,27.00,-4.13,5278.53,-34.90,12.60,34.36,4.09
12,2019-04-01 09:00:00,2.23,4.63,19.43,7.79,758.64,-8.12,90.23,23.68,-5.62,...,2875.96,-22.04,71.96,29.91,-5.31,5280.02,-35.27,14.08,36.31,-0.16


## Export Dataset

In [146]:
# Merge environment data and band position data
dataset=pd.merge(posData, env_df, on='time [UTC]')
dataset=dataset.rename(columns={'time [UTC]':'DateTime [UTC]'})
dataset=dataset.drop_duplicates()

# Export only after de-duplicating, so the file on disk holds exactly the rows
# of `dataset` that are displayed and summarised afterwards.
# NOTE(review): the file is written without a header row (header=False); kept
# as-is in case downstream readers expect that layout - confirm.
dataset.to_csv(f'{dataDir}/full_dataset.csv', header=False, index=False)
dataset

Unnamed: 0,DateTime [UTC],BandStart_Latitude,BandStart_Longitude,BandMidpoint_Latitude,BandMidpoint_Longitude,BandEnd_Latitude,BandEnd_Longitude,BandIntensity [dBZ],BandAz_LO1 [deg],BandRng_LO1 [km],...,z_700mb [m]_y,T_700mb [degC]_y,RH_700mb [%]_y,u_700mb [kt]_y,v_700mb [kt]_y,z_500mb [m]_y,T_500mb [degC]_y,RH_500mb [%]_y,u_500mb [kt]_y,v_500mb [kt]_y
0,2015-10-17 10:00:00,43.535946,-76.223320,43.353896,-75.329963,43.234745,-74.595901,-9999.0,95.175078,95.981749,...,2923.61,-13.54,15.11,26.70,-7.37,5401.54,-29.67,25.71,38.48,-1.77
1,2015-10-17 11:00:00,43.532036,-76.217194,43.334646,-75.373511,43.143089,-74.490379,4.0,95.400996,96.518679,...,2927.03,-14.12,14.21,27.32,-9.45,5399.77,-30.21,25.77,37.47,-3.50
2,2015-10-17 12:00:00,43.521149,-76.232010,43.346326,-75.618544,43.195889,-74.990135,1.5,96.200389,95.472330,...,2927.59,-14.74,23.99,20.99,-11.98,5396.19,-30.40,24.93,37.17,-5.82
3,2015-10-17 13:00:00,43.518513,-76.217072,43.339364,-75.832468,43.176296,-75.413177,5.0,96.285512,96.702867,...,2934.41,-15.24,33.86,17.20,-14.21,5399.68,-30.47,20.28,37.19,-4.76
4,2015-10-17 14:00:00,43.565903,-76.202736,43.259891,-75.604233,42.989628,-75.002983,6.5,93.124002,97.344800,...,2937.42,-15.73,38.57,19.43,-12.18,5401.98,-30.02,15.75,37.58,-2.74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1140,2019-04-01 06:00:00,43.302155,-76.797066,43.181426,-75.945325,42.964691,-75.063576,17.0,125.332050,60.756589,...,2872.08,-21.48,76.45,22.90,-3.56,5286.53,-33.67,10.18,31.66,14.61
1141,2019-04-01 07:00:00,43.548927,-76.197563,43.250276,-75.728180,42.978962,-75.235283,8.0,94.208553,97.908125,...,2879.69,-21.78,78.21,24.82,-2.49,5288.65,-34.46,11.02,33.55,9.89
1142,2019-04-01 08:00:00,43.553471,-76.185211,43.228888,-75.635044,42.981030,-75.192307,8.5,93.866284,98.856365,...,2873.55,-22.18,78.29,27.00,-4.13,5278.53,-34.90,12.60,34.36,4.09
1143,2019-04-01 09:00:00,43.320927,-76.710991,43.121541,-76.320671,42.869659,-75.870171,12.5,120.280638,65.436882,...,2875.96,-22.04,71.96,29.91,-5.31,5280.02,-35.27,14.08,36.31,-0.16


## Dataset Statistics

In [147]:
# Descriptive statistics of the merged dataset, saved under dataDir
stats = dataset.describe()
stats.to_csv(dataDir + '/full_dataset_statistics.csv', header=True)
stats

Unnamed: 0,BandStart_Latitude,BandStart_Longitude,BandMidpoint_Latitude,BandMidpoint_Longitude,BandEnd_Latitude,BandEnd_Longitude,BandIntensity [dBZ],BandAz_LO1 [deg],BandRng_LO1 [km],WaterTemp_Ontario,...,z_700mb [m]_y,T_700mb [degC]_y,RH_700mb [%]_y,u_700mb [kt]_y,v_700mb [kt]_y,z_500mb [m]_y,T_500mb [degC]_y,RH_500mb [%]_y,u_500mb [kt]_y,v_500mb [kt]_y
count,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,...,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0,1145.0
mean,43.630653,-76.381776,43.633758,-75.787178,43.639705,-75.142739,-463.378603,93.641756,87.640164,4.608821,...,2828.591878,-21.168114,58.476978,32.595633,5.77062,5257.639668,-33.220926,35.622585,45.839057,11.509223
std,0.233597,0.279259,0.348498,0.408128,0.519221,0.670774,2142.942481,22.816799,18.150216,2.408046,...,63.173232,5.053155,22.238123,9.916492,14.014891,98.84491,4.916704,20.163118,20.474141,24.101842
min,43.24379,-77.509819,42.907711,-77.060097,42.608696,-76.93824,-9999.0,48.341748,38.160649,1.35,...,2702.81,-33.68,0.75,6.8,-36.19,5039.5,-45.61,1.77,-16.11,-60.4
25%,43.446945,-76.559319,43.339364,-76.03568,43.210049,-75.653412,9.5,78.997491,71.131557,2.82,...,2778.45,-24.8,42.97,25.84,-3.62,5186.38,-36.86,18.79,32.93,-5.63
50%,43.617672,-76.225258,43.61902,-75.743159,43.589722,-75.121086,18.0,89.729152,96.965336,3.97,...,2829.57,-21.01,64.0,31.93,6.42,5262.92,-33.24,33.44,44.2,11.34
75%,43.780396,-76.199753,43.901171,-75.471099,44.032555,-74.616837,24.5,105.391728,97.859283,6.14,...,2868.63,-17.41,77.42,38.71,14.77,5328.24,-29.26,51.37,59.72,29.42
max,44.314522,-76.05056,44.518521,-74.867294,44.987915,-73.680901,41.5,190.940376,130.345075,13.26,...,3006.95,-7.81,99.36,68.05,53.07,5562.49,-19.31,97.64,103.27,100.25
