In [1]:
%matplotlib
import matplotlib.pyplot as plt
import matplotlib 
import pandas as pd
import os
import numpy as np 

# Project root: used as the working directory and as the base of the data path below.
project_dir = 'C:\\Users\\Garrett\\Google Drive\\Documents\\Journal\\Surf Data Project'
os.chdir(project_dir)

# Load the exported session log (data from 2018-2020) from the project's data_exported folder.
sessions_csv = project_dir + '\\data_exported\\df_181920.csv'
df_181920 = pd.read_csv(sessions_csv)
df_181920


Using matplotlib backend: Qt5Agg


Unnamed: 0,Date,Year,Month,Day,WH_avg,wv_ql,Spot,Region
0,2018-01-04,2018,1,4,5.5,10.0,Blacks,SD
1,2018-01-05,2018,1,5,5.5,8.0,Blacks,SD
2,2018-01-06,2018,1,6,3.5,3.0,Blacks,SD
3,2018-01-07,2018,1,7,3.5,3.0,Easter St,SLO
4,2018-01-11,2018,1,11,3.5,9.0,South Studios,SLO
...,...,...,...,...,...,...,...,...
661,2020-12-26,2020,12,26,3.5,7.0,Del Mar,SD
662,2020-12-27,2020,12,27,6.5,8.0,Georges,SD
663,2020-12-29,2020,12,29,5.5,5.0,Georges,SD
664,2020-12-30,2020,12,30,3.5,6.0,Torrey Pines,SD


In [2]:
# Per-spot summary table: average wave height, number of logged rows, and region.
min_sessions = 15  # spots with fewer logged rows than this are left out

spot_groups = df_181920.groupby('Spot')
df_spot_wv_avg = pd.DataFrame({
    'wv_hgt': spot_groups['WH_avg'].mean().round(2),  # mean wave height, 2 decimals
    'spot_count': spot_groups['WH_avg'].count(),      # non-null WH_avg rows per spot
    'region': spot_groups['Region'].min(),            # smallest Region value recorded for the spot
})

# Keep only the rows (spots) that reach the minimum count
often_surfed = df_spot_wv_avg['spot_count'] >= min_sessions
df_spot_wv_avg = df_spot_wv_avg[often_surfed]
# Largest average wave height first
df_spot_wv_avg = df_spot_wv_avg.sort_values(by='wv_hgt', ascending=False)
# Turn the 'Spot' index into an ordinary column
df_spot_wv_avg = df_spot_wv_avg.reset_index()

df_spot_wv_avg

Unnamed: 0,Spot,wv_hgt,spot_count,region
0,Ocean Beach,5.61,22,SF
1,The Rock,4.84,29,SLO
2,The Pit,4.61,96,SLO
3,Blacks,4.55,50,SD
4,South Jetty,4.42,20,SLO
5,Cayucos Pier,4.16,25,SLO
6,Studios,4.1,20,SLO
7,Sandspit,3.94,66,SLO
8,Seaside,3.92,33,SD
9,Seascape,3.08,24,SD


In [64]:
import matplotlib.patches as mpatches

# Colour each bar by region: the region column is turned into categorical codes,
# and each code indexes into a fixed three-colour palette.
region_cat = df_spot_wv_avg["region"].astype("category")
region_codes = region_cat.cat.codes.values
cmap = np.array([
    '#A6D0BD',  # greenish
    '#F0D37D',  # yellowish
    '#5CA3BB',  # blueish
])
# Alternative palette: plt.cm.Set2(range(df_spot_wv_avg["region"].unique().shape[0]))

# One legend patch per region code, in the order the codes first appear in the table.
patches = [
    mpatches.Patch(color=cmap[code], label=region_cat.cat.categories[code])
    for code in region_cat.cat.codes.unique()
]

# Figure and bars
fig, ax = plt.subplots(figsize=(22, 12))
bars = ax.bar(
    df_spot_wv_avg['Spot'],
    df_spot_wv_avg['wv_hgt'],
    width=0.8,
    color=cmap[region_codes],
)

ax.legend(handles=patches)

# Axis formatting: hide three spines, soften the bottom one, horizontal grid only.
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(30)

# Write each bar's height (rounded to 1 decimal) just above the bar, centred on it.
for bar in bars:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    ax.text(
        bar_centre,
        bar_height + 0.1,
        round(bar_height, 1),
        horizontalalignment='center',
        color='#333333',
    )

# Axis labels and title share the same colour and weight.
label_style = dict(color='#333333', weight='bold')
ax.set_xlabel('Location', labelpad=15, **label_style)
ax.set_ylabel('Wave Height Averaged (ft)', labelpad=30, y=0.23, **label_style)
ax.set_title('Wave Height Across Locations', loc='left', pad=30, **label_style)

plt.show()

In [55]:
#This function takes a year and returns the monthly average wave height and most common region for that year
def month_avg(year, data=None):
    """Summarise one year of logged sessions by month.

    Parameters
    ----------
    year : int
        Value matched against the ``Year`` column.
    data : pandas.DataFrame, optional
        Frame with ``Year``, ``Month``, ``WH_avg`` and ``Region`` columns.
        When omitted, the notebook-level ``df_181920`` frame is used.

    Returns
    -------
    pandas.DataFrame
        One row per month that has at least one row in ``year``, indexed by
        month number, with columns ``month_avg_ft`` (mean of ``WH_avg``),
        ``year``, ``month``, ``day`` (always 1) and ``region`` (the most
        frequent ``Region`` value in that month).
    """
    if data is None:
        data = df_181920

    in_year = data.loc[data['Year'] == year, ['WH_avg', 'Month', 'Region']]
    by_month = in_year.groupby('Month')

    df_output = pd.DataFrame({'month_avg_ft': by_month['WH_avg'].mean()})
    df_output.index.name = 'month'
    df_output['year'] = year
    # Take the month number from the group key instead of assuming months 1..12 are
    # all present: a positional np.arange(1, 13) raises a length-mismatch error
    # whenever a year has no rows for some month.
    df_output['month'] = df_output.index
    df_output['day'] = 1
    # value_counts() sorts by frequency, so its first index entry is the most common region
    df_output['region'] = by_month['Region'].agg(lambda x: x.value_counts().index[0])
    return df_output

# Run month_avg for each year and stack the results into one frame with a fresh 0..n-1 index.
import datetime

yearly_frames = [month_avg(yr) for yr in (2018, 2019, 2020)]
df_wh = pd.concat(yearly_frames, ignore_index=True)
df_wh = df_wh.round(2)

# Build a datetime column from the year / month / day columns.
df_wh['date'] = pd.to_datetime(df_wh[['year', 'month', 'day']])


df_wh

Unnamed: 0,month_avg_ft,year,month,day,region,date
0,5.26,2018,1,1,SLO,2018-01-01
1,3.69,2018,2,1,SLO,2018-02-01
2,5.41,2018,3,1,SLO,2018-03-01
3,5.18,2018,4,1,SLO,2018-04-01
4,3.83,2018,5,1,SLO,2018-05-01
5,3.28,2018,6,1,SLO,2018-06-01
6,3.35,2018,7,1,SLO,2018-07-01
7,3.5,2018,8,1,SLO,2018-08-01
8,5.04,2018,9,1,NZ,2018-09-01
9,4.74,2018,10,1,SLO,2018-10-01


In [7]:
# Wave Height Data from CenCOOS

# One CSV per buoy (TPN and Diablo files in the project's data_raw folder)
df_Torrey = pd.read_csv('C:\\Users\\Garrett\\Google Drive\\Documents\\Journal\\Surf Data Project\\data_raw\\TPN_SigWaveHeight_month_v2.csv')
df_Diablo = pd.read_csv('C:\\Users\\Garrett\\Google Drive\\Documents\\Journal\\Surf Data Project\\data_raw\\Diablo_SigWaveHeight_month_v2.csv')

meters_to_feet = 3.28084

# Add the same derived columns to both buoy tables.
for buoy_df in (df_Torrey, df_Diablo):
    # Parse the text 'Start date' column into datetimes
    buoy_df['Date'] = pd.to_datetime(buoy_df['Start date'])
    # Month and year pulled out of the parsed date for plotting
    buoy_df['Month'] = buoy_df['Date'].dt.month
    buoy_df['Year'] = buoy_df['Date'].dt.year
    # 'Mean' converted from meters to feet and rounded to 2 decimals
    buoy_df['sig_height_ft'] = (buoy_df['Mean'] * meters_to_feet).round(2)

df_Diablo.head()
## df_Torrey

Unnamed: 0,Start date,Standard deviation,Mean,Date,Month,Year,sig_height_ft
0,1/1/2018,0.749526,1.874328,2018-01-01,1,2018,6.15
1,2/1/2018,0.484922,1.5,2018-02-01,2,2018,4.92
2,3/1/2018,0.540562,1.470256,2018-03-01,3,2018,4.82
3,4/1/2018,0.53472,1.649305,2018-04-01,4,2018,5.41
4,5/1/2018,0.401979,1.465431,2018-05-01,5,2018,4.81


In [63]:
#Matplotlib figs: own data & buoy data

# Set the global font size BEFORE any figure is created. Matplotlib fixes the font
# size of axis labels and titles when those text objects are created, so updating
# rcParams after the labels were set (as this cell did before) left the first
# figure at the previous size on a fresh kernel while the second one used 22.
plt.rcParams.update({'font.size': 22})


def _buoy_vs_recorded_fig(buoy_df, rec_time, rec_height, recorded_as_points=False):
    """Draw one buoy series together with recorded data on a new figure.

    buoy_df            -- table whose 'Date' and 'sig_height_ft' columns are drawn as a line
    rec_time           -- x values of the recorded data
    rec_height         -- y values of the recorded data
    recorded_as_points -- draw the recorded data with ax.scatter instead of ax.plot

    Returns the (figure, axes) pair after calling plt.show().
    """
    fig, ax = plt.subplots(figsize=(18, 12))  # Create figure and plot space

    # Labels and title
    ax.set_xlabel('Date (Year-Month)')
    ax.set_ylabel('Wave Height (ft)')
    # NOTE(review): both figures use this same title although each one plots a
    # single buoy next to one region's recorded data -- confirm this is intended.
    ax.set_title('Wave Height in SD & SLO (from CenCOOS Buoys)')

    # Axis lines
    for side in ('left', 'bottom'):
        ax.spines[side].set_color('#212121')
        ax.spines[side].set_linewidth(.5)
    for label in ax.get_xticklabels():
        label.set_rotation(40)

    # Data plotted
    ax.plot(buoy_df['Date'], buoy_df['sig_height_ft'], color='#7eb0d9', lw=2)
    if recorded_as_points:
        ax.scatter(rec_time, rec_height, color='#E07F2C', lw=2)
    else:
        ax.plot(rec_time, rec_height, color='#E07F2C', lw=2)

    plt.show()
    return fig, ax


#Fig 2: TP buoy with SD Data
SD_time = df_wh.loc[df_wh['region'] == 'SD', 'date']
SD_wave_ht = df_wh.loc[df_wh['region'] == 'SD', 'month_avg_ft']
fig1, ax = _buoy_vs_recorded_fig(df_Torrey, SD_time, SD_wave_ht)

#Fig 3: Diablo buoy with SLO Data
SLO_time = df_wh.loc[df_wh['region'] == 'SLO', 'date']
SLO_wave_ht = df_wh.loc[df_wh['region'] == 'SLO', 'month_avg_ft']
fig3, ax = _buoy_vs_recorded_fig(df_Diablo, SLO_time, SLO_wave_ht, recorded_as_points=True)

In [67]:
#Fig 1: Wave Height from personal Data (Broken into years)
#Using Plotly GO (https://plotly.com/python/line-charts/ for some good examples)

#Libraries
import plotly.graph_objects as go
import numpy as np
# py / tls are needed by the upload lines at the bottom of this cell. They were
# previously imported only in a later cell, so running the notebook top to bottom
# on a fresh kernel raised NameError here.
import chart_studio.plotly as py
import chart_studio.tools as tls

#Data
title = 'Wave Height across Years (binned & averaged by month)'
labels = ['2018', '2019', '2020']
line_colors = ["#ffa600", "#bc5090", "#003f5c"]  # one colour per entry in labels

layout = go.Layout(legend=dict(orientation="h"),
                  title = title)

fig = go.Figure(layout=layout)

# One trace per year. Each year is plotted against its own month column, so x and
# y always have matching lengths.
for label, line_color in zip(labels, line_colors):
    in_year = df_wh['year'] == int(label)
    fig.add_trace(go.Scatter(
        x = df_wh.loc[in_year, 'month'],
        y = df_wh.loc[in_year, 'month_avg_ft'],
        line=dict(color=line_color),
        name=label,
        hovertemplate=
        "<b>" + label + "</b><br><br>" +
        "Month: %{x:f}<br>" +
        "Height: %{y:0}<br>" +
        "<extra></extra>"))

# Axis lines, ticks and fonts; fixed margins on a white plot background
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=16,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=True,
        showline=True,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=16,
            color='rgb(82, 82, 82)',
        )
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=100,
        r=20,
        t=110,
    ),
    plot_bgcolor='white'
)

# Show month abbreviations at tick positions 1..12
fig.update_xaxes(
        ticktext=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
        tickvals=[1,2,3,4 ,5 ,6,7,8,9,10,11,12],    
        title_text = "Month of the Year",
        title_font = {"size": 16},
        title_standoff = 25)

fig.update_yaxes(
        title_text = "Wave Height (Avg)",
        title_font = {"size": 16},
        title_standoff = 25
)

fig.update_traces(mode='lines',
                 showlegend = True)

fig.update_layout(legend=dict(font_size=12, yanchor="top", y=1.21, xanchor="left", x = -0.115),
                  hoverlabel=dict(
                      bgcolor="white",
                      font_size=16,
                      font_family="Arial"))
fig.show()

py.plot(fig, filename = 'Recorded_Wave_Height_Across_Years', auto_open=True)
print(tls.get_embed('https://plotly.com/~garrettschmid/26/')) #Gets the embed link for squarespace

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~garrettschmid/26.embed" height="525" width="100%"></iframe>


In [68]:
#Fig 2: Wave Height from CenCOOS Data

# Imported in this cell so it runs on a fresh kernel: py / tls are used by the
# upload lines at the bottom, but were previously imported only in a later cell,
# which made this cell raise NameError when the notebook was run top to bottom.
import plotly.graph_objects as go
import chart_studio.plotly as py
import chart_studio.tools as tls

#Data
title = 'Average Wave Height across CA (binned & averaged by month)'

layout = go.Layout(legend=dict(),
                  title = title)

fig = go.Figure(layout=layout)

# (buoy table, legend/hover name, line colour, buoy line shown in the hover box)
buoy_traces = [
    (df_Diablo, "San Luis Obispo", "#5CA3BB", "Buoy: Diablo Canyon, CA - 076 (46215)"),
    (df_Torrey, "San Diego", "#A6D0BD", "Buoy: Torrey Pines Outer, CA (46225)"),
]
for buoy_df, place, line_color, buoy_note in buoy_traces:
    fig.add_trace(go.Scatter(
        x = buoy_df['Date'], y = buoy_df['sig_height_ft'],
        line=dict(color=line_color),
        name=place,
        hovertemplate=
        "<b>" + place + "</b><br>" +
        "Height: %{y:0} ft<br>" +
        buoy_note +
        "<extra></extra>"))

# Axis lines, ticks and fonts; fixed margins on a white plot background
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=16,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=True,
        showline=True,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=16,
            color='rgb(82, 82, 82)',
        )
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=100,
        r=20,
        t=110,
    ),
    plot_bgcolor='white'
)

fig.update_xaxes(
        title_text = "Date",
        title_font = {"size": 16},
        title_standoff = 25)

fig.update_yaxes(
        title_text = "Wave Height (Avg)",
        title_font = {"size": 16},
        title_standoff = 25
)

fig.update_traces(mode='lines',
                 showlegend = True)

fig.update_layout(legend=dict(font_size=14, yanchor="top", y=1.21, xanchor="left", x = -0.115),
                  hoverlabel=dict(
                      bgcolor="white",
                      font_size=16,
                      font_family="Arial"))
fig.show()

py.plot(fig, filename = 'Avg_Wave_Height_SLO_SD', auto_open=True)
print(tls.get_embed('https://plotly.com/~garrettschmid/24/')) #Gets the embed link for squarespace

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~garrettschmid/24.embed" height="525" width="100%"></iframe>


In [60]:
#Fig 3: Recorded vs Actual Wave Height
import chart_studio.plotly as py
import chart_studio.tools as tls
import chart_studio

#Data
title = 'Recorded vs Actual Wave Height (binned & averaged by month)'
slo_rows = df_wh['region'] == 'SLO'
SLO_time = df_wh.loc[slo_rows, 'date']
SLO_wave_ht = df_wh.loc[slo_rows, 'month_avg_ft']
date = df_wh['date']
wave_ht = df_wh['month_avg_ft']

# Legend click behaviour: single click isolates a trace, double click toggles it.
legend_clicks = dict(itemclick="toggleothers", itemdoubleclick="toggle")
layout = go.Layout(title=title, legend=legend_clicks)

fig = go.Figure(layout=layout)

# Thin line: Diablo buoy series.
fig.add_trace(go.Scatter(
    x=df_Diablo['Date'],
    y=df_Diablo['sig_height_ft'],
    line=dict(color="#5CA3BB", width=1),
    name="Buoy Data: SLO",
    hovertemplate="<b>Buoy Data</b><br>" + "Height: %{y:0} ft<br>" + "Buoy: Diablo Canyon, CA - 076 (46215)" +
    "<extra></extra>"))

# Thick line segments: row ranges of df_wh the author marked as the periods where SLO
# was surfed consistently. Each range is its own trace so the line breaks between them;
# only the first segment gets a legend entry.
recorded_style = dict(
    line=dict(color="#003f5c", width=3),
    name="Recorded Data: SLO",
    hovertemplate="<b>Recorded Data</b><br>" + "Height: %{y:0} ft<br>" + "<extra></extra>")
slo_row_ranges = [(0, 8), (9, 11), (12, 18), (24, 32)]
for segment_no, (first_row, end_row) in enumerate(slo_row_ranges):
    legend_opts = {} if segment_no == 0 else dict(showlegend=False)
    fig.add_trace(go.Scatter(
        x=date[first_row:end_row],
        y=wave_ht[first_row:end_row],
        **recorded_style,
        **legend_opts))

# Alternative (disabled): show every SLO month as a marker instead of line segments
# fig.add_trace(go.Scatter(
#     x = SLO_time, y = SLO_wave_ht,
#     mode='markers',
#     name="Recorded Data: SLO",
#     marker=dict(color='#003f5c', size=8),
#     showlegend=False,
#     hovertemplate="<b>Recorded Data</b><br>" + "Height: %{y:0} ft<br>" + "<extra></extra>"))

# Axis lines, ticks and fonts; fixed margins on a white plot background.
tick_font = dict(family='Arial', size=16, color='rgb(82, 82, 82)')
axis_line_color = 'rgb(204, 204, 204)'
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor=axis_line_color,
        linewidth=2,
        ticks='outside',
        tickfont=tick_font,
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=True,
        showline=True,
        showticklabels=True,
        linecolor=axis_line_color,
        ticks='outside',
        tickfont=tick_font,
    ),
    autosize=False,
    margin=dict(autoexpand=False, l=100, r=20, t=110),
    plot_bgcolor='white',
)

# Both axis titles share the same font size and standoff.
axis_title_style = dict(title_font={"size": 16}, title_standoff=25)
fig.update_xaxes(title_text="Date", **axis_title_style)
fig.update_yaxes(title_text="Wave Height (Avg)", **axis_title_style)

fig.update_traces()

fig.update_layout(
    legend=dict(font_size=14, yanchor="top", y=1.21, xanchor="left", x=-0.115),
    hoverlabel=dict(bgcolor="white", font_size=16, font_family="Arial"),
)
fig.show()

py.plot(fig, filename = 'Recorded_vs_Buoy_Wave_Height', auto_open=True)
print(tls.get_embed('https://plotly.com/~garrettschmid/1/')) #Gets the embed link for squarespace

<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~garrettschmid/1.embed" height="525" width="100%"></iframe>
