In [4]:
import numpy as np
import pandas as pd
import datetime as dt
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.signal import find_peaks
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Load data tables
Refer = pd.read_csv('C:/Users/Scott/Desktop/Projects/River_Data/RiverReferenceTable.csv')
Discharge = pd.read_csv('C:/Users/Scott/Desktop/Projects/River_Data/Ingestion/CurrentWater.csv')
Precip = pd.read_csv('C:/Users/Scott/Desktop/Projects/River_Data/Ingestion/CurrentPrecip.csv')
forecast_precip = pd.read_csv('C:/Users/Scott/Desktop/Projects/River_Data/Ingestion/NewWeatherForecast.csv')
# Standardize Date and River Name
Discharge['Date'] = pd.to_datetime(Discharge.DateTime).dt.date
Discharge['CFS'] = Discharge['Value']

####Discharge
#Join river data with reference data
Discharge = Discharge.set_index('Name').join(Refer.set_index('USGS Name')).reset_index()[[
    'Date','CFS','Name','RiverName','FishType']]

# Create Avg Daily Discharge
Avg_Discharge = Discharge.groupby(['Date','Name','RiverName','FishType']).mean().reset_index()
Avg_Discharge['Date_Name'] = Avg_Discharge.Date.astype('str')+'_'+Avg_Discharge.Name
# Remove incorrect data
Avg_Discharge = Avg_Discharge.loc[Avg_Discharge.CFS>0]

####Precipitiation
# Standardize Date
Precip['Date'] = pd.to_datetime(Precip.Date).dt.date

#Join river data with reference data
Precip = Precip.set_index('Name').join(Refer.set_index('Name')).reset_index()[[
    'Date','Precip','Name','RiverName','FishType']]

# Create Primary Key
Precip['Date_Name'] = Precip.Date.astype('str')+'_'+Precip.Name

# Create Avg Daily Precip
Precip = Precip[['Date_Name','Date','Name','Precip']].groupby(['Date','Name','Date_Name']).mean().reset_index()

#Join river data with precip data
Rivers_Flow_Rain = Avg_Discharge.set_index('Date_Name').join(Precip[['Date_Name','Precip']].set_index('Date_Name'))

#Fill precip nulls with 0
Rivers_Flow_Rain['Precip'] = Rivers_Flow_Rain.Precip.fillna(0)

# Rename completed dataframe
full = Rivers_Flow_Rain.loc[Rivers_Flow_Rain.Precip.isnull()==False]

Rivers = []
for river in full.Name.unique():
    df = full.loc[full.Name == river]
    df.reset_index(inplace=True)
    # df = df.drop(columns='index')

    # Create column for slope of discharge
    df['CFS_Slope'] = df.CFS.diff()

    # Retrieve values of Slope
    SlopeValues = df.CFS_Slope.values

    # Define the threshold for what is a good day for fishing
    #       - slope between day CFS values must be at least 1 STD from the mean 
    border = df.CFS_Slope.mean()+(df.CFS_Slope.std()/2)

    # Define troughs that are below the threshold
    peaks, _ = find_peaks(-SlopeValues, height=border)

    # Logic to build the best day flag
    #       - if the value is a trough point or has a negative low and is above 1 STD from the mean then also consider a good day 
    bestdays = []
    for i in range(len(df.CFS)):
        if i in peaks:
            x = 1
        elif (df.CFS_Slope[i]<0) & (df.CFS[i]>=border):
            x = 1
        else: x = 0
        bestdays.append(x)
    df['BestDays'] = bestdays
    Rivers.append(df)

Rivers_Flow = pd.concat(Rivers)

# Reduce data to Eel River only
new_river = Rivers_Flow.loc[Rivers_Flow['Name']=='Eel River Scotia']
new_river.reset_index(inplace=True)
new_river = new_river .drop(columns='index')
new_river['Precip_Shift'] = new_river.Precip.shift(1)

new_river['CFS_Rolling'] = new_river.CFS.rolling(window=3,min_periods=1).mean()
new_river['CFS_Slope_Rolling'] = new_river.CFS.rolling(window=3,min_periods=1).mean()
new_river['CFS_Rolling30'] = new_river.CFS.rolling(window=30,min_periods=1).mean()
new_river['new_variable'] = np.abs(new_river.CFS) - new_river.CFS_Rolling30

model = SARIMAX(new_river.CFS[1:],exog=new_river[['Precip_Shift','CFS_Rolling']].dropna(0),order=(0,1,0))
model_fit = model.fit()

length = len(new_river.CFS)
forecast = model_fit.forecast(length,exog=new_river[['Precip_Shift','CFS_Rolling']].fillna(0))

# Create figure with secondary y-axis
fig = make_subplots(rows=2,cols=1)
# Add traces
fig.add_trace(
    go.Scatter(x=new_river.Date, y=new_river.CFS, name="CFS - Observed"),
    secondary_y=False,row=1,col=1)
# fig.add_trace(
#     go.Scatter(x=new_river.Date, y=predict, name="CFS - Predicted"),
#     secondary_y=False,row=2,col=1)
fig.add_trace(
    go.Scatter(x=new_river.Date, y=forecast, name="CFS - Forecast"),
    secondary_y=False,row=2,col=1)

# Add figure title
fig.update_layout(
    title_text="Observed, Predicted, & Forecasted ")
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles
fig.update_yaxes(title_text="Discharge (cfs)", secondary_y=False)
fig.update_yaxes(title_text="Precip (mm)", secondary_y=True)
fig.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['CFS_Slope'] = df.CFS.diff()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['BestDays'] = bestdays
