In [1]:
# Import dependencies
import pandas as pd

# Import sci-kit leanring modules
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Added SQLalchemy
import sqlalchemy as db
from config import password

# Setup Database Connection and Import Data

In [2]:
# create the connection to the PostgreSQL database.
db_string = f"postgresql://postgres1:{password}@final-project-database.crwsgvv9ibw0.us-east-1.rds.amazonaws.com:5432/final_project_db"
con = db.create_engine(db_string).connect()

  """)


## General Data

In [3]:
# Import and view data table
data_df = pd.read_sql_table("galveston_data_join",con)
data_df.head()

Unnamed: 0,beach_id,beach_name,start_lat,start_long,end_lat,end_long,waterbody_type,station_id,station_name,bacteria_count,...,date,week,month,year,avg_temp1,max_temp1,min_temp1,precipitation1,precipitation54,precipitation18
0,TX767833,Sea Isle,29.157639,-95.011542,29.125974,-95.062028,Open Coast,GAL005,Terramar Beach,40.0,...,2007-01-22,4,1,2007,52.0,53.0,50.0,0.0,,
1,TX767833,Sea Isle,29.157639,-95.011542,29.125974,-95.062028,Open Coast,GAL005,Terramar Beach,38.0,...,2007-01-22,4,1,2007,52.0,53.0,50.0,0.0,,
2,TX767833,Sea Isle,29.157639,-95.011542,29.125974,-95.062028,Open Coast,GAL007,Sea Isle South,58.0,...,2007-01-22,4,1,2007,52.0,53.0,50.0,0.0,,
3,TX767833,Sea Isle,29.157639,-95.011542,29.125974,-95.062028,Open Coast,GAL007,Sea Isle South,48.0,...,2007-01-22,4,1,2007,52.0,53.0,50.0,0.0,,
4,TX974690,Jamaica Beach,29.182981,-94.969426,29.176498,-94.980493,Open Coast,GAL014,Jamaica Beach South,64.0,...,2007-01-22,4,1,2007,52.0,53.0,50.0,0.0,,


## Load Weather Station Data

In [7]:
# Load the Primary Weather Stations Data
wx1_df = pd.read_sql_table("weather_station1",con)
wx18_df = pd.read_sql_table("weather_station18",con)
wx54_df = pd.read_sql_table("weather_station54",con)


Unnamed: 0,date1,avg_temp1,max_temp1,min_temp1,precipitation1,snowfall1,snow_depth1
0,1946-08-01,,86.0,77.0,0.0,0.0,0.0
1,1946-08-02,,80.0,78.0,0.0,0.0,0.0
2,1946-08-03,,90.0,80.0,0.0,0.0,0.0
3,1946-08-04,,91.0,81.0,0.0,0.0,0.0
4,1946-08-05,,91.0,80.0,0.0,0.0,0.0


In [8]:
wx1_df.head()

Unnamed: 0,date1,avg_temp1,max_temp1,min_temp1,precipitation1,snowfall1,snow_depth1
0,1946-08-01,,86.0,77.0,0.0,0.0,0.0
1,1946-08-02,,80.0,78.0,0.0,0.0,0.0
2,1946-08-03,,90.0,80.0,0.0,0.0,0.0
3,1946-08-04,,91.0,81.0,0.0,0.0,0.0
4,1946-08-05,,91.0,80.0,0.0,0.0,0.0


In [9]:
wx18_df.head()

Unnamed: 0,date18,precipitation18
0,2012-09-01,0.01
1,2012-09-02,0.9
2,2012-09-12,0.02
3,2012-09-13,0.02
4,2012-09-14,0.18


In [10]:
wx54_df.head()

Unnamed: 0,date54,precipitation54
0,2015-02-07,0.0
1,2015-02-08,0.0
2,2015-02-09,0.0
3,2015-02-11,0.0
4,2015-02-12,0.0


# Data Processing and Feature Engineering

## Compute 5 Day averages and sums for WX data

In [11]:
# Add 5 Day Total rain fall ws18 and ws54
wx18_df["5_day_precip18"] = wx18_df["precipitation18"].rolling(5).sum()
wx54_df["5_day_precip54"] = wx54_df["precipitation54"].rolling(5).sum()

In [15]:
# Add 5 Day Total Rain and 5 Day average for WS1
wx1_df["5_day_precip1"]=wx1_df["precipitation1"].rolling(5).sum()
wx1_df["5_day_temp"]=wx1_df["avg_temp1"].rolling(5).mean()
wx1_df["5_day_temp_max"]=wx1_df["max_temp1"].rolling(5).mean()
wx1_df["5_day_temp_min"]=wx1_df["min_temp1"].rolling(5).mean()

In [17]:
wx1_df.tail()

Unnamed: 0,date1,avg_temp1,max_temp1,min_temp1,precipitation1,snowfall1,snow_depth1,5_day_precip1,5_day_temp,5_day_temp_max,5_day_temp_min
16978,2022-05-31,86.0,91.0,83.0,0.0,0.0,0.0,-2.220446e-15,84.8,90.6,80.8
16979,2022-06-01,86.0,90.0,81.0,0.0,0.0,0.0,-2.220446e-15,85.2,90.0,81.4
16980,2022-06-02,85.0,93.0,77.0,0.59,0.0,0.0,0.59,85.4,90.8,81.0
16981,2022-06-03,83.0,90.0,78.0,0.0,0.0,0.0,0.59,85.0,90.8,80.4
16982,2022-06-04,83.0,89.0,80.0,0.0,0.0,0.0,0.59,84.6,90.6,79.8


In [18]:
# Drop the redudant columns
drop_columns = ["avg_temp1",	"max_temp1",	"min_temp1",	"precipitation1",	"snowfall1",	"snow_depth1"]
wx1_df.drop(drop_columns,1,inplace=True)
wx1_df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,date1,5_day_precip1,5_day_temp,5_day_temp_max,5_day_temp_min
0,1946-08-01,,,,
1,1946-08-02,,,,
2,1946-08-03,,,,
3,1946-08-04,,,,
4,1946-08-05,0.0,,87.6,79.2


In [19]:
# Drop ther redudant columns for the other wx
wx18_df.drop("precipitation18",1,inplace=True)
wx54_df.drop("precipitation54",1,inplace=True)

  
  This is separate from the ipykernel package so we can avoid doing imports until


In [20]:
wx18_df.head()

Unnamed: 0,date18,5_day_precip18
0,2012-09-01,
1,2012-09-02,
2,2012-09-12,
3,2012-09-13,
4,2012-09-14,1.13
