# Analytics Avengers

In [13]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

# Locations of the per-city sea-level CSV files (relative to the working directory)
San_Francisco_path = Path("Data/Analytics_Avengers-SF_CA_Sea_Levels.csv")
San_Diego_path = Path("Data/Analytics_Avengers-San_Diego_CA_Sea_Levels.csv")
Key_West_path = Path("Data/Analytics_Avengers-Key_West_FL_Sea_Level.csv")
Honolulu_path = Path("Data/Analytics_Avengers-Honolulu_Hi_Sea_Levels.csv")
Charleston_path = Path("Data/Analytics_Avengers-Charleston_SC_Sea_Level.csv")
Boston_path = Path("Data/Analytics_Avengers-Boston_MA_Sea_Levels.csv")
Seattle_path = Path("Data/Analytics_Avengers-Seattle_WA_Sea_Level.csv")

# Read each file with pd.read_csv, in the order listed, and unpack the
# resulting DataFrames into one variable per city
(
    San_Francisco,
    San_Diego,
    Key_West,
    Honolulu,
    Charleston,
    Boston,
    Seattle,
) = map(
    pd.read_csv,
    (
        San_Francisco_path,
        San_Diego_path,
        Key_West_path,
        Honolulu_path,
        Charleston_path,
        Boston_path,
        Seattle_path,
    ),
)

# Preview the first five rows of the San Francisco DataFrame
San_Francisco.head()


Unnamed: 0,City,Region,Date,Time (GMT),Highest,MHHW (ft),MHW (ft),MSL (ft),MTL (ft),MLW (ft),MLLW (ft),Lowest (ft),Inf
0,San Francisco,West,1/1/1923,0:00,5.923,5.142,4.532,2.514,2.554,0.562,-0.488,-1.675,0
1,San Francisco,West,2/1/1923,0:00,5.825,4.975,4.493,2.514,2.534,0.572,-0.235,-1.275,0
2,San Francisco,West,3/1/1923,0:00,5.723,4.512,4.204,2.203,2.213,0.224,-0.507,-1.177,0
3,San Francisco,West,4/1/1923,0:00,5.825,4.834,4.555,2.544,2.564,0.572,-0.215,-1.275,0
4,San Francisco,West,5/1/1923,0:00,6.123,4.844,4.384,2.354,2.393,0.395,-0.576,-2.177,0


In [14]:
# CSV files to combine, one per city
file_paths = [
    "Data/Analytics_Avengers-SF_CA_Sea_Levels.csv",
    "Data/Analytics_Avengers-San_Diego_CA_Sea_Levels.csv",
    "Data/Analytics_Avengers-Key_West_FL_Sea_Level.csv",
    "Data/Analytics_Avengers-Honolulu_Hi_Sea_Levels.csv",
    "Data/Analytics_Avengers-Charleston_SC_Sea_Level.csv",
    "Data/Analytics_Avengers-Boston_MA_Sea_Levels.csv",
    "Data/Analytics_Avengers-Seattle_WA_Sea_Level.csv",
]

# Read every CSV into its own DataFrame, preserving the order of file_paths
dataframes = [pd.read_csv(csv_file) for csv_file in file_paths]

# Stack the per-city frames row-wise; ignore_index=True discards each file's
# own row labels and numbers the combined rows 0..n-1
merged_data = pd.concat(dataframes, ignore_index=True)
merged_data

Unnamed: 0,City,Region,Date,Time (GMT),Highest,MHHW (ft),MHW (ft),MSL (ft),MTL (ft),MLW (ft),MLLW (ft),Lowest (ft),Inf
0,San Francisco,West,1/1/1923,0:00,5.923,5.142,4.532,2.514,2.554,0.562,-0.488,-1.675,0
1,San Francisco,West,2/1/1923,0:00,5.825,4.975,4.493,2.514,2.534,0.572,-0.235,-1.275,0
2,San Francisco,West,3/1/1923,0:00,5.723,4.512,4.204,2.203,2.213,0.224,-0.507,-1.177,0
3,San Francisco,West,4/1/1923,0:00,5.825,4.834,4.555,2.544,2.564,0.572,-0.215,-1.275,0
4,San Francisco,West,5/1/1923,0:00,6.123,4.844,4.384,2.354,2.393,0.395,-0.576,-2.177,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8420,Seattle,West,4/1/2023,0:00,12.466,11.560,10.697,6.937,6.946,3.194,0.658,-1.426,0
8421,Seattle,West,5/1/2023,0:00,12.390,11.750,10.625,6.891,6.918,3.210,0.267,-2.200,0
8422,Seattle,West,6/1/2023,0:00,12.511,11.645,10.559,6.727,6.803,3.046,-0.550,-3.607,0
8423,Seattle,West,7/1/2023,0:00,12.771,11.586,10.668,6.763,6.882,3.095,-0.723,-3.811,0


In [16]:
# Old column label -> new column label
column_renames = {
    'MHHW (ft)': 'Mean Highest',
    'MLLW (ft)': 'Mean Lowest',
}

# rename() returns a new DataFrame, so merged_data keeps its original labels
merged_data_new = merged_data.rename(columns=column_renames)

# Show the renamed frame to confirm the new column names
# (the notebook abbreviates it to the leading and trailing rows)
merged_data_new

Unnamed: 0,City,Region,Date,Time (GMT),Highest,Mean Highest,MHW (ft),MSL (ft),MTL (ft),MLW (ft),Mean Lowest,Lowest (ft),Inf
0,San Francisco,West,1/1/1923,0:00,5.923,5.142,4.532,2.514,2.554,0.562,-0.488,-1.675,0
1,San Francisco,West,2/1/1923,0:00,5.825,4.975,4.493,2.514,2.534,0.572,-0.235,-1.275,0
2,San Francisco,West,3/1/1923,0:00,5.723,4.512,4.204,2.203,2.213,0.224,-0.507,-1.177,0
3,San Francisco,West,4/1/1923,0:00,5.825,4.834,4.555,2.544,2.564,0.572,-0.215,-1.275,0
4,San Francisco,West,5/1/1923,0:00,6.123,4.844,4.384,2.354,2.393,0.395,-0.576,-2.177,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8420,Seattle,West,4/1/2023,0:00,12.466,11.560,10.697,6.937,6.946,3.194,0.658,-1.426,0
8421,Seattle,West,5/1/2023,0:00,12.390,11.750,10.625,6.891,6.918,3.210,0.267,-2.200,0
8422,Seattle,West,6/1/2023,0:00,12.511,11.645,10.559,6.727,6.803,3.046,-0.550,-3.607,0
8423,Seattle,West,7/1/2023,0:00,12.771,11.586,10.668,6.763,6.882,3.095,-0.723,-3.811,0


In [18]:
# Restrict the frame to the five columns listed below.
selected_columns = ['City', 'Region', 'Date', 'Mean Highest', 'Mean Lowest']

# The explicit .copy() makes reduced_data an independent DataFrame: without it,
# assigning or modifying a column on reduced_data later triggers pandas'
# SettingWithCopyWarning because the frame was derived from merged_data_new.
reduced_data = merged_data_new[selected_columns].copy()

# Show the reduced frame (the notebook abbreviates it to the leading and
# trailing rows)
reduced_data


Unnamed: 0,City,Region,Date,Mean Highest,Mean Lowest
0,San Francisco,West,1/1/1923,5.142,-0.488
1,San Francisco,West,2/1/1923,4.975,-0.235
2,San Francisco,West,3/1/1923,4.512,-0.507
3,San Francisco,West,4/1/1923,4.834,-0.215
4,San Francisco,West,5/1/1923,4.844,-0.576
...,...,...,...,...,...
8420,Seattle,West,4/1/2023,11.560,0.658
8421,Seattle,West,5/1/2023,11.750,0.267
8422,Seattle,West,6/1/2023,11.645,-0.550
8423,Seattle,West,7/1/2023,11.586,-0.723
