# Project 2
## Team B, CA Beaches


In [2]:
# set environment
import json
from datetime import date
from pathlib import Path

import pandas as pd
import requests

In [3]:
from sqlalchemy import create_engine, insert
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
#from config import password

In [4]:
# Endpoint that returns the list of beach location records as JSON;
# every request in this notebook is issued against this URL.
base_url = "https://admin.beachreportcard.org/api/locations"

In [5]:
# Seconds to wait for the API before giving up. Without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

# read JSON data from web
r = requests.get(base_url, timeout=REQUEST_TIMEOUT_SECONDS)

# Fail here, with a clear HTTP error, if the server returned 4xx/5xx --
# otherwise the problem only shows up later as a JSON decode error or KeyError.
r.raise_for_status()

# Build the list of column names: every key that appears under "_source"
# in any record, kept in first-seen order. Records do not all carry the
# same keys, so we take the union across the whole response.
# dict.fromkeys() de-duplicates while preserving insertion order, which
# avoids re-scanning a growing list for every key.
title_list = list(
    dict.fromkeys(key for row in r.json() for key in row["_source"])
)
    


In [6]:
# Build a column-oriented dict (column name -> list of values) holding one
# entry per California beach, ready to be turned into a DataFrame.

# create empty beach dict
beach_data = {}

# loop through list of all titles found across all beaches
# and add an empty list to the beach data for each
for title in title_list:
    # the geo coordinates are split into two separate columns
    if title == "geo":
        beach_data["latitude"] = []
        beach_data["longitude"] = []
    else:
        beach_data[title] = []

# loop through all the beaches we scraped
for beach in r.json():
    source = beach["_source"]

    # we only want cali data; a record with no state at all is not CA either
    if source.get("state") != "CA":
        continue

    # Every branch below appends exactly one value per output column, so all
    # lists stay the same length and pd.DataFrame(beach_data) can be built.
    for title in title_list:
        if title == "alerts":
            # use the record's top-level currentAlert instead of
            # _source.alerts; blank when the record has none
            beach_data["alerts"].append(beach.get("currentAlert", ""))
        elif title == "geo":
            # separate coordinates: element 0 is stored as latitude and
            # element 1 as longitude; both are blank when geo is missing
            geo = source.get("geo") or ("", "")
            beach_data["latitude"].append(geo[0])
            beach_data["longitude"].append(geo[1])
        else:
            # copy the field when present, otherwise record a blank
            beach_data[title].append(source.get(title, ""))

In [7]:
# Turn the column-oriented dict into a DataFrame: each key becomes a
# column and each list supplies that column's values.
beach_df = pd.DataFrame(data=beach_data)

In [8]:
# Preview the first five rows of the full, untrimmed dataframe.
beach_df.head(n=5)

Unnamed: 0,id,title,name1,name2,name3,description,notes,comments,address,city,...,dry_grade,grade_note,wet_grade,grade_created,nowcast_pass_flag,nowcast_date,annual_summer_dry,annual_year_wet,annual_winter_dry,annual_year
0,1044,31502 Victoria Point,,,,"Single family residence sewage spill, 100 yard...",,,,Malibu,...,ns,,ns,2015-05-04T00:00:00.000Z,,2018-07-04T05:00:00.000Z,,,,
1,55,Abalone Cove Shoreline Park,Abalone Cove,Shoreline Park,,Abalone Cove is an 'open ocean' rocky beach lo...,,abalone,,Rancho Palos Verdes,...,A+,,A+,2021-04-02T00:00:00.000Z,,2018-05-07T05:00:00.000Z,,,,
2,133,Alamitos Bay - 56th Place - on bayside,Alamitos Bay,at 56th Place,bayside,,,alamitos56,,Long Beach,...,C,,ns,2020-04-10T00:00:00.000Z,,,A,D,A,2018.0
3,608,Alamitos Bay - Division Street and Bayshore,Alamitos Bay,between Division St. and Bayshore,,,,alamitosdivision,,Long Beach,...,ns,,,2019-03-15T00:00:00.000Z,,,,,,
4,135,Alamitos Bay - shore float,Alamitos Bay,shore float,,,,alamitosfloat,,Long Beach,...,A+,,B,2021-04-02T00:00:00.000Z,,2020-08-26T05:00:00.000Z,A,C,A+,2018.0


In [9]:
# Columns of value, in the order they should appear in the trimmed frame.
columns_of_value = [
    "id",
    "title",
    "name1",
    "latitude",
    "longitude",
    "address",
    "city",
    "state",
    "zip",
    "county",
    "active",
    "grade_updated",
    "dry_grade",
    "wet_grade",
    "annual_summer_dry",
    "annual_year_wet",
    "annual_winter_dry",
    "annual_year",
    "grade_created",
    "alerts",
]

# Narrow the dataframe to just those columns (raises KeyError if any is absent).
beach_df = beach_df[columns_of_value]

In [10]:
# Preview the first five rows again, now limited to the selected columns.
beach_df.head(n=5)

Unnamed: 0,id,title,name1,latitude,longitude,address,city,state,zip,county,active,grade_updated,dry_grade,wet_grade,annual_summer_dry,annual_year_wet,annual_winter_dry,annual_year,grade_created,alerts
0,1044,31502 Victoria Point,,34.035252,-118.86132,,Malibu,CA,90265,Los Angeles,False,2018-07-04T12:39:31.100Z,ns,ns,,,,,2015-05-04T00:00:00.000Z,False
1,55,Abalone Cove Shoreline Park,Abalone Cove,33.741585,-118.379189,,Rancho Palos Verdes,CA,90275,Los Angeles -- LA Sanitation,True,2021-04-02T00:00:00.000Z,A+,A+,,,,,2021-04-02T00:00:00.000Z,False
2,133,Alamitos Bay - 56th Place - on bayside,Alamitos Bay,33.751183333,-118.125783333,,Long Beach,CA,90803,Los Angeles -- Long Beach,True,2020-04-10T00:00:00.000Z,C,ns,A,D,A,2018.0,2020-04-10T00:00:00.000Z,False
3,608,Alamitos Bay - Division Street and Bayshore,Alamitos Bay,33.756313,-118.131158,,Long Beach,CA,90803,Los Angeles -- Long Beach,False,2019-03-15T00:00:00.000Z,ns,,,,,,2019-03-15T00:00:00.000Z,False
4,135,Alamitos Bay - shore float,Alamitos Bay,33.754166666,-118.13095,,Long Beach,CA,90803,Los Angeles -- Long Beach,True,2021-04-02T00:00:00.000Z,A+,B,A,C,A+,2018.0,2021-04-02T00:00:00.000Z,False


In [12]:
# write dataframe to a CSV file
output_path = Path("data/grade_info.csv")

# Create the target directory first so the write does not fail with an
# OSError on a fresh checkout where data/ does not exist yet.
output_path.parent.mkdir(parents=True, exist_ok=True)

# The row index is still written as the first (unnamed) column, exactly as
# before, so the layout of the output file is unchanged.
beach_df.to_csv(output_path)