# Goals: Create a stations DataFrame containing all Divvy stations in the City of Chicago

1. Create a JSON object from the Divvy stations feed
2. Convert the JSON object to a DataFrame
3. Clean the DataFrame
4. Convert to a GeoDataFrame (by converting latitude and longitude to a geometric point)
5. Upload to the SQL database


In [1]:
# Import packages for data handling, HTTP requests and JSON parsing
import pandas as pd
import urllib.request
import json

# Import libraries for SQL handling
from sql_functions import get_engine

# importing geopandas:
import geopandas as gpd

# The shapely.geometry package is useful when handling geometric objects like points (lat, lon), lines, polygons etc.
from shapely import wkt
from shapely.geometry import Polygon, LineString, multilinestring, Point, multipolygon, MultiLineString

In [2]:
# Create json object from divvy stations feed.
# The `with` block closes the HTTP response as soon as the payload has been
# parsed, so the connection is not left open for the lifetime of the kernel.
with urllib.request.urlopen('https://gbfs.lyft.com/gbfs/2.3/chi/en/station_information.json') as url:
    stations = json.load(url)

## 2. Convert json object to a DataFrame

In [3]:
# Pull the station records out of the feed payload into a new list
df_list = list(stations['data']['stations'])

In [4]:
# Flatten the station records into a DataFrame with pd.json_normalize
# (nested keys show up as dotted column names, e.g. 'rental_uris.ios')
df_test = pd.json_normalize(data=df_list)

In [5]:
# Preview the first five rows of the normalized station table
df_test.head(n=5)

Unnamed: 0,lon,short_name,station_id,name,lat,capacity,rental_uris.ios,rental_uris.android,region_id,address
0,-87.672797,TA1307000138,a3a83ba1-a135-11e9-9cda-0a87ae2ba916,Wood St & Webster Ave,41.921057,19,https://chi.lft.to/lastmile_qr_scan,https://chi.lft.to/lastmile_qr_scan,,
1,-87.649264,TA1307000120,a3a79adb-a135-11e9-9cda-0a87ae2ba916,Green St & Madison St,41.881859,27,https://chi.lft.to/lastmile_qr_scan,https://chi.lft.to/lastmile_qr_scan,,
2,-87.650154,KA1504000171,a3af52bc-a135-11e9-9cda-0a87ae2ba916,Marine Dr & Ainslie St,41.9716,23,https://chi.lft.to/lastmile_qr_scan,https://chi.lft.to/lastmile_qr_scan,,
3,-87.649603,13389,a3a8855c-a135-11e9-9cda-0a87ae2ba916,Clarendon Ave & Junior Ter,41.961004,23,https://chi.lft.to/lastmile_qr_scan,https://chi.lft.to/lastmile_qr_scan,,
4,-87.677296,23002,1799277526470631980,Lincoln Ave & Byron St,41.952372,15,https://chi.lft.to/lastmile_qr_scan,https://chi.lft.to/lastmile_qr_scan,,


## 3. Clean Dataframe

In [6]:
# Drop unnecessary columns.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps this
# cell safe to re-run and tolerant of a feed that omits one of these fields.
df_test = df_test.drop(
    columns=["rental_uris.android", "rental_uris.ios", "region_id"],
    errors="ignore",
)

In [7]:
# Limit both coordinate columns (latitude and longitude) to 6 decimal places
for coord in ("lat", "lon"):
    df_test[coord] = df_test[coord].round(6)

In [8]:
# Rename columns in a single pass: the feed's 'station_id' becomes 'internal_id',
# 'name' becomes 'station_name', and 'short_name' takes over the 'station_id' label
df_test.rename(
    columns={
        "station_id": "internal_id",
        "name": "station_name",
        "short_name": "station_id",
    },
    inplace=True,
)

In [9]:
# Show the first row to check the cleaned column layout
df_test.head(n=1)

Unnamed: 0,lon,station_id,internal_id,station_name,lat,capacity,address
0,-87.672797,TA1307000138,a3a83ba1-a135-11e9-9cda-0a87ae2ba916,Wood St & Webster Ave,41.921057,19,


## Creating/Converting to GeoDataFrame:

* Use latitude and longitude to create a geometry POINT for every station.
* This is done with the `gpd.points_from_xy()` function.
* Help: https://geopandas.org/en/stable/gallery/create_geopandas_from_pandas.html

In [10]:
# Build one point per station from (lon, lat), then attach the points as the
# geometry column of a GeoDataFrame; crs = WGS 84
station_points = gpd.points_from_xy(x=df_test["lon"], y=df_test["lat"])
gdf_stations = gpd.GeoDataFrame(df_test, geometry=station_points, crs="WGS 84")

In [11]:
# Put the columns into their final order
final_columns = [
    'station_id',
    'station_name',
    'lat',
    'lon',
    'internal_id',
    'capacity',
    'address',
    'geometry',
]
gdf_stations = gdf_stations[final_columns]

In [12]:
# Sort rows by the values in column 'station_id'.
# Reassigning instead of inplace=True avoids mutating a frame that was produced
# by a column selection (which can raise SettingWithCopyWarning on older pandas)
# and keeps the cell re-runnable.
gdf_stations = gdf_stations.sort_values('station_id')
gdf_stations.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1662 entries, 420 to 1661
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   station_id    721 non-null    object  
 1   station_name  1662 non-null   object  
 2   lat           1662 non-null   float64 
 3   lon           1662 non-null   float64 
 4   internal_id   1662 non-null   object  
 5   capacity      1662 non-null   int64   
 6   address       941 non-null    object  
 7   geometry      1662 non-null   geometry
dtypes: float64(2), geometry(1), int64(1), object(4)
memory usage: 116.9+ KB


## Visualization on Map of the City of Chicago:

In [13]:
# Show the stations on a map via GeoDataFrame.explore()
gdf_stations.explore()

Use the following link to display the map with all Divvy stations in the City of Chicago:
[Divvy Stations](https://nbviewer.org/github/Brettmett/Divvy_Bikeshare_Chicago/blob/brettmett_branch/01_get_stations_data.ipynb)

## Upload to SQL database:

In [None]:
# # Push DataFrame with stations to SQL Database:
# NOTE(review): `engine` and `schema` are not assigned anywhere in the visible notebook
# (only `get_engine` is imported) - define them before re-enabling this cell.
# NOTE(review): the frame contains a geometry column; presumably it needs converting
# (or a geometry-aware writer) before to_sql can store it - confirm.
# table_name = 'stations'

# gdf_stations.to_sql(name=table_name, # Name of SQL table
#                     con=engine, # Engine or connection
#                     if_exists='replace', # Drop the table before inserting new values
#                     schema=schema, # Target schema (must be defined before this cell runs)
#                     index=False, # Do not write the DataFrame index as a column
#                     chunksize=5000, # Specify the number of rows in each batch to be written at a time
#                     method='multi') # Pass multiple values in a single INSERT clause
# print(f"The {table_name} table was imported successfully.")