# CityBikes

## Send a request to CityBikes for the city of your choice. 

In [1]:
import json
from pathlib import Path
from pprint import pp

import pandas as pd
import requests

In [2]:
# Fetch the list of all CityBikes networks (one entry per city/system).

# Only request the fields used below; the API cannot filter lower than
# level 1 of the keys for now.
params = "href,location"
url = "http://api.citybik.es/v2/networks?fields=" + params

# Without a timeout, requests may wait indefinitely on a stalled connection.
result_cities = requests.get(url, timeout=30)
print(result_cities)

# Stop here on an HTTP error status instead of failing later while parsing
# the body. Done after the print so the status is still shown on failure.
result_cities.raise_for_status()

<Response [200]>


In [3]:
# Pull the list of networks out of the JSON body and preview its first entry
cities_payload = result_cities.json()
networks = cities_payload['networks']
pp(networks[0])

{'href': '/v2/networks/velobike-moscow',
 'location': {'city': 'Moscow',
              'country': 'RU',
              'latitude': 55.75,
              'longitude': 37.616667}}


In [4]:
# Find the API endpoint of the city of choice: Montréal, QC.
# The search uses a lowercase, accent-free fragment of the French name so the
# accented character cannot get in the way of the match.
city_choice = 'montr'

# Reset before searching: if nothing matches, a value left over from an
# earlier run of this cell must not be silently reused by the cells below.
endpoint = None

# Keep the href of the first network whose city name contains city_choice
for network in networks:
    if city_choice in network['location']['city'].lower():
        endpoint = network["href"]
        print(f"The href for {city_choice} is: {endpoint}")
        break
else:
    # Reached only when the loop finished without hitting `break`
    print(f"No network found for {city_choice}")

The href for montr is: /v2/networks/bixi-montreal


## Parse through the response to get the details you want for the bike stations in that city (latitude, longitude, number of bikes). 

In [5]:
# Request the station data of the network found above.

url = "http://api.citybik.es" + endpoint

# Without a timeout, requests may wait indefinitely on a stalled connection.
result_stations = requests.get(url, timeout=30)
print(url)
print(result_stations)

# Stop here on an HTTP error status instead of failing later while parsing
# the body. Done after the prints so the URL and status are still shown.
result_stations.raise_for_status()

http://api.citybik.es/v2/networks/bixi-montreal
<Response [200]>


In [17]:
# Show only the top two levels of the response to see how it is nested
stations_payload = result_stations.json()
pp(stations_payload, depth=2)

{'network': {'stations': [...]}}


In [7]:
# Drill down to the list of stations inside the 'network' object
network_payload = result_stations.json()['network']
stations = network_payload['stations']

# Preview the first station to see which fields are available
pp(stations[0])

{'empty_slots': 8,
 'extra': {'ebikes': 2,
           'has_ebikes': True,
           'last_updated': 1692730666,
           'payment': ['key', 'creditcard'],
           'payment-terminal': True,
           'renting': 1,
           'returning': 1,
           'slots': 11,
           'uid': '693'},
 'free_bikes': 3,
 'id': '72bfd647b3d2b650546f42319729757d',
 'latitude': 45.617499783128075,
 'longitude': -73.60601127147675,
 'name': 'Cégep Marie-Victorin',
 'timestamp': '2023-08-22T18:59:37.024000Z'}


In [8]:
# Parse the fields of interest out of every station record.
# Each list holds one value per station, in the same order as `stations`.
station_names = [entry['name'] for entry in stations]
latitudes = [entry['latitude'] for entry in stations]
longitudes = [entry['longitude'] for entry in stations]

# total_bikes is the sum of a station's empty slots and its free bikes
total_bikes = [entry['empty_slots'] + entry['free_bikes'] for entry in stations]

## Put your parsed results into a DataFrame.

In [9]:
# Assemble the parsed lists into a DataFrame, one column per list
column_data = dict(
    station_name=station_names,
    latitude=latitudes,
    longitude=longitudes,
    total_bikes=total_bikes,
)

df_stations = pd.DataFrame(column_data)

# Pre-cleaning

In [10]:
# Check the column dtypes, then the non-null counts and memory usage
column_types = df_stations.dtypes
print(column_types)
df_stations.info()

station_name     object
latitude        float64
longitude       float64
total_bikes       int64
dtype: object
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 797 entries, 0 to 796
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   station_name  797 non-null    object 
 1   latitude      797 non-null    float64
 2   longitude     797 non-null    float64
 3   total_bikes   797 non-null    int64  
dtypes: float64(2), int64(1), object(1)
memory usage: 25.0+ KB


In [11]:
# Preview the first five rows
df_stations.head(n=5)

Unnamed: 0,station_name,latitude,longitude,total_bikes
0,Cégep Marie-Victorin,45.6175,-73.606011,11
1,Gare d'autocars de Montréal (Berri / Ontario),45.516926,-73.564257,10
2,Ateliers municipaux de St-Laurent (Cavendish /...,45.506176,-73.711186,19
3,Place Rodolphe-Rousseau (Gohier / Édouard-Laurin),45.512994,-73.682498,24
4,Centre des loisirs (Tassé / Grenet),45.514734,-73.691449,13


In [12]:
# Count the rows that are exact duplicates of an earlier row
duplicate_rows = df_stations.duplicated().sum()
print(duplicate_rows)

0


In [13]:
# Summary statistics, to spot out-of-range coordinates or bike counts
summary_stats = df_stations.describe()
print(summary_stats)

         latitude   longitude  total_bikes
count  797.000000  797.000000   797.000000
mean    45.524085  -73.592979    20.877039
std      0.040555    0.048331     6.941242
min     45.416295  -73.758227     5.000000
25%     45.501804  -73.616075    16.000000
50%     45.525194  -73.582596    19.000000
75%     45.545581  -73.565364    23.000000
max     45.702349  -73.436058    78.000000


# Creating CSV

In [14]:
# Save the stations table as CSV, without the index column.
# Create the target folder first: to_csv does not create missing directories
# and would otherwise raise an OSError.
output_path = Path('../data/stations.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df_stations.to_csv(output_path, index=False)