[View in Colaboratory](https://colab.research.google.com/github/Thanhvh/2015/blob/master/Busstop.ipynb)

# Objectives

1. Visualizing bus stop data and employee address data on a map.
2. Finding the 10 best bus stops.

# Installing and importing libraries

**1. Installing libraries**





In [0]:
!pip install pandas 
!pip install geopy
!pip install -U folium

**2. Importing libraries**

In [0]:
import os

import pandas as pd
import geopy
import folium

# Importing and Manipulating data

**3. Importing dataset**

In [7]:
# Bring the input files into the Colab runtime. The following cell expects
# 'Potentail_Bust_Stops.csv' and 'Employee_Addresses.csv' to be available in
# the working directory.
from google.colab import files
# The return value of upload() is left as the last expression so the notebook
# displays it as the cell output.
files.upload()

{}

In [0]:
# Load the two input tables: candidate bus stop intersections and employee
# home addresses. The file names are kept exactly as the uploaded files are named.
bus_stop = pd.read_csv(filepath_or_buffer='Potentail_Bust_Stops.csv')
em_addr = pd.read_csv(filepath_or_buffer='Employee_Addresses.csv')

**4. Printing the first rows and checking the dataset information**


In [0]:
# Preview the first rows of both tables, then show their column dtypes and
# non-null counts. The output order matches: heads first, then the info summaries.
for frame in (bus_stop, em_addr):
    print(frame.head())
for frame in (bus_stop, em_addr):
    frame.info()

**5. Replacing the abbreviation "ST" with "Street" in bus_stop**

In [12]:
# Expand the street-type abbreviation "ST" to "Street" in both street columns
# so the geocoder receives unambiguous intersection names.
#
# The pattern requires a word boundary after "ST": a plain ' ST' substring
# replace would also rewrite the start of any word beginning with "ST"
# (e.g. "VAN STREET" -> "VAN StreetREET").
for column in ('Street_One', 'Street_Two'):
    bus_stop[column] = bus_stop[column].str.replace(r' ST\b', ' Street', regex=True)
print(bus_stop.head())

       Street_One             Street_Two
0  MISSION Street              ITALY AVE
1  MISSION Street  NEW MONTGOMERY Street
2  MISSION Street            01ST Street
3  MISSION Street            20TH Street
4  MISSION Street         FREMONT Street


# Visualizing data on map

**6. Using folium to draw San Francisco map**

In [13]:
from geopy.geocoders import GoogleV3
from geopy.geocoders.base import Geocoder
from geopy.exc import (
    GeocoderQueryError,
    GeocoderQuotaExceeded,
    GeocoderAuthenticationFailure,
)

# Read the Google Maps API key from the environment instead of embedding it in
# the notebook (set it beforehand, e.g. with `%env GOOGLE_MAPS_API_KEY=...`).
# Any key that was ever committed inside a notebook should be treated as
# compromised and rotated.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this cell."
    )

city="San Francisco, CA, USA"
geolocator = GoogleV3(api_key=api_key)

# Geocode the city itself; its coordinates become the centre of the map.
location = geolocator.geocode(query=city)
if location is None:
    # geocode() yields None when the service finds no match; fail with a
    # clear message rather than an AttributeError on `.latitude`.
    raise ValueError("Could not geocode the city: " + city)
SanFC_map = folium.Map((location.latitude, location.longitude), zoom_start=16)

# Last expression of the cell: renders the (still empty) map.
SanFC_map

**7. Extracting intersection longitude and latitude**


In [0]:
# Geocode every candidate bus stop, i.e. the intersection of Street_One and
# Street_Two inside `city`. The `geolocator` created in the map cell is reused,
# so the API key does not have to be repeated here.
#
# Exactly one latitude/longitude pair is appended per row (NaN when the row has
# a missing street name or the geocoder returns no match), so the lists stay
# aligned with the rows of bus_stop.
lat_Bus=[]
long_Bus=[]
for street_one, street_two in zip(bus_stop.Street_One, bus_stop.Street_Two):
    if pd.isna(street_one) or pd.isna(street_two):
        location = None
    else:
        location = geolocator.geocode(query=street_one+" & "+street_two+" "+city, timeout=100)
    if location is None:
        lat_Bus.append(float('nan'))
        long_Bus.append(float('nan'))
    else:
        lat_Bus.append(location.latitude)
        long_Bus.append(location.longitude)

In [15]:
# Show the raw geocoding results: latitudes first, then longitudes.
for coordinates in (lat_Bus, long_Bus):
    print(coordinates)

[37.7184779, 37.7874561, 37.7899543, 37.7586404, 37.7904547, 37.7699578, 37.7690631, 37.7911592, 37.7456035, 37.73395319999999, 37.7176754, 37.7883744, 37.7892505, 37.7287485, 37.7146065, 37.7423869, 37.7918484, 37.7309049, 37.7666804, 37.7417506, 37.7282519, 37.7277166, 37.7367496, 37.7095562, 37.7932317, 37.7083052, 37.7627139, 37.7880171, 37.7822117, 37.716494, 37.7410375, 37.7554447, 37.766478, 37.7467814, 37.7634446, 37.7292307, 37.7146065, 37.749044, 37.743139, 37.7271822, 37.7107534, 37.760087, 37.77101, 37.7938433, 37.7602395, 37.7388435, 37.723902, 37.7428312, 37.7154464, 37.709959, 37.7743325, 37.7422074, 37.78464, 37.7200367, 37.7767134, 37.744038, 37.7215808, 37.7211696, 37.7868392, 37.7570409, 37.7630637, 37.7116056, 37.729844, 37.7360862, 37.7114504, 37.7240279, 37.7682842, 37.7139529, 37.7373612, 37.7752838, 37.7094211, 37.7172741, 37.789179, 37.7889865, 37.727656, 37.7265972, 37.7734886, 37.7830904, 37.7352463, 37.7852869, 37.7251752, 37.7342953, 37.7456035, 37.7299724,

**8. Adding long_Bus and lat_Bus to the bus_stop data frame**

In [16]:
# Turn the coordinate lists into Series and attach them to bus_stop as the
# 'Lat' and 'Long' columns (aligned on the row index).
lat_Bus=pd.Series(lat_Bus)
long_Bus=pd.Series(long_Bus)
# assign() overwrites existing 'Lat'/'Long' columns, so re-running this cell
# does not append duplicate columns the way repeated pd.concat calls would.
bus_stop = bus_stop.assign(Lat=lat_Bus, Long=long_Bus)
print(bus_stop.head())

       Street_One             Street_Two        Lat        Long
0  MISSION Street              ITALY AVE  37.718478 -122.439536
1  MISSION Street  NEW MONTGOMERY Street  37.787456 -122.400523
2  MISSION Street            01ST Street  37.789954 -122.397514
3  MISSION Street            20TH Street  37.758640 -122.419077
4  MISSION Street         FREMONT Street  37.790455 -122.396726


**9. Saving bus_stop to csv**

In [0]:
# Persist the geocoded bus stops (row index included) so the geocoding step
# does not have to be repeated.
bus_stop.to_csv(path_or_buf='bus_stop.csv')

**10. Drawing SanFC_map and marking every intersection with a blue icon**

In [17]:
# Put one default marker on the map for every bus stop that has coordinates.
# Rows with a missing latitude or longitude are skipped: iterating
# range(count()) with label lookups would visit the wrong rows as soon as a
# coordinate is missing.
for lat, lng in bus_stop[['Lat', 'Long']].dropna().itertuples(index=False, name=None):
    folium.Marker([lat, lng]).add_to(SanFC_map)
# Last expression of the cell: renders the map with the markers.
SanFC_map

**11. Geocoding each employee address to latitude and longitude**

In [0]:
import requests

# Geocode every employee address through the Google Geocoding web service.
GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'

# Read the API key from the environment instead of embedding it in the notebook
# (set it beforehand, e.g. with `%env GOOGLE_MAPS_API_KEY=...`).
api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this cell."
    )

latAddr = []
longAddr = []
for address in em_addr.address:
    # Default to NaN so that exactly one value is appended per row. Skipping
    # failed lookups would shift every later coordinate onto the wrong address
    # when the lists are attached to em_addr.
    latitude = longitude = float('nan')
    if pd.notna(address):
        # `params` URL-encodes the address (spaces, '&', '#', ...); `timeout`
        # keeps a stalled connection from hanging the notebook indefinitely.
        api_response = requests.get(
            GEOCODE_URL,
            params={'address': address, 'key': api_key},
            timeout=30,
        )
        api_response_dict = api_response.json()
        if api_response_dict['status'] == 'OK':
            # Use the first candidate returned by the service.
            coordinates = api_response_dict['results'][0]['geometry']['location']
            latitude = coordinates['lat']
            longitude = coordinates['lng']
    latAddr.append(latitude)
    longAddr.append(longitude)

**12. Adding longAddr and latAddr to the em_addr data frame**

In [0]:
# Turn the coordinate lists into Series and attach them to em_addr as the
# 'Lat' and 'Long' columns (aligned on the row index).
latAddr = pd.Series(latAddr)
longAddr = pd.Series(longAddr)
# assign() overwrites existing 'Lat'/'Long' columns, so re-running this cell
# does not append duplicate columns the way repeated pd.concat calls would.
em_addr = em_addr.assign(Lat=latAddr, Long=longAddr)
print(em_addr.head())

**13. Saving em_addr to a CSV file and reading it back**

In [0]:
# Write the geocoded employee table to disk (row index included as the first
# column), then load that file back into a separate frame.
em_addr.to_csv(path_or_buf='em_addr.csv')
em_addr_longlat = pd.read_csv(filepath_or_buffer='em_addr.csv')

**14. Marking employee addresses with red circles**

In [23]:
# Draw a small red circle for every employee address that has coordinates.
# Rows with a missing latitude or longitude are skipped: iterating
# range(count()) with label lookups would visit the wrong rows as soon as a
# coordinate is missing.
for lat, lng in em_addr_longlat[['Lat', 'Long']].dropna().itertuples(index=False, name=None):
    folium.Circle(radius=10, location=[lat, lng], color='red', fill=True).add_to(SanFC_map)
# Last expression of the cell: renders the map with bus stops and employees.
SanFC_map

# Finding 10 best bus stops

**15. Importing dataset for Kmeans**

In [0]:
# Build the clustering input: one (latitude, longitude) row per employee.
dataset = pd.read_csv('em_addr.csv')
# Select the coordinate columns by name rather than by position, so the cell
# does not depend on how many other columns the CSV contains, and drop
# addresses without coordinates because KMeans cannot be fitted on NaN values.
X = dataset[['Lat', 'Long']].dropna().values
print(X)

**16. Applying Kmeans to the dataset**

In [0]:
from sklearn.cluster import KMeans

# Group the employee coordinates into 10 clusters. random_state is fixed so the
# clustering is reproducible between runs.
kmeans = KMeans(
    n_clusters=10,
    init='k-means++',
    n_init=10,
    max_iter=300,
    random_state=0,
)
# Fit the model and get the cluster index assigned to each row of X.
y_kmeans = kmeans.fit_predict(X)