# Dataset Refactoring

In [122]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

## Get Data

In [123]:
# Location of the raw trip export, relative to the notebook's working directory.
filename = "./Dataset/202010-capitalbikeshare-tripdata/202010-capitalbikeshare-tripdata.csv"

# Load the whole CSV into `data`, the frame that the following cells reshape.
data = pd.read_csv(filepath_or_buffer=filename)

## Get All Stations

In [124]:
# Source column -> common station column name, once for each end of a trip.
start_columns = {"start_station_id": "ID", "start_station_name": "Name", "start_lat": "Lat", "start_lng": "Lng"}
end_columns = {"end_station_id": "ID", "end_station_name": "Name", "end_lat": "Lat", "end_lng": "Lng"}

# Select the four station columns for each side and give both frames the same schema.
stations_start = data[list(start_columns)].rename(columns=start_columns)
stations_end = data[list(end_columns)].rename(columns=end_columns)

# Stack the start rows on top of the end rows. The original index labels are kept, so
# every trip label appears twice (once per side) and stations are not de-duplicated.
stations = pd.concat([stations_start, stations_end])

## Compute Trip Durations

In [125]:
# Parse both timestamp columns with vectorized pandas calls instead of looping over
# every row with datetime.strptime; the explicit format keeps the parsing strict.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
started_times = pd.to_datetime(data['started_at'], format=TIMESTAMP_FORMAT)
ended_times = pd.to_datetime(data['ended_at'], format=TIMESTAMP_FORMAT)

# Trip length as a timedelta Series that shares `data`'s index.
time_spent = ended_times - started_times

# Insert at position 3 so "duration" sits directly after "started_at";
# the trailing True is insert()'s allow_duplicates flag.
data.insert(3, "duration", time_spent, True)

# Station names/coordinates are dropped from the trip table (the "Get All Stations"
# cell already copied them into `stations`), and 'ended_at' is dropped because
# "duration" replaces it.
# NOTE: this makes the cell non-idempotent - re-running it without reloading `data`
# fails because 'ended_at' no longer exists.
data = data.drop(columns=['start_station_name','start_lat','start_lng','end_station_name','end_lat','end_lng', 'ended_at'])

## Convert Membership to Boolean

In [126]:
# True for rides whose 'member_casual' value is "member", False otherwise.
member = data['member_casual'] == "member"

# Assign the mask to the column directly. The previous
# `data['member_casual'].update(member)` modified a column selection (chained
# assignment), which does not write back to `data` when pandas Copy-on-Write is active.
data['member_casual'] = member

# The column now holds a flag rather than a label, so give it a matching name.
data = data.rename(columns={"member_casual": "member"})

## Get Rideable Type

In [127]:
# Disabled cell, kept for reference: if enabled it would replace 'rideable_type' with a
# boolean "is docked_bike" flag and rename the column to "Docked".
# NOTE(review): the snippet reuses the name `member` for the docked-bike mask.
# member = data['rideable_type'] == "docked_bike"
# data['rideable_type'].update(member)
# data.rename(
#     columns={"rideable_type": "Docked"},
#     inplace=True )

## Final Datasets

In [128]:
# Display the stacked start/end station table (the rendered output abbreviates the middle rows).
stations


Unnamed: 0,ID,Name,Lat,Lng
0,305.0,Constitution Ave & 2nd St NW/DOL,38.892275,-77.013917
1,413.0,8th & O St NW,38.908640,-77.022770
2,413.0,8th & O St NW,38.908640,-77.022770
3,24.0,Massachusetts Ave & Dupont Circle NW,38.910100,-77.044400
4,452.0,Henry Bacon Dr & Lincoln Memorial Circle NW,38.890539,-77.049383
...,...,...,...,...
229791,43.0,Anacostia Ave & Benning Rd NE / River Terrace,38.896544,-76.960120
229792,59.0,10th & Monroe St NE,38.932514,-76.992889
229793,277.0,Lincoln Memorial,38.888255,-77.049437
229794,452.0,Henry Bacon Dr & Lincoln Memorial Circle NW,38.890539,-77.049383


In [129]:
# Display the reshaped trip table (the rendered output abbreviates the middle rows).
data

Unnamed: 0,ride_id,rideable_type,started_at,duration,start_station_id,end_station_id,member
0,3A8C551C68AD3676,docked_bike,2020-10-09 16:44:38,0 days 01:11:19,305.0,117.0,False
1,BF72D49088F26637,docked_bike,2020-10-31 09:27:31,0 days 00:11:01,413.0,185.0,True
2,3F1B0E9624087288,docked_bike,2020-10-28 17:11:50,0 days 00:09:17,413.0,531.0,True
3,F96BDE7A4EC2BA19,docked_bike,2020-10-11 11:23:02,0 days 00:23:16,24.0,141.0,True
4,2472C0AE0A0CE012,docked_bike,2020-10-20 13:32:51,0 days 00:28:08,452.0,185.0,False
...,...,...,...,...,...,...,...
229791,62E64B964B60B718,docked_bike,2020-10-14 16:34:15,0 days 00:47:52,43.0,43.0,False
229792,313F1242C40DB408,docked_bike,2020-10-02 19:11:09,0 days 00:19:26,65.0,59.0,True
229793,11D0589008C626B6,docked_bike,2020-10-09 11:32:47,0 days 01:30:01,647.0,277.0,False
229794,D52C7FA0023CFCE4,docked_bike,2020-10-24 16:11:06,0 days 01:56:53,531.0,452.0,False


# Get Route Coordinates

In [151]:
# Index label of the trip whose route URL is built (previously a literal 0 repeated
# in every lookup).
trip_index = 0
trip = data.loc[trip_index]

# Every row of `stations` recorded for the trip's start / end station ID.
start = stations.loc[stations["ID"] == trip["start_station_id"]]
end = stations.loc[stations["ID"] == trip["end_station_id"]]

# `stations` stacks the start rows on top of the end rows, so each index label occurs
# twice. A plain `.loc[label]` therefore returns two rows when a trip starts and ends
# at the same station, and the str() calls below would then render a whole Series.
# Selecting with a list label always yields a frame; `.iloc[0]` reduces it to one row.
start_row = start.loc[[trip_index]].iloc[0]
end_row = end.loc[[trip_index]].iloc[0]

lat_start = str(start_row["Lat"])
lng_start = str(start_row["Lng"])

lat_end = str(end_row["Lat"])
lng_end = str(end_row["Lng"])

# Google Maps directions URL from the start coordinates to the end coordinates.
url = "https://www.google.fr/maps/dir/{},{}/{},{}/data=!4m12!4m11!1m3!2m2!1d".format(lat_start, lng_start, lat_end, lng_end)

print(url)

# Fetching the page is left disabled:
# result = requests.get(url)
# result.text

https://www.google.fr/maps/dir/38.892275,-77.013917/38.88627694506511,-77.02824175357819/data=!4m12!4m11!1m3!2m2!1d
