# Exploratory Data Analysis

In [1]:
import sys
# Put ../src at the front of the module search path so project code can be
# imported from this notebook.
# NOTE(review): the next cell imports `src.eda`, which resolves from the
# directory that *contains* `src`, not from `src` itself — confirm that this
# entry (and not an installed package or the working directory) is what makes
# that import succeed.
sys.path.insert(0, "../src")

In [2]:
from src.eda import Gatherer, Selector

## 1. Data Gathering

In [3]:
# Wayback Machine snapshots of the three Pronto open-data zip archives.
# Each pair is (integer label, archive URL); the labels become the dict keys
# and keep the order in which they are listed here.
urls = dict(
    (
        (2014, "https://web.archive.org/web/20170221074848/https://s3.amazonaws.com/pronto-data/open_data_year_one.zip"),
        (2015, "https://web.archive.org/web/20220314100611/https://s3.amazonaws.com/pronto-data/open_data_year_two.zip"),
        (2016, "https://web.archive.org/web/20170317213337/https://s3.amazonaws.com/pronto-data/open_data_2016-12.zip"),
    )
)

In [4]:
# Hand the label -> archive-URL mapping to Gatherer; the following cells use
# this object to download and then extract the archives.
gatherer = Gatherer(urls=urls)

In [29]:
# Download every archive listed in `urls` into ../data/raw (one progress bar
# per label is printed while it runs).
# NOTE(review): presumably this re-downloads on every run — confirm whether
# Gatherer.download skips files that already exist, otherwise a full
# Restart & Run All repeats the transfer each time.
gatherer.download(download_path="../data/raw")

Downloading data for 2014:   0%|          | 0/74281286 [00:00<?, ?it/s]

Downloading data for 2015:   0%|          | 0/5762410 [00:00<?, ?it/s]

Downloading data for 2016:   0%|          | 0/6412522 [00:00<?, ?it/s]



In [5]:
# Unpack each downloaded <label>.zip from ../data/raw into its own
# ../data/extracted/<label> folder; one confirmation line is printed per archive.
gatherer.extract(download_path="../data/raw", extract_path="../data/extracted")

../data/raw/2016.zip successfully extracted to ../data/extracted/2016
../data/raw/2014.zip successfully extracted to ../data/extracted/2014
../data/raw/2015.zip successfully extracted to ../data/extracted/2015


## 2. Data Selection

In [3]:
# Create the Selector that the cells below load the extracted directories into
# and read tables back from via `selector.dfs`.
selector = Selector()

In [4]:
# Load every extracted archive folder into the selector, oldest label first.
for extracted_dir in (
    "../data/extracted/2014/",
    "../data/extracted/2015/",
    "../data/extracted/2016/",
):
    selector.read(directory=extracted_dir)

In [5]:
# List the names under which the loaded tables are stored.
# NOTE(review): no "2014_*" key appears even though ../data/extracted/2014/ was
# read, so the keys look like they come from the file names inside each archive
# rather than from the directory label — confirm in Selector.read.
selector.dfs.keys()

dict_keys(['2015_station_data', '2015_trip_data', '2015_weather_data', '2015_status_data', '2016_station_data', '2016_weather_data', '2016_trip_data', '2016-12_station_data', '2016-12_trip_data'])

In [7]:
# Show the trip table stored under "2015_trip_data".
# In the rendered output its first and last rows run from 10/13/2014 to 10/12/2015.
selector.dfs["2015_trip_data"]

Unnamed: 0,trip_id,starttime,stoptime,bikeid,tripduration,from_station_name,to_station_name,from_station_id,to_station_id,usertype,gender,birthyear
0,431,10/13/2014 10:31,10/13/2014 10:48,SEA00298,985.935,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Annual Member,Male,1960.0
1,432,10/13/2014 10:32,10/13/2014 10:48,SEA00195,926.375,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Annual Member,Male,1970.0
2,433,10/13/2014 10:33,10/13/2014 10:48,SEA00486,883.831,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Annual Member,Female,1988.0
3,434,10/13/2014 10:34,10/13/2014 10:48,SEA00333,865.937,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Annual Member,Female,1977.0
4,435,10/13/2014 10:34,10/13/2014 10:49,SEA00202,923.923,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Annual Member,Male,1971.0
...,...,...,...,...,...,...,...,...,...,...,...,...
142841,156796,10/12/2015 20:41,10/12/2015 20:47,SEA00358,377.183,E Pine St & 16th Ave,Summit Ave & E Denny Way,CH-07,CH-01,Annual Member,Male,1990.0
142842,156797,10/12/2015 20:43,10/12/2015 20:48,SEA00399,303.330,Bellevue Ave & E Pine St,Summit Ave E & E Republican St,CH-12,CH-03,Annual Member,Male,1978.0
142843,156798,10/12/2015 21:03,10/12/2015 21:06,SEA00204,165.597,Harvard Ave & E Pine St,E Harrison St & Broadway Ave E,CH-09,CH-02,Annual Member,Male,1989.0
142844,156799,10/12/2015 21:35,10/12/2015 21:41,SEA00073,388.576,Pine St & 9th Ave,3rd Ave & Broad St,SLU-16,BT-01,Short-Term Pass Holder,,


In [10]:
# Show the trip table stored under "2016_trip_data".
# NOTE(review): the first rows displayed (trip_id 431 onwards, 10/13/2014) are
# the same trips that open "2015_trip_data", so this file looks cumulative
# rather than limited to a later period — confirm before combining tables, or
# trips will be double counted. The usertype label also differs here
# ("Member" vs "Annual Member" in "2015_trip_data").
selector.dfs["2016_trip_data"]

Unnamed: 0,trip_id,starttime,stoptime,bikeid,tripduration,from_station_name,to_station_name,from_station_id,to_station_id,usertype,gender,birthyear
0,431,10/13/2014 10:31,10/13/2014 10:48,SEA00298,985.935,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1960.0
1,432,10/13/2014 10:32,10/13/2014 10:48,SEA00195,926.375,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1970.0
2,433,10/13/2014 10:33,10/13/2014 10:48,SEA00486,883.831,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Female,1988.0
3,434,10/13/2014 10:34,10/13/2014 10:48,SEA00333,865.937,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Female,1977.0
4,435,10/13/2014 10:34,10/13/2014 10:49,SEA00202,923.923,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1971.0
...,...,...,...,...,...,...,...,...,...,...,...,...
236060,255241,8/31/2016 23:34,8/31/2016 23:45,SEA00201,679.532,Harvard Ave & E Pine St,2nd Ave & Spring St,CH-09,CBD-06,Short-Term Pass Holder,,
236061,255242,8/31/2016 23:48,9/1/2016 0:20,SEA00247,1965.418,Cal Anderson Park / 11th Ave & Pine St,6th Ave S & S King St,CH-08,ID-04,Short-Term Pass Holder,,
236062,255243,8/31/2016 23:47,9/1/2016 0:20,SEA00300,1951.173,Cal Anderson Park / 11th Ave & Pine St,6th Ave S & S King St,CH-08,ID-04,Short-Term Pass Holder,,
236063,255244,8/31/2016 23:49,9/1/2016 0:20,SEA00047,1883.299,Cal Anderson Park / 11th Ave & Pine St,6th Ave S & S King St,CH-08,ID-04,Short-Term Pass Holder,,


In [11]:
# Show the trip table stored under "2016-12_trip_data".
# NOTE(review): this one also opens with trip_id 431 on 10/13/2014 and its last
# displayed rows are dated 12/31/2016, so it appears to contain the trips of the
# earlier tables as well — confirm, and if so prefer this table alone over
# concatenating all three.
selector.dfs["2016-12_trip_data"]

Unnamed: 0,trip_id,starttime,stoptime,bikeid,tripduration,from_station_name,to_station_name,from_station_id,to_station_id,usertype,gender,birthyear
0,431,10/13/2014 10:31,10/13/2014 10:48,SEA00298,985.935,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1960.0
1,432,10/13/2014 10:32,10/13/2014 10:48,SEA00195,926.375,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1970.0
2,433,10/13/2014 10:33,10/13/2014 10:48,SEA00486,883.831,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Female,1988.0
3,434,10/13/2014 10:34,10/13/2014 10:48,SEA00333,865.937,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Female,1977.0
4,435,10/13/2014 10:34,10/13/2014 10:49,SEA00202,923.923,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1971.0
...,...,...,...,...,...,...,...,...,...,...,...,...
263131,283148,12/31/2016 23:01,12/31/2016 23:37,SEA00220,2139.413,Burke-Gilman Trail / NE Blakeley St & 24th Ave NE,Burke-Gilman Trail / NE Blakeley St & 24th Ave NE,UD-01,UD-01,Member,Male,1994.0
263132,283149,12/31/2016 23:41,1/1/2017 0:18,SEA00339,2185.038,Seattle Aquarium / Alaskan Way S & Elliott Bay...,Seattle Aquarium / Alaskan Way S & Elliott Bay...,WF-04,WF-04,Short-Term Pass Holder,,
263133,283150,12/31/2016 23:42,1/1/2017 0:18,SEA00126,2173.435,Seattle Aquarium / Alaskan Way S & Elliott Bay...,Seattle Aquarium / Alaskan Way S & Elliott Bay...,WF-04,WF-04,Short-Term Pass Holder,,
263134,283151,12/31/2016 23:47,1/1/2017 0:18,SEA00014,1850.632,Seattle Aquarium / Alaskan Way S & Elliott Bay...,Seattle Aquarium / Alaskan Way S & Elliott Bay...,WF-04,WF-04,Short-Term Pass Holder,,
