In [170]:
# LIBRARIES NEEDED:

import pandas as pd
import numpy as np
import datetime as dt

# 1: Load CSV And Explore Structure

In [171]:
# Read the Chicago trip records from the local "files" directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="files/chicago.csv")

In [172]:
# Preview the first five rows (5 is the default row count for head()).
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year
0,1423854,2017-06-23 15:09:32,2017-06-23 15:14:53,321,Wood St & Hubbard St,Damen Ave & Chicago Ave,Subscriber,Male,1992.0
1,955915,2017-05-25 18:19:03,2017-05-25 18:45:53,1610,Theater on the Lake,Sheffield Ave & Waveland Ave,Subscriber,Female,1992.0
2,9031,2017-01-04 08:27:49,2017-01-04 08:34:45,416,May St & Taylor St,Wood St & Taylor St,Subscriber,Male,1981.0
3,304487,2017-03-06 13:49:38,2017-03-06 13:55:28,350,Christiana Ave & Lawrence Ave,St. Louis Ave & Balmoral Ave,Subscriber,Male,1986.0
4,45207,2017-01-17 14:53:07,2017-01-17 15:02:01,534,Clark St & Randolph St,Desplaines St & Jackson Blvd,Subscriber,Male,1975.0


In [173]:
# List the column labels of the loaded frame.
df.columns

Index(['Unnamed: 0', 'Start Time', 'End Time', 'Trip Duration',
       'Start Station', 'End Station', 'User Type', 'Gender', 'Birth Year'],
      dtype='object')

In [174]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0.1,Unnamed: 0,Trip Duration,Birth Year
count,300000.0,300000.0,238981.0
mean,776345.8,936.23929,1980.858223
std,448146.4,1548.792767,11.003329
min,4.0,60.0,1899.0
25%,387136.8,393.0,1975.0
50%,777103.5,670.0,1984.0
75%,1164065.0,1125.0,1989.0
max,1551500.0,86224.0,2016.0


In [175]:
# Per-column non-null counts and dtypes; useful for spotting columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300000 entries, 0 to 299999
Data columns (total 9 columns):
Unnamed: 0       300000 non-null int64
Start Time       300000 non-null object
End Time         300000 non-null object
Trip Duration    300000 non-null int64
Start Station    300000 non-null object
End Station      300000 non-null object
User Type        300000 non-null object
Gender           238948 non-null object
Birth Year       238981 non-null float64
dtypes: float64(1), int64(2), object(6)
memory usage: 20.6+ MB


# 2: Most Popular Start Hour
Use pandas to load chicago.csv into a DataFrame and find the hour of the day at which trips most frequently start.

In [176]:
# Parse the "Start Time" strings into pandas datetime values so the .dt accessor can be used.
df['Start Time'] = df['Start Time'].pipe(pd.to_datetime)

In [177]:
# Derive the hour of day (0-23) from each trip's start timestamp and store it as "hour".
start_hour = df['Start Time'].dt.hour
df['hour'] = start_hour
df.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,hour
0,1423854,2017-06-23 15:09:32,2017-06-23 15:14:53,321,Wood St & Hubbard St,Damen Ave & Chicago Ave,Subscriber,Male,1992.0,15
1,955915,2017-05-25 18:19:03,2017-05-25 18:45:53,1610,Theater on the Lake,Sheffield Ave & Waveland Ave,Subscriber,Female,1992.0,18
2,9031,2017-01-04 08:27:49,2017-01-04 08:34:45,416,May St & Taylor St,Wood St & Taylor St,Subscriber,Male,1981.0,8
3,304487,2017-03-06 13:49:38,2017-03-06 13:55:28,350,Christiana Ave & Lawrence Ave,St. Louis Ave & Balmoral Ave,Subscriber,Male,1986.0,13
4,45207,2017-01-17 14:53:07,2017-01-17 15:02:01,534,Clark St & Randolph St,Desplaines St & Jackson Blvd,Subscriber,Male,1975.0,14


In [178]:
# Find the most common start hour with .mode() and report it on a 12-hour clock.
# .mode() returns a Series (there can be ties); the first entry is reported.
popular_hour = df['hour'].mode()

if popular_hour.empty:
    # No rows means there is no hour to report.
    print('No trips available to determine the most popular hour.')
else:
    most_common_hour = int(popular_hour.iloc[0])
    # 'hour' is on a 24-hour scale: pick the suffix, then map 0 -> 12 and 13-23 -> 1-11.
    suffix = 'am' if most_common_hour < 12 else 'pm'
    hour_on_12h_clock = most_common_hour % 12 or 12
    print('The most popular hour to rent bikes is: {}:00 {}'.format(hour_on_12h_clock, suffix))

The most popular hour to rent bikes is:  17 : 00 pm


# 3: Breakdown of User Types
There are different types of users specified in the "User Type" column.

In [179]:
# Count how many trips were made by each user type and print the tally.
user_types = df['User Type'].value_counts()
print(user_types)

Subscriber    238889
Customer       61110
Dependent          1
Name: User Type, dtype: int64


# 4: Load and Filter the Dataset
* choosing a dataset to load
* filtering based on a specified month and day

In [181]:
# Ask the user for a city and look up its CSV file name in the global CITY_DATA dictionary.

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

while True:
    # Normalise spacing and capitalisation so "Chicago" or " chicago " is accepted.
    city_name = input("Please enter city: ").strip().lower()
    if city_name in CITY_DATA:
        break
    print("Sorry, that city is not part of this dataset. Try another one!")

# The loop only exits once city_name is a valid key, so the lookup cannot fail.
file_name = CITY_DATA[city_name]
print("Thank you. Let me pull that data for you!")

Please enter city: 
Sorry, that city is not part of this dataset. Try another one!
Please enter city: chicago
Thank you. Let me pull that data for you!


In [182]:
# Build the CSV path from the file name chosen above and load it into a DataFrame.
load_csv_path = ''.join(('files/', file_name))
df = pd.read_csv(load_csv_path)
df.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year
0,1423854,2017-06-23 15:09:32,2017-06-23 15:14:53,321,Wood St & Hubbard St,Damen Ave & Chicago Ave,Subscriber,Male,1992.0
1,955915,2017-05-25 18:19:03,2017-05-25 18:45:53,1610,Theater on the Lake,Sheffield Ave & Waveland Ave,Subscriber,Female,1992.0
2,9031,2017-01-04 08:27:49,2017-01-04 08:34:45,416,May St & Taylor St,Wood St & Taylor St,Subscriber,Male,1981.0
3,304487,2017-03-06 13:49:38,2017-03-06 13:55:28,350,Christiana Ave & Lawrence Ave,St. Louis Ave & Balmoral Ave,Subscriber,Male,1986.0
4,45207,2017-01-17 14:53:07,2017-01-17 15:02:01,534,Clark St & Randolph St,Desplaines St & Jackson Blvd,Subscriber,Male,1975.0


In [183]:
# Create the month and day_of_week columns.
# "Start Time" is converted to datetime first; the month number and the weekday name
# are then extracted into separate columns.

df['Start Time'] = pd.to_datetime(df['Start Time'])
df['month'] = df['Start Time'].dt.month
# Vectorised strftime('%A') gives the full weekday name without a Python-level
# call per row, and yields a missing value instead of raising on NaT.
df['day_of_week'] = df['Start Time'].dt.strftime('%A')
df.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week
0,1423854,2017-06-23 15:09:32,2017-06-23 15:14:53,321,Wood St & Hubbard St,Damen Ave & Chicago Ave,Subscriber,Male,1992.0,6,Friday
1,955915,2017-05-25 18:19:03,2017-05-25 18:45:53,1610,Theater on the Lake,Sheffield Ave & Waveland Ave,Subscriber,Female,1992.0,5,Thursday
2,9031,2017-01-04 08:27:49,2017-01-04 08:34:45,416,May St & Taylor St,Wood St & Taylor St,Subscriber,Male,1981.0,1,Wednesday
3,304487,2017-03-06 13:49:38,2017-03-06 13:55:28,350,Christiana Ave & Lawrence Ave,St. Louis Ave & Balmoral Ave,Subscriber,Male,1986.0,3,Monday
4,45207,2017-01-17 14:53:07,2017-01-17 15:02:01,534,Clark St & Randolph St,Desplaines St & Jackson Blvd,Subscriber,Male,1975.0,1,Tuesday


In [211]:
# Ask the user which month (as a number) and weekday (as a name) to filter by.

week_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

while True:
    try:
        get_month = int(input("Please enter the number of the month: "))
        # Normalise spacing and capitalisation so "monday" or " MONDAY " is accepted.
        get_day = input("Please enter the name of the day: ").strip().title()
    except ValueError:
        # int() raises ValueError when the month entry is not a whole number.
        print("Sorry, I didn't understand that. Please type month as a number and day as a string.")
        continue
    if get_day not in week_days:
        print("Sorry, that's not a day")
    elif not 1 <= get_month <= 12:
        print("Sorry, that's not a month")
    else:
        break

# Both values are valid once the loop exits; stored as (day name, month number).
filter_choice = (get_day, get_month)
print("Thank you for letting us know what you're interessted in.")

Please enter the number of the month: 6
Please enter the name of the day: Monday
Thank you for letting us know what you're interessted in.


In [212]:
# filter_choice holds (day name, month number): index 1 is the month, index 0 the day.
print('We will now filter the dataset for month: ', filter_choice[1])
print('We will now filter the dataset by: ', filter_choice[0])

We will now filter the dataset for month:  6
We will now filter the dataset by:  Monday


In [207]:
# Filter by month, then by day of week, based on the user's input.
# filter_choice holds (day name, month number).

filtered_for_month = df[df['month'] == filter_choice[1]]
# Read the frames under the same names they are assigned to, so the cell also
# runs on a fresh kernel.
filtered_for_day = filtered_for_month[filtered_for_month['day_of_week'] == filter_choice[0]]

filtered_for_day.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week
5,1473887,2017-06-26 09:01:20,2017-06-26 09:11:06,586,Clinton St & Washington Blvd,Canal St & Taylor St,Subscriber,Male,1990.0,6,Monday
32,1359055,2017-06-19 17:43:17,2017-06-19 17:59:35,978,Larrabee St & Armitage Ave,Dearborn Pkwy & Delaware Pl,Subscriber,Female,1993.0,6,Monday
78,1243183,2017-06-12 09:43:25,2017-06-12 09:58:30,905,State St & Randolph St,Streeter Dr & Grand Ave,Subscriber,Female,1978.0,6,Monday
80,1239988,2017-06-12 06:43:37,2017-06-12 06:50:09,392,Ogden Ave & Race Ave,Green St & Madison St,Subscriber,Male,1984.0,6,Monday
84,1128408,2017-06-05 17:39:03,2017-06-05 18:09:34,1831,Michigan Ave & Oak St,Clinton St & Roosevelt Rd,Subscriber,Female,1983.0,6,Monday
