# Explore US Bikeshare Data 
In this project, I use Python to explore data related to bike share systems for three major cities in the United States—Chicago, New York City, and Washington. I answer interesting questions about the data by computing descriptive statistics. I also write a script that takes in raw input to create an interactive experience in the terminal to present these statistics.

In [1]:
#importing libraries
import numpy as np
import pandas as pd

In [2]:
def get_city():
    '''
    Ask the user which city to analyze and keep asking until the answer is valid.

    The answer is compared case-insensitively, ignoring surrounding whitespace.
    "New York" is accepted as an alias of "New York City", because the prompt
    offers "New York" while CITY_DATA is keyed by "new york city".

    Returns:
        (str) city - a key of CITY_DATA
    '''
    prompt = "Would you like to see data for Chicago, New York, or Washington?"
    # Short names advertised by the prompt, mapped to the CITY_DATA key they stand for.
    aliases = {'new york': 'new york city'}
    while True:
        answer = input(prompt).strip().lower()
        city = aliases.get(answer, answer)
        if city in CITY_DATA:
            return city
        print("Please choose a city from the given cities")

In [3]:
def filtering_features():
    '''
    Ask the user how the data should be filtered by time.

    The question is repeated until the (case-insensitive) answer is one of
    'month', 'day', 'both' or 'none'.

    Returns:
        (str) the chosen filter, in lowercase
    '''
    question = "Would you like to filter data by month, day, both, or not at all? Type none for no time filter."
    allowed = ['month', 'day', 'both', 'none']
    while True:
        answer = input(question).lower()
        if answer in allowed:
            return answer
        print("Please enter a correct time filter")

In [4]:
def get_month():
    '''
    Ask the user for the month to filter the data by.

    The question is repeated until the (case-insensitive) answer is one of the
    months January through June.

    Returns:
        (str) the chosen month name, in lowercase
    '''
    question = "Which month? January, February, March, April, May, or June"
    valid_months = ['january', 'february', 'march', 'april', 'may', 'june']
    while True:
        answer = input(question).lower()
        if answer in valid_months:
            return answer
        print("Please enter a correct month from the given months")

In [5]:
def get_day():
    '''
    Ask the user for the day of the week to filter the data by.

    The question is repeated until the (case-insensitive) answer is a full
    weekday name, Monday through Sunday.

    Returns:
        (str) the chosen day name, in lowercase
    '''
    question = "Which day? Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday"
    valid_days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
    while True:
        answer = input(question).lower()
        if answer in valid_days:
            return answer
        print("Please enter a correct day from the given days")

In [6]:
def get_time_filters():
    '''
    Ask the user for the time filter and return the month and day to filter by.

    Returns:
        (str) month - chosen month name, or 'all' when no month filter applies
        (str) day - chosen day name, or 'all' when no day filter applies
    '''
    time_filter = filtering_features()

    # Start from "no filter" so both names are always bound, even if the
    # filter value is ever something other than the four handled below.
    month = 'all'
    day = 'all'

    # For 'both', the month is asked before the day.
    if time_filter in ('month', 'both'):
        month = get_month()
    if time_filter in ('day', 'both'):
        day = get_day()

    return month, day
    

In [7]:
# Maps each supported city name (lowercase) to the CSV file holding its trip data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

In [8]:
def load_data(city, month, day):
    '''
    Loads data for the specified city and filters by month and day if applicable.

    Besides the columns read from the CSV file, the returned frame carries four
    derived columns: 'day_of_week' (lowercase day name), 'month' (month number),
    'hour' (hour of day), all taken from 'Start Time', and 'trip_stations'
    ("<Start Station>-><End Station>").

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not in CITY_DATA or month is not January..June
    '''
    city_data = pd.read_csv(CITY_DATA[city])

    city_data['Start Time'] = pd.to_datetime(city_data['Start Time'])
    # Vectorized lowercase; unlike a per-row lambda it tolerates missing values.
    city_data['day_of_week'] = city_data['Start Time'].dt.day_name().str.lower()
    city_data['month'] = city_data['Start Time'].dt.month
    city_data['hour'] = city_data['Start Time'].dt.hour
    city_data['trip_stations'] = city_data['Start Station'] + "->" + city_data['End Station']

    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        # Month names map to the 1-based month numbers stored in the 'month' column.
        month_numbers = {name: number for number, name in enumerate(months, start=1)}
        city_data = city_data[city_data['month'] == month_numbers[month.lower()]]

    if day != 'all':
        city_data = city_data[city_data['day_of_week'] == day.lower()]

    return city_data

In [9]:
def time_statistical_analysis(city_data):
    '''
    Print the most common month, day of week and start hour of the given trips.

    Args:
        (Pandas DataFrame) city_data - filtered city data; must contain the
            'month', 'day_of_week' and 'hour' columns
    '''
    # mode() returns every most-frequent value, sorted; [0] picks the first one.
    popular_month = city_data['month'].mode()[0]
    popular_day = city_data['day_of_week'].mode()[0]
    popular_hour = city_data['hour'].mode()[0]
    print("The most common bikeshare month: {}".format(popular_month))
    print("The most common bikeshare day of week: {}".format(popular_day))
    print("The most common bikeshare hour of day: {}".format(popular_hour))

In [10]:
def stations_statistical_analysis(city_data):
    '''
    Print the most common start station and the most common end station.

    Args:
        (Pandas DataFrame) city_data - filtered city data; must contain the
            'Start Station' and 'End Station' columns
    '''
    # Both modes are computed before anything is printed.
    top_start, top_end = (
        city_data[column].mode()[0] for column in ('Start Station', 'End Station')
    )
    print("The most common bikeshare start station: {}".format(top_start))
    print("The most common bikeshare end station: {}".format(top_end))

In [11]:
def trip_statistical_analysis(city_data):
    '''
    Print the most common trip and the total and average trip duration.

    Args:
        (Pandas DataFrame) city_data - filtered city data; must contain the
            'trip_stations' ("<start>-><end>") and 'Trip Duration' columns
    '''
    most_frequent_route = city_data['trip_stations'].mode()[0]
    durations = city_data['Trip Duration']
    total_duration = durations.sum()
    mean_duration = durations.mean()

    # 'trip_stations' joins the two station names with "->".
    route_parts = most_frequent_route.split("->")
    print("The most common bikeshare trip from {} to {}".format(route_parts[0], route_parts[1]))
    print("Trip total travel time: {}".format(total_duration))
    print("Trip average travel time: {}".format(mean_duration))

In [12]:
def user_info(city_data, city):
    '''
    Print statistics about the users of the bikeshare system.

    User type counts are always printed. Gender counts and birth year
    statistics are skipped for "washington", and also whenever the
    corresponding column is absent or holds no birth year at all, instead of
    failing with KeyError / ValueError.

    Args:
        (Pandas DataFrame) city_data - data frame of the data to do analysis on it
        (str) city - name of the city to analyze
    '''
    user_types = city_data['User Type'].value_counts()
    print("User types:\n{}".format(user_types))

    if city == "washington":
        return

    if 'Gender' in city_data.columns:
        gender_type = city_data['Gender'].value_counts()
        print("Gender types:\n{}".format(gender_type))

    if 'Birth Year' in city_data.columns:
        # Without any known birth year min()/max() are NaN and int() would fail.
        birth_years = city_data['Birth Year'].dropna()
        if not birth_years.empty:
            earliest_year_of_birth = birth_years.min()
            most_recent_year_of_birth = birth_years.max()
            most_common_year_of_birth = birth_years.mode()[0]
            print("The earliest year of birth: {}".format(int(earliest_year_of_birth)))
            print("The most recent year of birth: {}".format(int(most_recent_year_of_birth)))
            print("The most common year of birth: {}".format(int(most_common_year_of_birth)))
        

In [13]:
def statistical_analysis(city_data, city):
    '''
    Calculate statistical analysis on the filtered data of a given city.

    Runs the time, station, trip and user analyses in that order. When the
    filtered data contains no rows, a short notice is printed instead, because
    the individual analyses index the first mode value and fail on empty data.

    Args:
         (Pandas DataFrame) city_data - data frame of the data to do analysis on it
         (str) city - name of the city to analyze
    '''
    if city_data.empty:
        print("No data available for the selected filters")
        return

    time_statistical_analysis(city_data)
    stations_statistical_analysis(city_data)
    trip_statistical_analysis(city_data)
    user_info(city_data, city)

In [14]:
def display_data_sample(city_data):
    '''
    Show the raw data five rows at a time for as long as the user answers Yes.

    Each row is printed on its own (as a pandas Series). The loop ends when the
    user answers No or when every row has been shown. Any other answer prints
    a reminder and asks again.

    Args:
        (Pandas DataFrame) city_data - data frame whose rows are displayed
    '''
    prompt = 'Would you want to see the raw data? Type Yes or No'
    sample_size = 5
    total_rows = len(city_data)
    next_row = 0

    while True:
        # Stop instead of indexing past the last row.
        if next_row >= total_rows:
            print("There is no more raw data to display")
            break

        # No try/except here: swallowing EOFError or KeyboardInterrupt would
        # make this loop impossible to leave.
        answer = input(prompt).strip().lower()
        if answer == 'no':
            break
        if answer != 'yes':
            print("You should enter Yes or No")
            continue

        last_row = min(next_row + sample_size, total_rows)
        for position in range(next_row, last_row):
            print(city_data.iloc[position])
        next_row += sample_size
    


In [15]:
def restart_program():
    '''
        Ask whether the whole analysis should be run again.

        The question is repeated until the (case-insensitive) answer is
        Yes or No.

        Return:
            'yes' : user wants to restart the program
            'no' : user does not want to restart the program
    '''
    question = "Would you like to restart? Type Yes or No"
    while True:
        answer = input(question).lower()
        if answer in ('yes', 'no'):
            return answer
        print("you should enter Yes or No")

In [20]:
def main():
    '''
    Run the interactive session: pick a city and time filters, print the
    statistics, optionally show raw data, and repeat until the user answers
    'no' to the restart question.
    '''
    keep_running = True
    while keep_running:
        print("Hello! let's get some insights from US bikeshare data")
        city = get_city()
        month, day = get_time_filters()
        city_data = load_data(city, month, day)
        statistical_analysis(city_data, city)
        display_data_sample(city_data)
        # Only an explicit 'no' ends the session.
        keep_running = restart_program() != 'no'



In [21]:
# Start the interactive session only when executed directly (script or
# notebook cell), not when this code is imported as a module.
if __name__ == "__main__":
    main()

Hello! let's get some insights from US bikeshare data
The most common bikeshare month: 4
The most common bikeshare day of weak: saturday
The most common bikeshare hour of day: 17
The most common bikeshare start station: Streeter Dr & Grand Ave
The most common bikeshare end station: Streeter Dr & Grand Ave
The most common bikeshare trip from Lake Shore Dr & Monroe St to Streeter Dr & Grand Ave
Trip total travel time: 50699234
Trip average travel time: 981.4211270059428
User types:
Subscriber    39829
Customer      11830
Name: User Type, dtype: int64
Gender types:
Male      30250
Female     9585
Name: Gender, dtype: int64
The earliest year of birth: 1899
The most recent year of birth: 2016
The most common year of birth: 1989
Unnamed: 0                                                  606841
Start Time                                     2017-04-20 16:08:51
End Time                                       2017-04-20 16:20:20
Trip Duration                                                  689