In [1]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lower case) to the CSV file read for it.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def _prompt_until_valid(prompt, valid_choices, error_message):
    '''
    Prints a prompt and reads answers until one of the valid choices is typed.

    The answer is stripped of surrounding whitespace and lower-cased before it
    is checked. After every rejected answer, error_message and 'start again'
    are printed and the prompt is shown again.

    Args:
        (str) prompt - text printed before each read
        (container of str) valid_choices - accepted answers, in lower case
        (str) error_message - text printed when the answer is rejected
    Returns:
        (str) the accepted answer, in lower case
    '''
    while True:
        print(prompt)
        answer = input().strip().lower()
        if answer in valid_choices:
            return answer
        print(error_message)
        print('start again')


def get_filters():
    '''
    Asks user to enter a city, month, and day to analyze.

    Each question is repeated until a valid answer is given; answers are
    matched case-insensitively.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    '''
    print('Hello! Let\'s explore some US bikeshare data!\n')

    # city must be one of the keys of CITY_DATA
    city = _prompt_until_valid(
        'Enter the city name Chicago or New York City or Washington\n',
        CITY_DATA,
        'Invalid Input, enter the city name correctly',
    )
    print('user has typed the city as: {} \n'.format(city))

    # only 6 months' data is available
    month_list = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
    month = _prompt_until_valid(
        'Enter the full month name between january or February or March or April or May or june or All\n',
        month_list,
        ' Invalid Input, please type the full month name correctly',
    )
    print('user has typed the month name as: {}\n'.format(month))

    # every weekday is accepted ('saturday' used to be missing from this list)
    day_list = ['all', 'monday', 'tuesday', 'wednesday', 'thursday',
                'friday', 'saturday', 'sunday']
    day = _prompt_until_valid(
        'enter the day name',
        day_list,
        'Invalid Input, plese type the full week day name',
    )
    print('user has selected the weekday as: {}\n'.format(day))
    print(f'user has selected City {city.title()} and month/s {month.title()} and day/s {day.title()}')

    print('-'*40)

    # Returning the city, month and day data
    return city, month, day


def load_data(city, month, day):
    '''
    Loads data for the specified city and filters by month and day if applicable.

    The CSV named by CITY_DATA[city] is read, its 'Start Time' column is
    converted to datetimes, and two helper columns are added: 'month'
    (month number) and 'day_of_week' (day name).

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january ... june
    '''
    df = pd.read_csv(CITY_DATA[city])

    # convert start time to a datetime column so the .dt accessors can be used
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # derive the month number and the day name of every trip
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()

    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']

        # position in the list + 1 is the month number stored in df['month']
        month_number = months.index(month) + 1

        # filter by month creating a new dataframe
        df = df[df['month'] == month_number]

    if day != 'all':
        # filter by day creating a new dataframe; the stored day names are
        # capitalised, so the lower-case input is title-cased for the comparison
        df = df[df['day_of_week'] == day.title()]

    return df


def time_stats(df):
    '''
    Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour of the trips in
    df, followed by the time the calculation took. df is only read; no column
    is added to it. When df has no rows a short notice is printed instead of
    the statistics.

    Args:
        df - Pandas DataFrame with 'month', 'day_of_week' and datetime
             'Start Time' columns
    '''
    print('\nCalculating The Most Frequent Times of Travel...\n')

    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element to report
        print('no trips match the selected filters')
    else:
        # most common month
        popular_month = df['month'].mode()[0]
        print('most popular month is: {}'.format(popular_month))

        # most common day of week
        popular_weekday = df['day_of_week'].mode()[0]
        print(f'most popular day of the week is {popular_weekday.title()}')

        # most common start hour; kept in a local Series so the caller's
        # DataFrame is not modified
        start_hours = df['Start Time'].dt.hour
        popular_start_hour = start_hours.mode()[0]
        print('most popular start hour is {}'.format(popular_start_hour))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    '''
    Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and the
    most common start/end pair, followed by the time the calculation took.
    df is only read; no column is added to it. When df has no rows a short
    notice is printed instead of the statistics.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    '''
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element to report
        print('no trips match the selected filters')
    else:
        # most commonly used start station
        popular_start_station = df['Start Station'].mode()[0]
        print('most popular start station is {}'.format(popular_start_station))

        # most commonly used end station
        popular_end_station = df['End Station'].mode()[0]
        print('most popular end station is {}'.format(popular_end_station))

        # most frequent trip: both station names joined with a double
        # underscore, kept in a local Series so the caller's DataFrame is
        # not modified
        trips = df['Start Station'] + '__' + df['End Station']
        start_to_end_combo = trips.mode()[0]
        print('most frequent combination of start station and end station trip is {}'.format(start_to_end_combo))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*100)


def trip_duration_stats(df):
    """Print the sum and the mean of the 'Trip Duration' column of df.

    The elapsed calculation time and a separator line are printed afterwards.
    """
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # total travel time
    print(f'Total travel time: {durations.sum()}')

    # mean travel time
    print(f'Average travel time: {durations.mean()} ')

    elapsed = time.time() - began
    print(f"\nThis took {elapsed} seconds.")
    print('-' * 100)


def user_stats(df):
    '''
    Displays statistics on bikeshare users.

    Prints the counts per user type, the counts per gender, and the earliest,
    latest and most common birth year. The 'Gender' and 'Birth Year' columns
    are optional: when one is missing (or 'Birth Year' holds no values) a
    notice is printed for it instead. Other errors are no longer silenced.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    '''
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # counts of user types
    user_type = df['User Type'].value_counts()
    print('user types count are: {}\n'.format(user_type))

    # counts of gender, only when the file has that column
    if 'Gender' in df.columns:
        gender_count = df['Gender'].value_counts()
        print('gender count is: {}'.format(gender_count))
    else:
        print('there is no gender in this file\n')

    # earliest, most recent, and most common year of birth; missing values
    # are dropped first so an all-empty column is reported as "no data"
    # rather than printing nan
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
    if birth_years is None or birth_years.empty:
        print('there is no Birth year data/columnin this file')
    else:
        earlier_year = birth_years.min()
        print('the earliest Birth Year is: {}'.format(earlier_year))

        latest_year = birth_years.max()
        print('latest birth year is: {}'.format(latest_year))

        common_year = birth_years.mode()[0]
        print('most common birth year is: {}'.format(common_year))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*100)


def display_data(df):
    '''
    Shows the rows of df five at a time for as long as the user asks for more.

    The user is first asked, until the answer is yes or no, whether the data
    should be shown. After every batch of five rows the question is asked
    again; any answer other than yes stops the display. The display also
    stops once every row of df has been shown, instead of printing empty
    frames.

    Args:
        df - Pandas DataFrame whose rows are displayed
    '''
    user_responses = ['yes', 'no']
    rdata = ''

    # keep asking until the answer is one of the accepted responses
    while rdata not in user_responses:
        rdata = input('do you want to see row data enter responses as Yes or No').strip().lower()

        if rdata not in user_responses:
            print('invalid input, enter the response correctly\n')
            print('start again')

    counter = 0
    while rdata == 'yes':
        # positional slice: the next (up to) five rows
        print(df.iloc[counter:counter + 5])
        counter += 5

        if counter >= len(df):
            # nothing left to page through
            print('no more raw data to display')
            break

        print('do you wish to see raw data')
        rdata = input().strip().lower()

    print('-'*100)
            


def main():
    '''
    Runs the interactive session: ask for filters, load the data, show the
    raw data and every statistic, then repeat while the user answers yes.
    '''
    reports = (display_data, time_stats, station_stats,
               trip_duration_stats, user_stats)

    keep_going = True
    while keep_going:
        filters = get_filters()
        df = load_data(*filters)

        # every report takes the filtered DataFrame as its only argument
        for report in reports:
            report(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when this file is run as a script.
if __name__ == "__main__":
    main()


Hello! Let's explore some US bikeshare data!

Enter the city name Chicago or New York City or Washington

new york city
user has typed the city as: new york city 

Enter the full month name between january or February or March or April or May or june or All

all
user has typed the month name as: all

enter the day name
all
user has selected the weekday as: all

user has selected City New York City and month/s All and day/s All
----------------------------------------
do you want to see row data enter responses as Yes or Noyes
   Unnamed: 0          Start Time             End Time  Trip Duration  \
0     5688089 2017-06-11 14:55:05  2017-06-11 15:08:21            795   
1     4096714 2017-05-11 15:30:11  2017-05-11 15:41:43            692   
2     2173887 2017-03-29 13:26:26  2017-03-29 13:48:31           1325   
3     3945638 2017-05-08 19:47:18  2017-05-08 19:59:01            703   
4     6208972 2017-06-21 07:49:16  2017-06-21 07:54:46            329   

             Start Station   