In [36]:
import pandas as pd

In [37]:
# Maps each supported city name (lowercase) to the CSV file holding its trip data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

In [54]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by (case-insensitive), or "all" to apply no month filter
        (str) day - name of the day of week to filter by (case-insensitive), or "all" to apply no day filter
    Returns:
        df - pandas DataFrame containing city data filtered by month and day, with two
             added columns: 'month' (1 = January ... 12 = December) and
             'day_of_week' (0 = Monday ... 6 = Sunday)
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month or day is neither "all" nor a recognised name
    """
    months = ['january', 'february', 'march', 'april', 'may', 'june', 'july',
              'august', 'september', 'october', 'november', 'december']
    days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']

    # resolve the file first so an unknown city still surfaces as a KeyError
    filename = CITY_DATA[city]

    # normalise the filters once; validate them before paying for the file read
    month_name = month.lower()
    day_name = day.lower()
    if month_name != 'all' and month_name not in months:
        raise ValueError('Unknown month: {!r}'.format(month))
    if day_name != 'all' and day_name not in days:
        raise ValueError('Unknown day of week: {!r}'.format(day))

    # load data file into a dataframe
    df = pd.read_csv(filename)

    # derive the columns the filters operate on from the trip start timestamp
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.dayofweek

    if month_name != 'all':
        # list positions are 0-based while .dt.month is 1-based
        df = df[df['month'] == months.index(month_name) + 1]

    if day_name != 'all':
        # .dt.dayofweek is 0-based starting on Monday, matching the list order
        df = df[df['day_of_week'] == days.index(day_name)]

    return df
    
# Load the Chicago data with no month or day filter applied, then display it.
df = load_data('chicago', 'all', 'all')

df

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week
0,1423854,2017-06-23 15:09:32,2017-06-23 15:14:53,321,Wood St & Hubbard St,Damen Ave & Chicago Ave,Subscriber,Male,1992.0,6,4
1,955915,2017-05-25 18:19:03,2017-05-25 18:45:53,1610,Theater on the Lake,Sheffield Ave & Waveland Ave,Subscriber,Female,1992.0,5,3
2,9031,2017-01-04 08:27:49,2017-01-04 08:34:45,416,May St & Taylor St,Wood St & Taylor St,Subscriber,Male,1981.0,1,2
3,304487,2017-03-06 13:49:38,2017-03-06 13:55:28,350,Christiana Ave & Lawrence Ave,St. Louis Ave & Balmoral Ave,Subscriber,Male,1986.0,3,0
4,45207,2017-01-17 14:53:07,2017-01-17 15:02:01,534,Clark St & Randolph St,Desplaines St & Jackson Blvd,Subscriber,Male,1975.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...
299995,64825,2017-01-21 13:18:00,2017-01-21 13:27:50,590,Orleans St & Elm St (*),Sheffield Ave & Webster Ave,Subscriber,Male,1965.0,1,5
299996,695993,2017-04-28 19:32:19,2017-04-28 19:51:18,1139,Ashland Ave & Blackhawk St,Clark St & Elm St,Customer,,,4,4
299997,159685,2017-02-12 09:59:01,2017-02-12 10:21:49,1368,Ravenswood Ave & Lawrence Ave,Stockton Dr & Wrightwood Ave,Subscriber,Female,1988.0,2,6
299998,564681,2017-04-16 17:07:15,2017-04-16 17:19:00,705,Sheffield Ave & Willow St,Clark St & Chicago Ave,Customer,,,4,6


In [39]:
# Inspect the 'Start Time' column (load_data converts it with pd.to_datetime).
df['Start Time']

0        2017-06-23 15:09:32
1        2017-05-25 18:19:03
2        2017-01-04 08:27:49
3        2017-03-06 13:49:38
4        2017-01-17 14:53:07
                 ...        
299995   2017-01-21 13:18:00
299996   2017-04-28 19:32:19
299997   2017-02-12 09:59:01
299998   2017-04-16 17:07:15
299999   2017-05-30 12:38:28
Name: Start Time, Length: 300000, dtype: datetime64[ns]

In [40]:
# List the distinct month numbers present in the data (1 = January).
df['month'].unique()

array([6, 5, 1, 3, 4, 2], dtype=int64)

In [51]:
# List the distinct day-of-week codes present in the data (0 = Monday ... 6 = Sunday).
# NOTE(review): the result depends on which filters were applied to df when this
# cell last ran — re-run it after reloading df to refresh the displayed output.
df['day_of_week'].unique()

array([4], dtype=int64)

In [59]:
# Most common day of week: count the rows per day code, then take the code
# with the highest count (0 = Monday ... 6 = Sunday).
df['day_of_week'].value_counts().idxmax()

1

In [44]:
# Keep only the January trips. The subset gets its own name instead of
# overwriting df, so this cell can be re-run safely and other cells that
# read df still see the full, unfiltered data.
df_january = df[df['month'] == 1]
df_january

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week
2,9031,2017-01-04 08:27:49,2017-01-04 08:34:45,416,May St & Taylor St,Wood St & Taylor St,Subscriber,Male,1981.0,1,2
4,45207,2017-01-17 14:53:07,2017-01-17 15:02:01,534,Clark St & Randolph St,Desplaines St & Jackson Blvd,Subscriber,Male,1975.0,1,1
7,65924,2017-01-21 14:28:38,2017-01-21 14:40:41,723,Larrabee St & Kingsbury St,Larrabee St & Armitage Ave,Customer,,,1,5
11,71678,2017-01-22 15:15:45,2017-01-22 15:31:02,917,Southport Ave & Wellington Ave,Clark St & Schiller St,Subscriber,Male,1964.0,1,6
12,19061,2017-01-08 16:03:00,2017-01-08 16:07:37,277,Green St & Madison St,Ada St & Washington Blvd,Subscriber,Male,1961.0,1,6
...,...,...,...,...,...,...,...,...,...,...,...
299958,108699,2017-01-31 11:44:34,2017-01-31 11:52:17,463,Indiana Ave & Roosevelt Rd,Michigan Ave & Madison St,Subscriber,Female,1966.0,1,1
299972,44015,2017-01-17 08:42:37,2017-01-17 08:53:20,643,Racine Ave & Wrightwood Ave,Clark St & Wellington Ave,Subscriber,Female,1978.0,1,1
299975,49643,2017-01-18 11:28:36,2017-01-18 11:31:14,158,Dayton St & North Ave,Sheffield Ave & Kingsbury St,Subscriber,Male,1958.0,1,2
299993,68299,2017-01-21 17:16:02,2017-01-21 17:55:11,2349,Lake Shore Dr & Monroe St,McClurg Ct & Erie St,Customer,,,1,5


In [49]:
# Scratch check of the month-name -> list-position lookup used for filtering.
# list.index is 0-based, so 'january' maps to 0 (add 1 to get the calendar month).
month = 'january'
months = ['january', 'february', 'march', 'april', 'may', 'june', 'july',
          'august', 'september', 'october', 'november', 'december']
month = months.index(month)
month

0

In [60]:
# Scratch check: lowercasing a mixed-case string returns an all-lowercase copy.
name = 'Nandhan'
str.lower(name)

'nandhan'

In [63]:
# Count the rows for each user type and present the result as a two-column
# table: 'User Type' and 'count'.
rows_per_user_type = df.groupby(['User Type']).size()
mod_df = rows_per_user_type.reset_index(name='count')
mod_df

Unnamed: 0,User Type,count
0,Customer,61110
1,Dependent,1
2,Subscriber,238889


In [72]:
# Number of rows per user type, as a Series indexed by 'User Type'.
user_type_series = df.groupby('User Type').size()
user_type_series

User Type
Customer       61110
Dependent          1
Subscriber    238889
dtype: int64

In [80]:
# The group labels (the user types) of the counts Series.
user_type_series.index

Index(['Customer', 'Dependent', 'Subscriber'], dtype='object', name='User Type')

In [75]:
# The per-type counts as a NumPy array, in the same order as the index labels.
user_type_series.values

array([ 61110,      1, 238889], dtype=int64)