# SELECTING or ACCESSING Data

In [1]:
import pandas as pd
# One record per city; the shared keys become the DataFrame's columns.
cityD = dict(Population=10927986, Hospitals=189, Schools=7916)
cityM = dict(Population=12691836, Hospitals=208, Schools=8508)
cityK = dict(Population=4631392, Hospitals=149, Schools=7226)
cityC = dict(Population=4328063, Hospitals=157, Schools=7617)
cities = [cityD, cityM, cityK, cityC]

# Row labels are given in the same order as the records in `cities`.
df5 = pd.DataFrame(
    data=cities,
    index=["Delhi", "Mumbai", "Kolkata", "Chennai"],
)
df5

Unnamed: 0,Population,Hospitals,Schools
Delhi,10927986,189,7916
Mumbai,12691836,208,8508
Kolkata,4631392,149,7226
Chennai,4328063,157,7617


# (a) Selecting/Accessing a Column

In [2]:
df5.Population        # dot notation: <df>.<column name> returns that column as a Series

Delhi      10927986
Mumbai     12691836
Kolkata     4631392
Chennai     4328063
Name: Population, dtype: int64

In [3]:
df5['Schools']        # square brackets: <df>['<column name>'] returns that column as a Series

Delhi      7916
Mumbai     8508
Kolkata    7226
Chennai    7617
Name: Schools, dtype: int64

# (b) Selecting/Accessing Multiple Columns

In [4]:
df5[['Schools','Hospitals']]   # note the double square brackets: the inner pair is a list of column names; columns are returned in the order listed

Unnamed: 0,Schools,Hospitals
Delhi,7916,189
Mumbai,8508,208
Kolkata,7226,149
Chennai,7617,157


# (c) Selecting/Accessing a Subset from a DataFrame using Row/Column Names

(i) to access a Row

In [5]:
df5.loc['Delhi',:]              # access one row by its label: <df>.loc[<row label>,:]

Population    10927986
Hospitals          189
Schools           7916
Name: Delhi, dtype: int64

(ii) to access Multiple Rows

In [6]:
df5.loc['Mumbai':'Chennai',:]    # access multiple rows: <df>.loc[<start row label>:<end row label>,:] -- with loc the end label is included

Unnamed: 0,Population,Hospitals,Schools
Mumbai,12691836,208,8508
Kolkata,4631392,149,7226
Chennai,4328063,157,7617


(iii) to access Selective Columns

In [7]:
df5.loc[:,'Population':'Schools']     # access multiple columns: <df>.loc[:,<start column label>:<end column label>] -- the end label is included

Unnamed: 0,Population,Hospitals,Schools
Delhi,10927986,189,7916
Mumbai,12691836,208,8508
Kolkata,4631392,149,7226
Chennai,4328063,157,7617


(iv) to access Range of Columns from a Range of Rows 

In [8]:
df5.loc['Mumbai':'Chennai','Population':'Hospitals']   # <df>.loc[<start row>:<end row> ,<start column>:<end column>]
                                                       # selects a range of columns from a range of rows; both end labels are included

Unnamed: 0,Population,Hospitals
Mumbai,12691836,208
Kolkata,4631392,149
Chennai,4328063,157



# (d) Selecting Rows/Columns from a DataFrame

To extract a subset from a DataFrame by numeric row and column position (rather than by label), use `iloc` instead of `loc`.

In [9]:
df5.iloc[0:2,1:3]        # <df>.iloc[<start row pos>:<end row pos>,<start col pos>:<end col pos>] -- with iloc, the end position is excluded

Unnamed: 0,Hospitals,Schools
Delhi,189,7916
Mumbai,208,8508


In [10]:
df5.iloc[:,1:3]          # all rows, columns at positions 1 and 2 (end position 3 is excluded)

Unnamed: 0,Hospitals,Schools
Delhi,189,7916
Mumbai,208,8508
Kolkata,149,7226
Chennai,157,7617


In [11]:
df5.iloc[0:2,:]          # rows at positions 0 and 1 (end position 2 is excluded), all columns

Unnamed: 0,Population,Hospitals,Schools
Delhi,10927986,189,7916
Mumbai,12691836,208,8508


In [12]:
df5[1:3]       # plain slicing without loc/iloc selects rows by position; the end position is excluded

Unnamed: 0,Population,Hospitals,Schools
Mumbai,12691836,208,8508
Kolkata,4631392,149,7226


# (e) Selecting/Accessing an Individual Value

In [13]:
df5.Population['Mumbai']                    # <DF object>.<column>[<row label>] -- a single value looked up by row label

12691836

In [14]:
# Use .iloc for positional access: a bare integer key on a string-labelled Series
# (df5.Population[1]) is deprecated in pandas 2.x, where integer keys will be treated as labels.
df5.Population.iloc[1]                      # <DF object>.<column>.iloc[<row position>]

12691836

You can also use the `at` (label-based) or `iat` (position-based) attributes of a DataFrame object to access a single value.

In [15]:
df5.at['Chennai','Schools']               # <DF object>.at[<row label>,<column label>] -- a single value by labels

7617

In [16]:
df5.iat[3,2]                              # <DF object>.iat[<row position>,<column position>] -- a single value by positions

7617

# (f) Selecting DataFrame Rows/Columns based on Boolean Conditions

`df[df[<column name>] <condition>]`    OR     `df.loc[df[<column name>] <condition>]`

In [17]:
df5                                  # display the whole DataFrame again for reference

Unnamed: 0,Population,Hospitals,Schools
Delhi,10927986,189,7916
Mumbai,12691836,208,8508
Kolkata,4631392,149,7226
Chennai,4328063,157,7617


In [18]:
df5['Schools']>7500                  # returns a boolean Series (True/False for each row), not the matching rows themselves

Delhi       True
Mumbai      True
Kolkata    False
Chennai     True
Name: Schools, dtype: bool

In [19]:
df5[df5['Schools']>7500 ]    # <df>[<df>[<column name>] <condition>]
                             # returns the rows of the DataFrame (with their actual values) where the condition is True

Unnamed: 0,Population,Hospitals,Schools
Delhi,10927986,189,7916
Mumbai,12691836,208,8508
Chennai,4328063,157,7617


In [20]:
df5.loc[df5['Population']>10000000 ]     # same kind of filter via loc: .loc is applied to the outer df, not to the condition inside

Unnamed: 0,Population,Hospitals,Schools
Delhi,10927986,189,7916
Mumbai,12691836,208,8508
