# Pandas idioms: patterns that are widely used in the Python community, such as preferring vectorization over explicit loops whenever possible.

In [8]:
import pandas as pd
import numpy as np

# to count the time
import timeit

# Read the census table and index it by (state name, county name) in one chained expression.
df = pd.read_csv('data/census.csv').set_index(['STNAME', 'CTYNAME'])
df.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,SUMLEV,REGION,DIVISION,STATE,COUNTY,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Alabama,40,3,6,1,0,4779736,4780127,4785161,4801108,4816089,...,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
Alabama,Autauga County,50,3,6,1,1,54571,54571,54660,55253,55175,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
Alabama,Baldwin County,50,3,6,1,3,182265,182265,183193,186659,190396,...,14.83296,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499


### Method Chaining:
The general idea behind method chaining is that every method on an object returns a reference to that object (in pandas, usually a new object of the same type).
* As a result, you can combine many method calls into a single line or statement of code.

In [35]:
# One chained statement, one step per line so each stage can be read on its own.
df = (
    df.where(df['SUMLEV'] == 50)   # rows that fail the SUMLEV == 50 test are filled with NaN ...
      .dropna()                    # ... and every row containing a NaN is then discarded
      .rename(columns={'ESTIMATESBASE2010': 'Estimate Base 2010'})
)

### Mapping & pandas (apply & applymap functions)

* Python's built-in 'map' function applies a function to the items of one or more iterables. Given several iterables (for example two lists), it walks them in parallel and stops at the end of the shortest one, so they do not have to be the same size.

* Pandas has a similar function known as 'applymap'. With applymap you provide a function that operates on each cell of a DataFrame, and the returned result is itself a DataFrame. (In pandas 2.1 and later, 'applymap' is deprecated in favour of 'DataFrame.map'.)

* The 'apply' function in pandas is used to map a function across all of the rows (or all of the columns) of a DataFrame.

In [47]:
def min_max(row, columns=('POPESTIMATE2010',
                          'POPESTIMATE2011',
                          'POPESTIMATE2012',
                          'POPESTIMATE2013',
                          'POPESTIMATE2014',
                          'POPESTIMATE2015')):
    """Return the smallest and largest value found in selected entries of a row.

    Intended to be passed to ``DataFrame.apply(..., axis=1)``, which calls it
    once per row with that row as a Series.

    Parameters
    ----------
    row : pd.Series
        One row of a DataFrame, indexed by column name.
    columns : iterable of str, optional
        Labels of the entries to compare. Defaults to the six
        POPESTIMATE2010..POPESTIMATE2015 columns, so existing calls that pass
        only ``row`` behave as before.

    Returns
    -------
    pd.Series
        A two-entry Series with the labels 'min' and 'max'.

    Raises
    ------
    KeyError
        If any label in ``columns`` is missing from ``row``.
    """
    # A list (not a tuple) is required here: a tuple key would be treated as a
    # single label rather than as a collection of labels.
    data = row[list(columns)]
    return pd.Series({'min': data.min(), 'max': data.max()})
# 'apply' receives the function to run and the axis to walk; axis='columns' (the same as axis=1) hands min_max one row at a time

df.apply(min_max, axis='columns').head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,Autauga County,54660.0,55347.0
Alabama,Baldwin County,183193.0,203709.0
Alabama,Barbour County,26489.0,27341.0


In [54]:
# The same result can be obtained by adding the min and max values as new columns of 'df' instead of returning a new Series object

def min_max_df(row, columns=('POPESTIMATE2010',
                             'POPESTIMATE2011',
                             'POPESTIMATE2012',
                             'POPESTIMATE2013',
                             'POPESTIMATE2014',
                             'POPESTIMATE2015')):
    """Return a copy of a row extended with 'MIN' and 'MAX' entries.

    Intended to be passed to ``DataFrame.apply(..., axis=1)`` so that the
    resulting DataFrame gains 'MIN' and 'MAX' columns.

    Parameters
    ----------
    row : pd.Series
        One row of a DataFrame, indexed by column name. It is not modified.
    columns : iterable of str, optional
        Labels of the entries to compare. Defaults to the six
        POPESTIMATE2010..POPESTIMATE2015 columns, so existing calls that pass
        only ``row`` behave as before.

    Returns
    -------
    pd.Series
        All entries of ``row`` plus 'MIN' and 'MAX', holding the smallest and
        largest value among ``columns``.

    Raises
    ------
    KeyError
        If any label in ``columns`` is missing from ``row``.
    """
    # A list (not a tuple) is required here: a tuple key would be treated as a
    # single label rather than as a collection of labels.
    data = row[list(columns)]
    # Work on a copy: pandas documents that functions passed to 'apply' must
    # not mutate the object they are given.
    result = row.copy()
    # create the new entries
    result['MIN'] = data.min()
    result['MAX'] = data.max()
    return result

# Run min_max_df over every row (axis='columns' is the same as axis=1) and keep the widened frame.
df = df.apply(min_max_df, axis='columns')
df['MIN'].head()

0     54660.0
1    183193.0
2     26489.0
3     22512.0
4     57373.0
Name: MIN, dtype: float64

### The 'apply' function is an extremely useful tool in the pandas toolkit. It is rarely used with large function definitions and is instead usually called with 'lambdas'
Hence, it is important to know how to read lambdas.

In [64]:
# The row-wise minimum again, this time written as a lambda passed to 'apply' on a single line

row = [
    'POPESTIMATE2010',
    'POPESTIMATE2011',
    'POPESTIMATE2012',
    'POPESTIMATE2013',
    'POPESTIMATE2014',
    'POPESTIMATE2015',
]
df.apply(lambda county: np.min(county[row]), axis='columns').head()

# The lambda above is just an unnamed function in Python: it takes a single parameter x (one row of the DataFrame)
# and returns a single value, which in this case is the minimum of the population-estimate columns of row x

0     54660.0
1    183193.0
2     26489.0
3     22512.0
4     57373.0
dtype: float64