# Pandas 
## Document key learning over and above learnt from PDA and Python 101

In [1]:
import numpy as np
import pandas as pd

## Series Data structure 
- Is like a dictionary but an ordered form of dictionary
- Computation is much faster than comparable sequences like lists 
- Because Data is stored underneath using numpy arrays

In [5]:
# Series takes in data in form of list, array, dics etc

In [6]:
pd.Series(np.array([1,2,3])) # data type is recognized as int

0    1
1    2
2    3
dtype: int64

In [9]:
x = pd.Series(['a', 'b', 'd']) # notice data type for list of strings is object

In [10]:
x

0    a
1    b
2    d
dtype: object

In [13]:
x.index # default integer index

RangeIndex(start=0, stop=3, step=1)

In [14]:
x.values

array(['a', 'b', 'd'], dtype=object)

In [15]:
y  = pd.Series(['a', 'b', None]) # None is converted and th values are stored as dtype object

In [16]:
y

0       a
1       b
2    None
dtype: object

In [29]:
type(y.values[2])

NoneType

In [30]:
type(None)

NoneType

In [31]:
z  = pd.Series([1, 3, None]) # Now the None is stored as NaN, which is a numeric value, overall object is float64

In [32]:
z

0    1.0
1    3.0
2    NaN
dtype: float64

In [33]:
np.isnan(z.values[2])

True

In [39]:
np.nan == None # nan is not same as None. None is a different type (NoneType) than nan(float). They represent 
# absence of a value, and non numeric value resp.

False

In [40]:
type(np.nan)

float

In [41]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [43]:
# If you specify a subset of index values, when converting a dictionary, only values for specified indices are retained 
# New are converted to NaN
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports, index=['Golf', 'Sumo', 'Hockey'])
s

Golf      Scotland
Sumo         Japan
Hockey         NaN
dtype: object

# Querying a Series

## In pandas slicing (use of [ ]) creates views, indexing (using loc anc iloc) creates copies

In [121]:
# Use of loc and iloc
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

## loc and iloc are not methods, but attributes, they use [] and not ()

In [108]:
s.iloc[[0,2,3]]

Archery           Bhutan
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [109]:
s.loc[['Golf', 'Sumo']]

Golf    Scotland
Sumo       Japan
dtype: object

## Slicing a view, and making a change will broadcast change ( true for pandas - i.e even for Dataframe)

In [122]:
t = s['Archery':'Sumo']
t['Sumo'] = 'Jap'
t

Archery      Bhutan
Golf       Scotland
Sumo            Jap
dtype: object

In [123]:
s

Archery           Bhutan
Golf            Scotland
Sumo                 Jap
Taekwondo    South Korea
dtype: object

## Indexing using loc will create copy

In [125]:
t = s.loc[['Archery','Golf','Sumo']]
t['Sumo'] = 'Japan'
t

Archery      Bhutan
Golf       Scotland
Sumo          Japan
dtype: object

In [126]:
s

Archery           Bhutan
Golf            Scotland
Sumo                 Jap
Taekwondo    South Korea
dtype: object

In [49]:
# Explicitly stating index is bettr 
# If index is integer, pandas throws error
sports = {99: 'Bhutan',
          100: 'Scotland',
          101: 'Japan',
          102: 'South Korea'}
s = pd.Series(sports)
s

99          Bhutan
100       Scotland
101          Japan
102    South Korea
dtype: object

In [51]:
#s[0] throws an error

## Using vectorized methods to compute series 
- numpy and pandas methods using vectorization, to make computations fast 
- Avoid using loops, where possible

In [52]:
# Demonstration of adding elements of a series
x = pd.Series(np.random.randn(1000))

In [53]:
x.head()

0    0.633458
1   -0.199338
2   -0.336314
3   -0.094835
4   -0.575042
dtype: float64

#### %timeit mgic function computes average time to compute an expression running it with repetitions
#### %%timeit evaluates expressions in a jupyter cell

In [56]:
%%timeit -n 100
np.sum(x)

50.3 µs ± 15.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [59]:
%%timeit -n 100
s = 0
for i in range(len(x)):
    s += x.iloc[i]

8.27 ms ± 282 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Series values and Index can have mixed types. No error is thrown, coercion takes place
## As data is added to series, the dtype of series changes

In [65]:
s = pd.Series([1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

In [66]:
s.loc['Animal'] = 'Bears'
s

0             1
1             2
2             3
Animal    Bears
dtype: object

## Index can hve Duplicate values, it is not supposed to be unique

#### Series can be appended, Notice that append mthod on a Pandas series does not make an in place change, in python lists it does

In [70]:
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'], 
                                   index=['Cricket',
                                          'Cricket',
                                          'Cricket',
                                          'Cricket'])
all_countries = original_sports.append(cricket_loving_countries)

In [71]:
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [73]:
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

# Data Frame Data Structure
- index attribute is is for rows, columns is for columns, both are index data structures, which implies
they need not be unique. 
- Rows and columns are easily interchangeable using transpose 

In [70]:
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])
df.head()

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


### .loc and .iloc support mixed selection
### Slicing means using 'start', 'stop' notations, it creates  views
### indexing is using .loc without using start', 'stop' notations
### Creating views only is fast and memory efficient, but changes to the view are broadcast

In [71]:
# Column selection, uses indexing with loc, and thus creates a copy
temp = df.iloc[:,[0,1]]
temp

Unnamed: 0,Cost,Item Purchased
Store 1,22.5,Dog Food
Store 1,2.5,Kitty Litter
Store 2,5.0,Bird Seed


In [72]:
temp.loc['Store 2', 'Cost'] = 20  # False positive warning, this is a copy and not a slice

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [73]:
temp

Unnamed: 0,Cost,Item Purchased
Store 1,22.5,Dog Food
Store 1,2.5,Kitty Litter
Store 2,20.0,Bird Seed


In [74]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [75]:
temp = df.loc[:,['Cost', 'Name']]  # indexing, creates a copy not a view
print(df)
temp[temp['Cost'] < 6] = 0
print(temp)
df

         Cost Item Purchased   Name
Store 1  22.5       Dog Food  Chris
Store 1   2.5   Kitty Litter  Kevyn
Store 2   5.0      Bird Seed  Vinod
         Cost   Name
Store 1  22.5  Chris
Store 1   0.0      0
Store 2   0.0      0


Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [85]:
# Row selection
df.loc[['Store 1', 'Store 2'],:]

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [86]:
# Filtering, .loc allows binary masking
df.loc[df['Cost'] > 5,:]

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris


In [89]:
# Mixed selection
x = df.loc[['Store 1'],['Cost', 'Name']]  # uses indexing, so copy is created 
x

Unnamed: 0,Cost,Name
Store 1,22.5,Chris
Store 1,2.5,Kevyn


In [90]:
# Col., row, mixed selection all return copies
x[x['Cost'] >5] = 0
x

Unnamed: 0,Cost,Name
Store 1,0.0,0
Store 1,2.5,Kevyn


In [91]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [92]:
# Dropping a column or row, being aware of view/copy
df.drop('Store 1') # return a copy

Unnamed: 0,Cost,Item Purchased,Name
Store 2,5.0,Bird Seed,Vinod


In [93]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [95]:
del df['Name']

In [96]:
df # in place drop

Unnamed: 0,Cost,Item Purchased
Store 1,22.5,Dog Food
Store 1,2.5,Kitty Litter
Store 2,5.0,Bird Seed


## Slicing - not using .loc

In [76]:
y = df['Cost']  # not using .loc , so slicing, column reference creates a viww
y

Store 1    22.5
Store 1     2.5
Store 2     5.0
Name: Cost, dtype: float64

In [77]:
y+=2
y

Store 1    24.5
Store 1     4.5
Store 2     7.0
Name: Cost, dtype: float64

In [78]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,24.5,Dog Food,Chris
Store 1,4.5,Kitty Litter,Kevyn
Store 2,7.0,Bird Seed,Vinod


## Indexing

In [103]:
temp = df.loc[:,['Cost',]]
temp

Unnamed: 0,Cost
Store 1,24.5
Store 1,4.5
Store 2,7.0


In [104]:
temp -= 2
temp

Unnamed: 0,Cost
Store 1,22.5
Store 1,2.5
Store 2,5.0


In [105]:
df

Unnamed: 0,Cost,Item Purchased
Store 1,24.5,Dog Food
Store 1,4.5,Kitty Litter
Store 2,7.0,Bird Seed


## Data Loading using read_csv

In [79]:
dir = '/Users/sumad/Documents/DS/Python/UM Spcialization/DS_with_Python/'

In [80]:
df = pd.read_csv(dir + 'olympics.csv')

In [81]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,,# Summer,01 !,02 !,03 !,Total,# Winter,01 !,02 !,03 !,Total,# Games,01 !,02 !,03 !,Combined total
1,Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
2,Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
3,Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
4,Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12


In [167]:
# dropping rows, specifying 
df = pd.read_csv(dir + 'olympics.csv', index_col = 0,skiprows = 1) # account for skip rows and give index_col

In [168]:
df.head()

Unnamed: 0,# Summer,01 !,02 !,03 !,Total,# Winter,01 !.1,02 !.1,03 !.1,Total.1,# Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [169]:
# Rename columns in place, specifying as dictionary
for col in df.columns:
    if(col[0:2]=='01'):
        df.rename(columns= {col : 'Gold' + col[4:]},inplace= True)
    if(col[0:2]=='02'):
        df.rename(columns= {col : 'Silver' + col[4:]}, inplace= True)
    if(col[0:2]=='03'):
        df.rename(columns= {col : 'Bronze' + col[4:]}, inplace= True)
df.head()        

Unnamed: 0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


## Filtering using Boolean Masking 
- Boolean masking results in fast pandas operations, often made part of thr workflow for this reason

In [170]:
mask = df['Gold'] > 0 
mask.head()

Afghanistan (AFG)          False
Algeria (ALG)               True
Argentina (ARG)             True
Armenia (ARM)               True
Australasia (ANZ) [ANZ]     True
Name: Gold, dtype: bool

In [171]:
type(mask)

pandas.core.series.Series

#### Use  .loc when using boolean mask to filter and column selection

In [175]:
df1 = df.loc[mask, ['Gold', 'Silver']]
df1.head()
#print(len(df))
#len(df1)

Unnamed: 0,Gold,Silver
Algeria (ALG),5,2
Argentina (ARG),18,24
Armenia (ARM),1,2
Australasia (ANZ) [ANZ],3,4
Australia (AUS) [AUS] [Z],139,152


In [154]:
# Alternatively Using where method
df2 = df.where(df['Gold'] > 0)

In [155]:
len(df2)

147

In [160]:
df2.head() # where method results in NaNs in thr rows where condition is not met, required dropping methods
df3 = df2.dropna(axis= 0)
len(df3)

100

### Boolean masks can be chained using bitwise operators to form more complex filtering mechanism that give fast result
### BUT each condition must be encapsulated in parentheses

In [92]:
df4 = df.where((df['Gold'] > 0) & (df['Silver'] > 1)).dropna(axis =0)

In [93]:
len(df4)

87

## Changing Index

In [94]:
df.head()

Unnamed: 0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


In [97]:
# Index by Combined Total, make country a columns
df['country'] = df.index
df.set_index('Combined total',inplace= True) # Takes existing column name/s and moves the col/s to index, with colname as
                                            # index name
df.head()

Unnamed: 0_level_0,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,country
Combined total,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2,13,0,0,2,2,0,0,0,0,0,13,0,0,2,Afghanistan (AFG)
15,12,5,2,8,15,3,0,0,0,0,15,5,2,8,Algeria (ALG)
70,23,18,24,28,70,18,0,0,0,0,41,18,24,28,Argentina (ARG)
12,5,1,2,9,12,6,0,0,0,0,11,1,2,9,Armenia (ARM)
12,2,3,4,5,12,0,0,0,0,0,2,3,4,5,Australasia (ANZ) [ANZ]


In [99]:
# reset index clears the index and sets a default integer index
df.reset_index(inplace= True)
df.head()

Unnamed: 0,Combined total,# Summer,Gold,Silver,Bronze,Total,# Winter,Gold.1,Silver.1,Bronze.1,Total.1,# Games,Gold.2,Silver.2,Bronze.2,country
0,2,13,0,0,2,2,0,0,0,0,0,13,0,0,2,Afghanistan (AFG)
1,15,12,5,2,8,15,3,0,0,0,0,15,5,2,8,Algeria (ALG)
2,70,23,18,24,28,70,18,0,0,0,0,41,18,24,28,Argentina (ARG)
3,12,5,1,2,9,12,6,0,0,0,0,11,1,2,9,Armenia (ARM)
4,12,2,3,4,5,12,0,0,0,0,0,2,3,4,5,Australasia (ANZ) [ANZ]


## Building and Querying a hierarchical index

### Allows for multi level indexing 
### Taking existing columns and building multilevel indices from their unique value

In [121]:
dir = '/Users/sumad/Documents/DS/Python/UM Spcialization/DS_with_Python/'
with open(dir + 'mpg.csv') as con:
    dt = pd.read_csv(con)

In [131]:
dt.drop('Unnamed: 0',inplace= True, axis= 1)
#dt.columns

In [132]:
dt.head()

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class
0,audi,a4,1.8,1999,4,auto(l5),f,18,29,p,compact
1,audi,a4,1.8,1999,4,manual(m5),f,21,29,p,compact
2,audi,a4,2.0,2008,4,manual(m6),f,20,31,p,compact
3,audi,a4,2.0,2008,4,auto(av),f,21,30,p,compact
4,audi,a4,2.8,1999,6,auto(l5),f,16,26,p,compact


In [133]:
dt1 = dt.set_index(['year','cyl'])

In [135]:
dt1.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,manufacturer,model,displ,trans,drv,cty,hwy,fl,class
year,cyl,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1999,4,audi,a4,1.8,auto(l5),f,18,29,p,compact
1999,4,audi,a4,1.8,manual(m5),f,21,29,p,compact
2008,4,audi,a4,2.0,manual(m6),f,20,31,p,compact
2008,4,audi,a4,2.0,auto(av),f,21,30,p,compact
1999,6,audi,a4,2.8,auto(l5),f,16,26,p,compact
1999,6,audi,a4,2.8,manual(m5),f,18,26,p,compact
2008,6,audi,a4,3.1,auto(av),f,18,27,p,compact
1999,4,audi,a4 quattro,1.8,manual(m5),4,18,26,p,compact
1999,4,audi,a4 quattro,1.8,auto(l5),4,16,25,p,compact
2008,4,audi,a4 quattro,2.0,manual(m6),4,20,28,p,compact


In [139]:
# Querying requires passing tuples of values of double indices to loc
dt1.loc[[(1999, 4), (2008,4)]]

Unnamed: 0_level_0,Unnamed: 1_level_0,manufacturer,model,displ,trans,drv,cty,hwy,fl,class
year,cyl,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1999,4,audi,a4,1.8,auto(l5),f,18,29,p,compact
1999,4,audi,a4,1.8,manual(m5),f,21,29,p,compact
1999,4,audi,a4 quattro,1.8,manual(m5),4,18,26,p,compact
1999,4,audi,a4 quattro,1.8,auto(l5),4,16,25,p,compact
1999,4,chevrolet,malibu,2.4,auto(l4),f,19,27,r,midsize
1999,4,dodge,caravan 2wd,2.4,auto(l3),f,18,24,r,minivan
1999,4,honda,civic,1.6,manual(m5),f,28,33,r,subcompact
1999,4,honda,civic,1.6,auto(l4),f,24,32,r,subcompact
1999,4,honda,civic,1.6,manual(m5),f,25,32,r,subcompact
1999,4,honda,civic,1.6,manual(m5),f,23,29,p,subcompact


 **Reindex the purchase records DataFrame to be indexed hierarchically, first by store, then by person. Name these indexes 'Location' and 'Name'. Then add a new entry to it with the value of:
Name: 'Kevyn', Item Purchased: 'Kitty Food', Cost: 3.00 Location: 'Store 2'.**

In [154]:
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])


df['Location'] = df.index 
df.set_index(['Location', 'Name'],inplace= True)


In [155]:
new = pd.DataFrame({('Store 3', 'Kevin') : {'Item Purchased' :'Kitty Food', 
                                         'Cost' : 3.00}
                })
df.append(new.T)

Unnamed: 0_level_0,Unnamed: 1_level_0,Cost,Item Purchased
Location,Name,Unnamed: 2_level_1,Unnamed: 3_level_1
Store 1,Chris,22.5,Dog Food
Store 1,Kevyn,2.5,Kitty Litter
Store 2,Vinod,5.0,Bird Seed
Store 3,Kevin,3.0,Kitty Food


## Missing Values
- handy function pd.fillna(value, method)
- ffill and bfill : This requires usually that the data is sorted 
  - Use set_index(), sort_index() methods from a Pandas DataFrame
- or with a value

In [160]:
pd.isnull(None)

True

In [161]:
pd.isnull(np.nan)

True

In [None]:
# np functions ignore NaN value

In [165]:
import numpy as np
x = pd.Series([1,2,3], index = ['one', 'two', 'three'])
y = pd.Series([4,None,6], index = x.index)
z = pd.Series([3,4,None], index = x.index)
pd.DataFrame([x,y,z])

Unnamed: 0,one,two,three
0,1.0,2.0,3.0
1,4.0,,6.0
2,3.0,4.0,


In [166]:
np.min(pd.DataFrame([x,y,z]),axis = 1)

0    1.0
1    4.0
2    3.0
dtype: float64