# Ian Schlierf
## Week 13 - 11/17/2020
### Python - Pandas DataFrame Processing

In [1]:
import numpy as np

from pandas import Series, DataFrame
import pandas as pd

In [2]:
# Drop entries: start from a 4x4 frame holding 0..15, with state names as row labels
df = DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'UT', 'NY'],
)

df

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
UT,8,9,10,11
NY,12,13,14,15


In [3]:
# Pull the underlying data out as a plain 2-D NumPy array (row/column labels are not included)
df.to_numpy()

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [4]:
# Remove the 'Colorado' row; drop() hands back a new frame rather than editing df
df.drop(index=['Colorado'])

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
UT,8,9,10,11
NY,12,13,14,15


In [6]:
# Several row labels can be removed in a single call
df.drop(index=['Colorado', 'UT'])

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
NY,12,13,14,15


In [9]:
# Remove column 'two' (columns= is the keyword spelling of axis=1)
df.drop(columns='two')

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
UT,8,10,11
NY,12,14,15


In [10]:
# Remove two columns at once
df.drop(columns=['two', 'four'])

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
UT,8,10
NY,12,14


In [11]:
# Re-display df: it is still 4x4, so none of the drop() calls above modified it
df

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
UT,8,9,10,11
NY,12,13,14,15


In [12]:
# This time mutate df itself: with inplace=True the column is removed from df directly
df.drop(columns='two', inplace=True)
df

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
UT,8,10,11
NY,12,14,15


In [13]:
# Select entries: rebuild the full 4x4 frame, since column 'two' was dropped in place above

df = DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'UT', 'NY'],
)

df

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
UT,8,9,10,11
NY,12,13,14,15


In [14]:
# A single column label returns that column as a Series indexed by the row labels
df['one']

Ohio         0
Colorado     4
UT           8
NY          12
Name: one, dtype: int32

In [15]:
# A list of column labels returns a DataFrame holding just those columns
df[['one','four']]

Unnamed: 0,one,four
Ohio,0,3
Colorado,4,7
UT,8,11
NY,12,15


In [16]:
# A slice inside [] selects rows, not columns: here the first two rows
df[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [17]:
# Boolean row filter: keep only the rows whose 'three' value is greater than 5
df[df['three']>5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
UT,8,9,10,11
NY,12,13,14,15


In [18]:
# Comparing the whole frame with a scalar gives an element-wise True/False frame
df > 5

Unnamed: 0,one,two,three,four
Ohio,False,False,False,False
Colorado,False,False,True,True
UT,True,True,True,True
NY,True,True,True,True


In [19]:
# Use the boolean frame as a mask: every value below 5 is overwritten with 0 (this modifies df)
df[df <5] = 0
df

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
UT,8,9,10,11
NY,12,13,14,15


In [20]:
# Select entries by label (loc) and by integer position (iloc)
# loc with a row label returns that row as a Series indexed by the column names
df.loc['Colorado']

one      0
two      5
three    6
four     7
Name: Colorado, dtype: int32

In [21]:
# Row label plus a list of column labels: only those columns of the 'Colorado' row
df.loc['Colorado', ['two','four']]

two     5
four    7
Name: Colorado, dtype: int32

In [23]:
# iloc is positional: position 2 is the third row ('UT')
df.iloc[2]

one       8
two       9
three    10
four     11
Name: UT, dtype: int32

In [24]:
# Row at position 2, columns at positions 3, 0, 1 -- returned in the requested order
df.iloc[2,[3,0,1]]

four    11
one      8
two      9
Name: UT, dtype: int32

In [26]:
# Lists on both axes: rows at positions 1-2 and columns 3, 0, 1 come back as a DataFrame
df.iloc[[1,2],[3,0,1]]

Unnamed: 0,four,one,two
Colorado,7,0,5
UT,11,8,9


In [28]:
# Arithmetic and data alignment: two frames whose row and column labels only partly overlap

# 3x3 frame: columns b, c, d; rows OH, TX, CO
df1 = DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['OH', 'TX', 'CO'],
    columns=['b', 'c', 'd'],
)

# 4x3 frame: columns b, d, e; rows UT, OH, TX, OR
df2 = DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=['UT', 'OH', 'TX', 'OR'],
    columns=['b', 'd', 'e'],
)
df1



Unnamed: 0,b,c,d
OH,0,1,2
TX,3,4,5
CO,6,7,8


In [29]:
# df2 for comparison: it shares only columns b, d and rows OH, TX with df1
df2

Unnamed: 0,b,d,e
UT,0,1,2
OH,3,4,5
TX,6,7,8
OR,9,10,11


In [30]:
# + aligns on both row and column labels; any cell missing from either frame becomes NaN
df1+df2

Unnamed: 0,b,c,d,e
CO,,,,
OH,3.0,,6.0,
OR,,,,
TX,9.0,,12.0,
UT,,,,


In [31]:
# add() with fill_value=0 treats a cell missing from one frame as 0;
# cells missing from both frames (e.g. OR/c, CO/e) are still NaN
df1.add(df2,fill_value=0)

Unnamed: 0,b,c,d,e
CO,6.0,7.0,8.0,
OH,3.0,1.0,6.0,5.0
OR,9.0,,10.0,11.0
TX,9.0,4.0,12.0,8.0
UT,0.0,,1.0,2.0


In [32]:
# df1 itself is unchanged by the arithmetic above
df1

Unnamed: 0,b,c,d
OH,0,1,2
TX,3,4,5
CO,6,7,8


In [34]:
# New column 'e' as the row-wise sum of two existing columns.
# df1's columns are b, c, d -- there is no 'a' column (df1['a'] raises KeyError: 'a').
df1['e'] = df1['b'] + df1['d']

In [35]:
# First row of df as a Series (labels one..four).
# NOTE(review): df2's columns are b, d, e, so this Series shares no labels with df2 and
# the df2 + s / df2 - s cells below come out all-NaN; df2.iloc[0] may have been intended -- confirm.
s=df.iloc[0]

In [36]:
# Inspect s: the 'Ohio' row, all zeros because values below 5 were zeroed in In [19]
s

one      0
two      0
three    0
four     0
Name: Ohio, dtype: int32

In [37]:
# Frame + Series matches the Series index against the frame's columns;
# no labels are shared here, so the result has the union of the labels and is all NaN
df2 + s

Unnamed: 0,b,d,e,four,one,three,two
UT,,,,,,,
OH,,,,,,,
TX,,,,,,,
OR,,,,,,,


In [38]:
# Subtraction aligns the same way, so this result is all NaN as well
df2 - s

Unnamed: 0,b,d,e,four,one,three,two
UT,,,,,,,
OH,,,,,,,
TX,,,,,,,
OR,,,,,,,


In [39]:
# Sort and rank: a 2x4 frame whose row and column labels are not in alphabetical order
df3 = DataFrame(
    data=np.arange(8).reshape((2, 4)),
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)

In [40]:
# Unsorted starting point
df3

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [41]:
# With no arguments sort_index() orders the rows by label ('one' before 'three')
df3.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [42]:
# Order the columns by label instead of the rows
df3.sort_index(axis='columns')

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [43]:
# Column labels in reverse (descending) order
df3.sort_index(axis='columns', ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [47]:
# Small frame for value sorting: column 'b' is unordered, column 'a' has repeated values
df4 = DataFrame(data={'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})

df4

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [48]:
# Sort rows by the values in column 'b' (ascending); each row keeps its original index label
df4.sort_values(by='b')

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [49]:
# Sort by 'a' first, then by 'b' among rows with equal 'a'
df4.sort_values(by=['a','b'])

Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


In [50]:
# sort_values() returned new frames; df4 keeps its original row order
df4

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [51]:
# Second ranking example: same tie pattern in 'a' as df4, different ordering in 'b'
df5 = DataFrame(data={'b': [14, 17, -13, 21], 'a': [10, 11, 10, 11]})
df5

Unnamed: 0,b,a
0,14,10
1,17,11
2,-13,10
3,21,11


In [52]:
# rank() numbers each column's values from smallest (1) upward;
# tied values share the average of their positions (the two 0s in 'a' both get 1.5)
df4.rank()

Unnamed: 0,b,a
0,3.0,1.5
1,4.0,3.5
2,1.0,1.5
3,2.0,3.5


In [53]:
# Same on df: column 'one' holds two 0s after the masking in In [19], so they tie at 1.5
df.rank()

Unnamed: 0,one,two,three,four
Ohio,1.5,1.0,1.0,1.0
Colorado,1.5,2.0,2.0,2.0
UT,3.0,3.0,3.0,3.0
NY,4.0,4.0,4.0,4.0


In [54]:
# Ranks depend on order, not magnitude: column 'a' ranks match df4's even though the values differ
df5.rank()

Unnamed: 0,b,a
0,2.0,1.5
1,3.0,3.5
2,1.0,1.5
3,4.0,3.5
