# Function Application and Mapping

In [31]:
import numpy as np
import pandas as pd

# Build a 4x3 frame of random normal draws: rows are states, columns are letters.
Frame = pd.DataFrame(
    np.random.randn(4, 3),
    columns=list("abc"),
    index=["Ohio", "Texas", "Oregon", "Utah"],
)
print(Frame)

# NumPy ufuncs work element-wise on a DataFrame and keep its labels.
print(np.abs(Frame))

# Spread of column "b" computed by hand, for comparison with apply() below.
print(Frame["b"].min())
print(Frame["b"].max())
print(Frame["b"].max() - Frame["b"].min())


def f(x):
    """Return the spread (maximum minus minimum) of the values in ``x``."""
    return x.max() - x.min()


# By default apply() hands each column to f, giving one value per column label.
df = Frame.apply(f)
print(df, type(df))

# axis=1 hands each row to f instead, giving one value per row label.
df = Frame.apply(f, axis=1)
print(df)

def min_max(x):
    """Summarise ``x`` by its largest and smallest values.

    Returns a two-element Series labelled "max" and "min", in that order.
    """
    labels = ["max", "min"]
    values = [x.max(), x.min()]
    return pd.Series(values, index=labels)
# min_max returns a Series, so apply() assembles the per-column results into a DataFrame.
df_one = Frame.apply(min_max, axis=0)
print(df_one)


               a         b         c
Ohio    0.968776 -0.776546 -0.519390
Texas   1.369998  0.297425  1.614425
Oregon  0.659767  2.588467 -1.569547
Utah   -0.755822 -0.740914  0.332308
               a         b         c
Ohio    0.968776  0.776546  0.519390
Texas   1.369998  0.297425  1.614425
Oregon  0.659767  2.588467  1.569547
Utah    0.755822  0.740914  0.332308
-0.7765458046059873
2.5884669946967245
3.3650127993027117
a    2.125820
b    3.365013
c    3.183972
dtype: float64 <class 'pandas.core.series.Series'>
Ohio      1.745321
Texas     1.317001
Oregon    4.158014
Utah      1.088130
dtype: float64
            a         b         c
max  1.369998  2.588467  1.614425
min -0.755822 -0.776546 -1.569547


In [43]:
# 2x4 frame whose row and column labels are deliberately out of order.
frame = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    index=["two", "one"],
    columns=["d", "c", "b", "a"],
)
print(frame)
print()

# Sort the rows by their index labels (axis 0 is the default).
print(frame.sort_index(axis=0))
print()

# Sort the columns by label: ascending first, then descending.
print(frame.sort_index(axis=1, ascending=True))
print()
print(frame.sort_index(axis=1, ascending=False))

     d  c  b  a
two  0  1  2  3
one  4  5  6  7

     d  c  b  a
one  4  5  6  7
two  0  1  2  3

     a  b  c  d
two  3  2  1  0
one  7  6  5  4

     d  c  b  a
two  0  1  2  3
one  4  5  6  7


In [63]:
# Fresh 4x3 frame of random normal draws, labelled by state and letter.
state_labels = ["Ohio", "Texas", "Oregon", "Utah"]
Frame = pd.DataFrame(np.random.randn(4, 3), index=state_labels, columns=["a", "b", "c"])
print(Frame)

# Reorder the rows by the values in column "c".
print(Frame.sort_values(by="c"))

# Rank within each column: first smallest-is-1, then largest-is-1.
print(Frame.rank(method="min", ascending=True))
print(Frame.rank(method="max", ascending=False))

# Rank within each row (across the columns) instead.
print(Frame.rank(axis=1))

               a         b         c
Ohio   -0.544482 -0.368084 -1.058855
Texas  -0.232299  0.473889  0.178106
Oregon -2.469790  0.034869 -2.813350
Utah   -1.187372  0.284728  0.153092
               a         b         c
Oregon -2.469790  0.034869 -2.813350
Ohio   -0.544482 -0.368084 -1.058855
Utah   -1.187372  0.284728  0.153092
Texas  -0.232299  0.473889  0.178106
          a    b    c
Ohio    3.0  1.0  2.0
Texas   4.0  4.0  4.0
Oregon  1.0  2.0  1.0
Utah    2.0  3.0  3.0
          a    b    c
Ohio    2.0  4.0  3.0
Texas   1.0  1.0  1.0
Oregon  4.0  3.0  4.0
Utah    3.0  2.0  2.0
          a    b    c
Ohio    2.0  3.0  1.0
Texas   1.0  3.0  2.0
Oregon  2.0  3.0  1.0
Utah    1.0  3.0  2.0


In [90]:
# Small frame with a single missing value, to show how NaN affects reductions.
rows = [
    [1.4, 6.6],
    [1.2, 1.6],
    [np.nan, 1.6],
    [-1.5, 1.3],
]
df = pd.DataFrame(rows, index=list("abcd"), columns=["one", "two"])
print(df)

# With skipna=False, a NaN anywhere along the reduced axis makes that mean NaN.
print(df.mean(axis=0, skipna=False))  # one mean per column
print(df.mean(axis=1, skipna=False))  # one mean per row

   one  two
a  1.4  6.6
b  1.2  1.6
c  NaN  1.6
d -1.5  1.3
one      NaN
two    2.775
dtype: float64
a    4.0
b    1.4
c    NaN
d   -0.1
dtype: float64


In [91]:
# Column means twice: first letting NaN propagate, then ignoring NaN.
for skip in (False, True):
    print(df.mean(axis=0, skipna=skip))

one      NaN
two    2.775
dtype: float64
one    0.366667
two    2.775000
dtype: float64


In [95]:
print(df)

# Distinct values of each column (NaN is reported as a value of its own).
for column in ("one", "two"):
    print(df[column].unique())

# How often each distinct value occurs in column "two".
col_two = df["two"]
print(col_two.value_counts())

   one  two
a  1.4  6.6
b  1.2  1.6
c  NaN  1.6
d -1.5  1.3
[ 1.4  1.2  nan -1.5]
[6.6 1.6 1.3]
1.6    2
1.3    1
6.6    1
Name: two, dtype: int64


# loc and iloc

In [96]:
import numpy as np
import pandas as pd

# 4x4 frame of 0..15: rows are states, columns are "one".."four".
df = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=["Colorado", "Ohio", "Utah", "New York"],
    columns=["one", "two", "three", "four"],
)
print(df)

# loc selects by label: a single row with two columns, then two rows with two columns.
print(df.loc["Ohio", ["three", "four"]])
print(df.loc[["New York", "Ohio"], ["one", "two"]])

# iloc selects by integer position.
print(df.iloc[2:4, 2:4])      # last two rows, last two columns
print(df.iloc[:])             # everything
print(df.iloc[:, :2])         # all rows, first two columns
print(df.iloc[:2])            # first two rows, all columns
print(df.iloc[2, [3, 0, 1]])  # third row, columns picked in a custom order


          one  two  three  four
Colorado    0    1      2     3
Ohio        4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
three    6
four     7
Name: Ohio, dtype: int32
          one  two
New York   12   13
Ohio        4    5
          three  four
Utah         10    11
New York     14    15
          one  two  three  four
Colorado    0    1      2     3
Ohio        4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
          one  two
Colorado    0    1
Ohio        4    5
Utah        8    9
New York   12   13
          one  two  three  four
Colorado    0    1      2     3
Ohio        4    5      6     7
four    11
one      8
two      9
Name: Utah, dtype: int32


# Arithmetic and Data Alignment

#### Data alignment: arithmetic between two objects is performed where their index labels match; labels present in only one operand are still added to the result (the union of the indexes), with their values set to NaN.

In [47]:
# Two frames with the same columns but only partly overlapping row labels.
letters = ["a", "b", "c"]
df1 = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=["Ohio", "Texas", "Colorado"],
    columns=letters,
)
df2 = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=["Utah", "Ohio", "Texas", "Oregon"],
    columns=letters,
)
print(df1)
print()
print(df2)
print()

# Addition aligns on labels; rows found in only one frame come out as NaN.
df3 = df1 + df2
print(df3)

          a  b  c
Ohio      0  1  2
Texas     3  4  5
Colorado  6  7  8

        a   b   c
Utah    0   1   2
Ohio    3   4   5
Texas   6   7   8
Oregon  9  10  11

            a     b     c
Colorado  NaN   NaN   NaN
Ohio      3.0   5.0   7.0
Oregon    NaN   NaN   NaN
Texas     9.0  11.0  13.0
Utah      NaN   NaN   NaN


In [48]:
# Same addition, but a label missing from one frame is treated as 1 rather than producing NaN.
df3 = df1.add(other=df2, fill_value=1)
print(df3)

             a     b     c
Colorado   7.0   8.0   9.0
Ohio       3.0   5.0   7.0
Oregon    10.0  11.0  12.0
Texas      9.0  11.0  13.0
Utah       1.0   2.0   3.0


In [53]:
# 4x3 frame of 0..11 with state row labels and letter column labels.
letters = ["a", "b", "c"]
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=["Utah", "Ohio", "Texas", "Oregon"],
    columns=letters,
)
print(df.iloc[0])  # first row, returned as a Series
print(df)

# A Series indexed by the frame's column labels.
series = pd.Series([1, 2, 3], index=letters)
print(series)

# The Series is matched to the columns and subtracted from every row.
print(df - series)

a    0
b    1
c    2
Name: Utah, dtype: int32
        a   b   c
Utah    0   1   2
Ohio    3   4   5
Texas   6   7   8
Oregon  9  10  11
a    1
b    2
c    3
dtype: int64
        a  b  c
Utah   -1 -1 -1
Ohio    2  2  2
Texas   5  5  5
Oregon  8  8  8


# Deleting Data (Rows or Columns of a DataFrame)

In [5]:
import numpy as np
import pandas as pd

# 4x3 frame of 0..11 used to demonstrate drop(): rows are states, columns are "one".."three".
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=["Ohio", "New York", "Utah", "Colorado"],
    columns=["one", "two", "three"],
)

# Remove a column by label; drop() returns a new frame, so the result is rebound to df.
df = df.drop(columns="two")
print(df)

# Remove a row by its index label.
df = df.drop(index="Colorado")
print(df)


          one  three
Ohio        0      2
New York    3      5
Utah        6      8
Colorado    9     11
          one  three
Ohio        0      2
New York    3      5
Utah        6      8


# Indexing, Selection and Filtering

In [93]:
# 4x4 frame of 0..15: rows are states, columns are "one".."four".
df1 = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=["Ohio", "New York", "Utah", "Colorado"],
    columns=["one", "two", "three", "four"],
)
print(df1)

# A list of column labels selects a sub-frame.
df2 = df1[["one", "two"]]
print(df2)

# Positional slice: every row from the third one onward.
df3 = df1.iloc[2:]
print(df3)

# The same positional slice applied to a single column.
print(df1["two"].iloc[2:])

# A comparison yields a boolean Series ...
print(df1["two"] > 3)

# ... which can filter whole rows, or the values of a single column.
three_gt_5 = df1["three"] > 5
print(df1.loc[three_gt_5])
print(df1.loc[three_gt_5, "three"])

          one  two  three  four
Ohio        0    1      2     3
New York    4    5      6     7
Utah        8    9     10    11
Colorado   12   13     14    15
          one  two
Ohio        0    1
New York    4    5
Utah        8    9
Colorado   12   13
          one  two  three  four
Utah        8    9     10    11
Colorado   12   13     14    15
Utah         9
Colorado    13
Name: two, dtype: int32
Ohio        False
New York     True
Utah         True
Colorado     True
Name: two, dtype: bool
          one  two  three  four
New York    4    5      6     7
Utah        8    9     10    11
Colorado   12   13     14    15
New York     6
Utah        10
Colorado    14
Name: three, dtype: int32


# Reindex

In [45]:
# Sparse Series: colours defined only at index labels 0, 5 and 10.
colours = ["Purple", "Red", "White"]
obj1 = pd.Series(colours, index=[0, 5, 10])
print(obj1)

# Reindexing to 0..8 introduces NaN at every label obj1 did not have.
obj2 = obj1.reindex(index=range(9))
print(obj2)

# method="ffill" fills each new label with the last preceding known value.
obj3 = obj1.reindex(index=range(9), method="ffill")
print(obj3)

# The same forward fill over labels 5..14.
obj4 = obj1.reindex(index=range(5, 15), method="ffill")
print(obj4)


0     Purple
5        Red
10     White
dtype: object
0    Purple
1       NaN
2       NaN
3       NaN
4       NaN
5       Red
6       NaN
7       NaN
8       NaN
dtype: object
0    Purple
1    Purple
2    Purple
3    Purple
4    Purple
5       Red
6       Red
7       Red
8       Red
dtype: object
5       Red
6       Red
7       Red
8       Red
9       Red
10    White
11    White
12    White
13    White
14    White
dtype: object


In [78]:
# 3x3 frame of 0..8: rows are letters, columns are states.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=["Ohio", "New York", "Utah"],
)
print(df)

# Reindexing the columns reorders the existing ones and adds "Colorado" filled with NaN.
states = ["Utah", "Ohio", "Colorado", "New York"]
df = df.reindex(columns=states)
print(df)


   Ohio  New York  Utah
a     0         1     2
b     3         4     5
c     6         7     8
   Utah  Ohio  Colorado  New York
a     2     0       NaN         1
b     5     3       NaN         4
c     8     6       NaN         7


In [68]:
# 3x3 frame of 0..8: rows are letters, columns are states.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=["Ohio", "New York", "Utah"],
)
print(df)

# Add row "d"; forward fill copies the values of the preceding row ("c") into it.
df = df.reindex(index=list("abcd"), method="ffill")
print(df)

   Ohio  New York  Utah
a     0         1     2
b     3         4     5
c     6         7     8
   Ohio  New York  Utah
a     0         1     2
b     3         4     5
c     6         7     8
d     6         7     8
