# Pandas - 2


## Selection & Drop

In [1]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np


In [2]:
# Sample "people" table used throughout this notebook. Adapted from
# https://chrisalbon.com/python/pandas_map_values_to_values.html
raw_data = dict(
    first_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    last_name=['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
    age=[42, 52, 36, 24, 73],
    city=['San Francisco', 'Baltimore', 'Miami', 'Douglas', 'Boston'],
)
# Column order follows the key order of raw_data.
df = pd.DataFrame(raw_data, columns=list(raw_data))
df

Unnamed: 0,first_name,last_name,age,city
0,Jason,Miller,42,San Francisco
1,Molly,Jacobson,52,Baltimore
2,Tina,Ali,36,Miami
3,Jake,Milner,24,Douglas
4,Amy,Cooze,73,Boston


In [3]:
# Selecting one column by its label yields a Series; head(3) keeps the first three rows.
df["first_name"].head(3)

0    Jason
1    Molly
2     Tina
Name: first_name, dtype: object

In [4]:
# Passing a list of labels selects several columns and yields a DataFrame; head(2) keeps two rows.
df[["first_name", "age", "city"]].head(2)

Unnamed: 0,first_name,age,city
0,Jason,42,San Francisco
1,Molly,52,Baltimore


In [5]:
# Keep the "age" column as its own Series, then slice off its first three entries.
age_series = df["age"]
age_series[:3]

0    42
1    52
2    36
Name: age, dtype: int64

In [6]:
# Boolean mask: keep only the entries whose age is below 55.
age_series[age_series<55]

0    42
1    52
2    36
3    24
Name: age, dtype: int64

In [7]:
# Drop the row labelled 2. The result is a new frame; df itself is not
# modified (the following cells still show row 2).
df.drop(2)

Unnamed: 0,first_name,last_name,age,city
0,Jason,Miller,42,San Francisco
1,Molly,Jacobson,52,Baltimore
3,Jake,Milner,24,Douglas
4,Amy,Cooze,73,Boston


In [8]:
# A list of index labels drops several rows at once.
df.drop([0,2,3])

Unnamed: 0,first_name,last_name,age,city
1,Molly,Jacobson,52,Baltimore
4,Amy,Cooze,73,Boston


In [9]:
# axis=1 makes drop look up the label among the columns instead of the rows.
df.drop("city",axis=1)

Unnamed: 0,first_name,last_name,age
0,Jason,Miller,42
1,Molly,Jacobson,52
2,Tina,Ali,36
3,Jake,Milner,24
4,Amy,Cooze,73


In [10]:
# Same column drop for "age"; "city" is still present because the previous
# drop returned a new frame and left df untouched.
df.drop("age",axis=1)

Unnamed: 0,first_name,last_name,city
0,Jason,Miller,San Francisco
1,Molly,Jacobson,Baltimore
2,Tina,Ali,Miami
3,Jake,Milner,Douglas
4,Amy,Cooze,Boston


## DataFrame Operations

In [11]:
# Five consecutive integers labelled a, b, c, e, d (note that "e" comes before "d").
s1 = Series(data=range(1, 6), index=["a", "b", "c", "e", "d"])
s1

a    1
b    2
c    3
e    4
d    5
dtype: int64

In [12]:
# Six consecutive integers; the label "e" appears twice in the index.
s2 = Series(data=range(5, 11), index=["b", "c", "e", "d", "e", "f"])
s2

b     5
c     6
e     7
d     8
e     9
f    10
dtype: int64

In [13]:
# Label-aligned addition. Labels present in only one operand ("a", "f") give
# NaN, and the duplicated "e" label in s2 yields two result rows.
s1.add(s2)

a     NaN
b     7.0
c     9.0
d    13.0
e    11.0
e    13.0
f     NaN
dtype: float64

In [14]:
# The + operator produces the same label-aligned result as s1.add(s2).
s1+s2

a     NaN
b     7.0
c     9.0
d    13.0
e    11.0
e    13.0
f     NaN
dtype: float64

In [16]:
# 3x3 frame holding 0..8 row by row, with columns a, b, c.
df1 = DataFrame(
    data=np.arange(9).reshape((3, 3)),
    columns=["a", "b", "c"],
)
df1

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8


In [17]:
# 4x4 frame holding 0..15 row by row, with columns a, b, c, d.
df2 = DataFrame(
    data=np.arange(16).reshape((4, 4)),
    columns=["a", "b", "c", "d"],
)
df2

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [18]:
# Adding frames aligns on both row and column labels; positions that df1
# lacks (column "d", row 3) come out as NaN.
df1 + df2

Unnamed: 0,a,b,c,d
0,0.0,2.0,4.0,
1,7.0,9.0,11.0,
2,14.0,16.0,18.0,
3,,,,


In [19]:
# fill_value=0 stands in for a value missing from one operand before adding,
# so the positions that were NaN above now carry df2's values.
df1.add(df2, fill_value=0)

Unnamed: 0,a,b,c,d
0,0.0,2.0,4.0,3.0
1,7.0,9.0,11.0,7.0
2,14.0,16.0,18.0,11.0
3,12.0,13.0,14.0,15.0


In [20]:
# 4x4 frame holding 0..15 row by row, used for the DataFrame/Series examples below.
df3 = DataFrame(
    data=np.arange(16).reshape((4, 4)),
    columns=["a", "b", "c", "d"],
)
df3

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [22]:
# Rebinds s2: the values 10..13 on the default 0..3 index.
s2 = Series(data=np.arange(10, 14))
s2

0    10
1    11
2    12
3    13
dtype: int32

In [23]:
# Pitfall: with +, the Series index (0..3) is matched against the column
# labels (a..d). Nothing matches, so the result is all NaN over the union
# of both label sets.
df3 + s2

Unnamed: 0,a,b,c,d,0,1,2,3
0,,,,,,,,
1,,,,,,,,
2,,,,,,,,
3,,,,,,,,


In [24]:
# axis=0 matches the Series index against the row index instead, so s2[i]
# is added to every value in row i.
df3.add(s2, axis=0)

Unnamed: 0,a,b,c,d
0,10,11,12,13
1,15,16,17,18
2,20,21,22,23
3,25,26,27,28


## lambda / map / apply

In [26]:
# Rebinds s1: the values 0..9 on the default integer index.
s1 = Series(data=np.arange(10))
s1.head(n=5)

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [27]:
# map applies the function to each element; here every value is squared.
s1.map(lambda x:x**2).head(5)

0     0
1     1
2     4
3     9
4    16
dtype: int64

In [29]:
# map also accepts a dict as a lookup table; values with no matching key
# (0 and 4 in the output shown) become NaN.
z = {1:"A", 2:"B", 3:"C"}
s1.map(z).head(5)

0    NaN
1      A
2      B
3      C
4    NaN
dtype: object

In [31]:
# Rebinds s2 again. Given a Series, map looks each value of s1 up in s2's
# index and returns the corresponding s2 value.
s2 = Series(np.arange(10,20))
s1.map(s2).head(5)

0    10
1    11
2    12
3    13
4    14
dtype: int32

In [43]:
# NOTE(review): this cell was executed out of order (In [43]). Its recorded
# output already contains "sex" and "sex_code", which are only created in the
# cells below (In [37] and In [42]); a fresh top-to-bottom run displays df
# without those columns.
df

Unnamed: 0,first_name,last_name,age,city,sex,sex_code
0,Jason,Miller,42,San Francisco,male,0
1,Molly,Jacobson,52,Baltimore,female,1
2,Tina,Ali,36,Miami,female,1
3,Jake,Milner,24,Douglas,male,0
4,Amy,Cooze,73,Boston,female,1


In [37]:
# Add a "sex" column to df by assigning a list with one entry per row.
df["sex"] = ["male", "female", "female", "male", "female"]
df

Unnamed: 0,first_name,last_name,age,city,sex
0,Jason,Miller,42,San Francisco,male
1,Molly,Jacobson,52,Baltimore,female
2,Tina,Ali,36,Miami,female
3,Jake,Milner,24,Douglas,male
4,Amy,Cooze,73,Boston,female


In [42]:
# Translate the text labels to integer codes through a lookup table and
# store the result in a new "sex_code" column.
df["sex_code"] = df["sex"].map({"male": 0, "female": 1})
df

Unnamed: 0,first_name,last_name,age,city,sex,sex_code
0,Jason,Miller,42,San Francisco,male,0
1,Molly,Jacobson,52,Baltimore,female,1
2,Tina,Ali,36,Miami,female,1
3,Jake,Milner,24,Douglas,male,0
4,Amy,Cooze,73,Boston,female,1


In [44]:
# Remove the "sex_code" column from df in place.
del df["sex_code"]
df

Unnamed: 0,first_name,last_name,age,city,sex
0,Jason,Miller,42,San Francisco,male
1,Molly,Jacobson,52,Baltimore,female
2,Tina,Ali,36,Miami,female
3,Jake,Milner,24,Douglas,male
4,Amy,Cooze,73,Boston,female


In [50]:
# Recode "sex" from text labels to integers (male -> 0, female -> 1).
# The result is assigned back to the column rather than calling
# `df.sex.replace(..., inplace=True)`: that form mutates a temporary Series,
# which pandas reports as chained assignment and which leaves df unchanged
# once Copy-on-Write is active.
# The explicit cast keeps the column numeric, so it still shows up in
# numeric summaries such as describe().
df["sex"] = df["sex"].replace({"male": 0, "female": 1}).astype("int64")
df.head(5)

Unnamed: 0,first_name,last_name,age,city,sex
0,Jason,Miller,42,San Francisco,0
1,Molly,Jacobson,52,Baltimore,1
2,Tina,Ali,36,Miami,1
3,Jake,Milner,24,Douglas,0
4,Amy,Cooze,73,Boston,1


In [52]:
# Add an "earn" column from a list with one value per row.
df["earn"] = [156574, 1532354, 651423, 73515, 0]
df

Unnamed: 0,first_name,last_name,age,city,sex,earn
0,Jason,Miller,42,San Francisco,0,156574
1,Molly,Jacobson,52,Baltimore,1,1532354
2,Tina,Ali,36,Miami,1,651423
3,Jake,Milner,24,Douglas,0,73515
4,Amy,Cooze,73,Boston,1,0


In [53]:
# f returns the spread (max - min) of whatever it is given.
f = lambda x: x.max() - x.min()
# Restrict to the two numeric columns of interest.
df_info = df[["age", "earn"]]
# apply calls f once per column, giving one value per column.
df_info.apply(f)

age          49
earn    1532354
dtype: int64

In [56]:
# f1 negates its argument; applied per column it returns a frame of the same
# shape with every value negated.
f1 = lambda x : -x
df_info.apply(f1).head(5)

Unnamed: 0,age,earn
0,-42,-156574
1,-52,-1532354
2,-36,-651423
3,-24,-73515
4,-73,0


In [58]:
# Summary statistics. The output covers age, sex and earn only; at this point
# "sex" holds the 0/1 codes, so it is summarised like the other numeric columns.
df.describe()

Unnamed: 0,age,sex,earn
count,5.0,5.0,5.0
mean,45.4,0.6,482773.2
std,18.460769,0.547723,639732.5
min,24.0,0.0,0.0
25%,36.0,0.0,73515.0
50%,42.0,1.0,156574.0
75%,52.0,1.0,651423.0
max,73.0,1.0,1532354.0


In [60]:
# Restore the text labels (0 -> male, 1 -> female).
# The result is assigned back to the column rather than calling
# `df["sex"].replace(..., inplace=True)`: that form mutates a temporary
# Series, which pandas reports as chained assignment and which leaves df
# unchanged once Copy-on-Write is active.
df["sex"] = df["sex"].replace({0: "male", 1: "female"})
df

Unnamed: 0,first_name,last_name,age,city,sex,earn
0,Jason,Miller,42,San Francisco,male,156574
1,Molly,Jacobson,52,Baltimore,female,1532354
2,Tina,Ali,36,Miami,female,651423
3,Jake,Milner,24,Douglas,male,73515
4,Amy,Cooze,73,Boston,female,0


In [61]:
# Distinct values found in the "sex" column.
df.sex.unique()

array(['male', 'female'], dtype=object)

In [62]:
# Column-wise sum (axis=0). In the output shown, text columns are
# concatenated into one long string rather than skipped.
df.sum(axis=0)

first_name                       JasonMollyTinaJakeAmy
last_name                 MillerJacobsonAliMilnerCooze
age                                                227
city          San FranciscoBaltimoreMiamiDouglasBoston
sex                         malefemalefemalemalefemale
earn                                           2413866
dtype: object

In [65]:
# Row-wise sum (axis=1): age + earn for each row.
df_info.sum(axis=1)

0     156616
1    1532406
2     651459
3      73539
4         73
dtype: int64

In [66]:
# Sort ascending by age, with earn as the secondary key, and show the first three rows.
df.sort_values(["age","earn"], ascending=True).head(3)

Unnamed: 0,first_name,last_name,age,city,sex,earn
3,Jake,Milner,24,Douglas,male,73515
2,Tina,Ali,36,Miami,female,651423
0,Jason,Miller,42,San Francisco,male,156574


In [68]:
# Correlation coefficient between age and earn (Series.corr uses Pearson by default).
df.age.corr(df.earn)

0.039894049281904605