In [79]:
import pandas as pd

In [80]:
chicago = pd.read_csv("chicago.csv").dropna(how="all")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [81]:
chicago.nunique()

Name                      31776
Position Title             1093
Department                   35
Employee Annual Salary     1156
dtype: int64

In [82]:
chicago["Department"].astype("category")

0             WATER MGMNT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4             WATER MGMNT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: category
Categories (35, object): ['ADMIN HEARNG', 'ANIMAL CONTRL', 'AVIATION', 'BOARD OF ELECTION', ..., 'STREETS & SAN', 'TRANSPORTN', 'TREASURER', 'WATER MGMNT']

## Common string methods

In [83]:
chicago["Name"].str.lower().str.upper().str.title()

0            Aaron,  Elvia J
1          Aaron,  Jeffery M
2             Aaron,  Karina
3        Aaron,  Kimberlei R
4        Abad Jr,  Vicente M
                ...         
32057    Zygadlo,  Michael J
32058     Zygowicz,  Peter J
32059      Zymantas,  Mark E
32060    Zyrkowski,  Carlo E
32061    Zyskowski,  Dariusz
Name: Name, Length: 32062, dtype: object

In [84]:
chicago["Name"].str.len()

0        15
1        17
2        14
3        19
4        19
         ..
32057    19
32058    18
32059    17
32060    19
32061    19
Name: Name, Length: 32062, dtype: int64

## Replace

In [85]:
"Hello world".replace("l", "!")

'He!!o wor!d'

In [86]:
chicago["Department"].str.replace("MGMNT", "MANAGEMENT")

0        WATER MANAGEMENT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4        WATER MANAGEMENT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: object

In [87]:
chicago["Employee Annual Salary"] = chicago["Employee Annual Salary"].str.replace("$", "").astype(float)

  chicago["Employee Annual Salary"] = chicago["Employee Annual Salary"].str.replace("$", "").astype(float)


In [88]:
chicago["Employee Annual Salary"].sum()

2571506375.36

In [89]:
chicago["Employee Annual Salary"].mean()

80204.178633899

## Filtering with string methods

In [90]:
mask = chicago["Position Title"].str.lower().str.startswith("water")
chicago[mask].head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,90744.0
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,82044.0
1054,"ASHLEY, KARMA T",WATER CHEMIST II,WATER MGMNT,82044.0


In [91]:
mask = chicago["Position Title"].str.lower().str.contains("water")
chicago[mask].head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,90744.0
554,"ALUISE, VINCENT G",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,102440.0
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,82044.0


### More string methods - `strip`, `lstrip` and `rstrip`

In [92]:
"   Hello World   ".strip()

'Hello World'

In [93]:
"   Hello World   ".rstrip()

'   Hello World'

In [94]:
"   Hello World   ".lstrip()

'Hello World   '

In [95]:
chicago["Position Title"] = chicago["Position Title"].str.lstrip()

## Invoke string methods on DataFrame index and columns

In [96]:
# chicago.index = chicago.index.str.strip().str.title()

In [97]:
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,90744.0
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,84450.0
2,"AARON, KARINA",POLICE OFFICER,POLICE,84450.0


In [98]:
chicago.columns = chicago.columns.str.upper()

In [99]:
chicago.head(3)

Unnamed: 0,NAME,POSITION TITLE,DEPARTMENT,EMPLOYEE ANNUAL SALARY
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,90744.0
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,84450.0
2,"AARON, KARINA",POLICE OFFICER,POLICE,84450.0


## `split` method

In [100]:
"Hello World".split()

['Hello', 'World']

In [101]:
chicago["NAME"].str.split(",").str.join(" ").str.title()

0            Aaron   Elvia J
1          Aaron   Jeffery M
2             Aaron   Karina
3        Aaron   Kimberlei R
4        Abad Jr   Vicente M
                ...         
32057    Zygadlo   Michael J
32058     Zygowicz   Peter J
32059      Zymantas   Mark E
32060    Zyrkowski   Carlo E
32061    Zyskowski   Dariusz
Name: NAME, Length: 32062, dtype: object

## `split` method

In [106]:
chicago[["First Name", "Last Name"]] = chicago["NAME"].str.split(",", expand=True, n=1)

In [108]:
chicago.head(3)

Unnamed: 0,NAME,POSITION TITLE,DEPARTMENT,EMPLOYEE ANNUAL SALARY,First Name,Last Name
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,90744.0,AARON,ELVIA J
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,84450.0,AARON,JEFFERY M
2,"AARON, KARINA",POLICE OFFICER,POLICE,84450.0,AARON,KARINA
