In [1]:
import pandas as pd

In [27]:
chicago = pd.read_csv("data/chicago.csv").dropna(how="all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [28]:
chicago.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32062 entries, 0 to 32061
Data columns (total 4 columns):
Name                      32062 non-null object
Position Title            32062 non-null object
Department                32062 non-null category
Employee Annual Salary    32062 non-null object
dtypes: category(1), object(3)
memory usage: 1.0+ MB


In [29]:
chicago["Department"].nunique()

35

### Common string methods

In [11]:
"HELLO World".lower()

'hello world'

In [12]:
"HELLO World".upper()

'HELLO WORLD'

In [13]:
"HELLO World".title()

'Hello World'

In [19]:
len("HELLO World")

11

In [24]:
"Hello World".replace("l", "!")

'He!!o Wor!d'

In [45]:
"       Hello World   ".lstrip()

'Hello World   '

In [46]:
"   Hello World    ".rstrip()

'   Hello World'

In [47]:
"   Hello World    ".strip()

'Hello World'

In [54]:
"Hello my name is Rajan".split(" ")

['Hello', 'my', 'name', 'is', 'Rajan']

In [14]:
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [18]:
chicago["Name"].str.title().head(3)

0      Aaron,  Elvia J
1    Aaron,  Jeffery M
2       Aaron,  Karina
Name: Name, dtype: object

In [21]:
chicago["Department"].str.len().head(3)

0    11.0
1     6.0
2     6.0
Name: Department, dtype: float64

In [31]:
chicago["Employee Annual Salary"].str.replace("$", "").astype("float").head(3)

0    90744.0
1    84450.0
2    84450.0
Name: Employee Annual Salary, dtype: float64

In [32]:
chicago["Employee Annual Salary"] = chicago["Employee Annual Salary"].str.replace("$", "").astype("float").head(3)

### Filtering with string methods

In [34]:
mask = chicago["Position Title"].str.lower().str.contains("water")
chicago[mask].head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,90744.0
554,"ALUISE, VINCENT G",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,
685,"ANDERSON, ANDREW J",DISTRICT SUPERINTENDENT OF WATER DISTRIBUTION,WATER MGMNT,
702,"ANDERSON, DONALD",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,


In [35]:
mask = chicago["Position Title"].str.lower().str.startswith("water")
chicago[mask].head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,90744.0
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,
1054,"ASHLEY, KARMA T",WATER CHEMIST II,WATER MGMNT,
1079,"ATKINS, JOANNA M",WATER CHEMIST II,WATER MGMNT,
1181,"AZEEM, MOHAMMED A",WATER CHEMIST II,WATER MGMNT,


In [40]:
mask = chicago["Position Title"].str.lower().str.endswith("specialist")
chicago[mask].head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
308,"ALARCON, LUIS J",LOAN PROCESSING SPECIALIST,COMMUNITY DEVELOPMENT,
422,"ALLAIN, CAROLYN",SENIOR TELECOMMUNICATIONS SPECIALIST,DoIT,
705,"ANDERSON, EDWARD M",SR PROCUREMENT SPECIALIST,PROCUREMENT,
1163,"AYALA JR, JUAN",FIELD SANITATION SPECIALIST,STREETS & SAN,
1558,"BARRETT, BARBARA J",TECHNICAL TRAINING SPECIALIST,POLICE,


In [50]:
chicago["Name"] = chicago["Name"].str.strip()
chicago["Position Title"] = chicago["Position Title"].str.strip()

### String methods on index and columns

In [51]:
chicago = pd.read_csv("data/chicago.csv", index_col="Name").dropna(how="all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0_level_0,Position Title,Department,Employee Annual Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [53]:
chicago.index = chicago.index.str.strip().str.title()
chicago.columns = chicago.columns.str.upper()
chicago.head(3)

Unnamed: 0_level_0,POSITION TITLE,DEPARTMENT,EMPLOYEE ANNUAL SALARY
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Aaron, Elvia J",WATER RATE TAKER,WATER MGMNT,$90744.00
"Aaron, Jeffery M",POLICE OFFICER,POLICE,$84450.00
"Aaron, Karina",POLICE OFFICER,POLICE,$84450.00


In [59]:
chicago["POSITION TITLE"].str.split(" ").str.get(0).value_counts().head(10)

POLICE             10856
FIREFIGHTER-EMT     1509
SERGEANT            1186
POOL                 918
FIREFIGHTER          810
CROSSING             775
MOTOR                721
SANITATION           715
PARAMEDIC            641
ASST                 606
Name: POSITION TITLE, dtype: int64

### expand parameter

In [60]:
chicago = pd.read_csv("data/chicago.csv").dropna(how="all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [62]:
chicago[ ["First Name", "Last Name"] ] = chicago["Name"].str.split(",", expand = True)
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,First Name,Last Name
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,AARON,ELVIA J
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00,AARON,JEFFERY M
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00,AARON,KARINA
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,AARON,KIMBERLEI R
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,ABAD JR,VICENTE M
