# Chapter 7 - Working with Text Data

## Reducing Memory

In [87]:
import pandas as pd
chicago = pd.read_csv('chicago.csv')
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [88]:
chicago.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32063 entries, 0 to 32062
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Name                    32062 non-null  object
 1   Position Title          32062 non-null  object
 2   Department              32062 non-null  object
 3   Employee Annual Salary  32062 non-null  object
dtypes: object(4)
memory usage: 1002.1+ KB


## Change department from object to category will reduce memory
.nunique(), astype()

In [89]:
chicago.nunique()

Name                      31776
Position Title             1093
Department                   35
Employee Annual Salary     1156
dtype: int64

In [90]:
chicago.Department = chicago.Department.astype('category')
chicago.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32063 entries, 0 to 32062
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype   
---  ------                  --------------  -----   
 0   Name                    32062 non-null  object  
 1   Position Title          32062 non-null  object  
 2   Department              32062 non-null  category
 3   Employee Annual Salary  32062 non-null  object  
dtypes: category(1), object(3)
memory usage: 784.2+ KB


## Common String Methods - .lower(), .upper(), .title(), and .len()

In [91]:
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [92]:
print('hello world'.upper())
print('HELLO WORLD'.lower())
print('hello world'.title())
print(len('hello world')) #number of characters

HELLO WORLD
hello world
Hello World
11


In [93]:
chicago.Name.title() #it wont work. to call a string method need to combine .str

AttributeError: 'Series' object has no attribute 'title'

In [94]:
chicago.Name.str.title()

0            Aaron,  Elvia J
1          Aaron,  Jeffery M
2             Aaron,  Karina
3        Aaron,  Kimberlei R
4        Abad Jr,  Vicente M
                ...         
32058     Zygowicz,  Peter J
32059      Zymantas,  Mark E
32060    Zyrkowski,  Carlo E
32061    Zyskowski,  Dariusz
32062                    NaN
Name: Name, Length: 32063, dtype: object

In [95]:
chicago['Position Title'] = chicago['Position Title'].str.title()
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",Water Rate Taker,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",Police Officer,POLICE,$84450.00
2,"AARON, KARINA",Police Officer,POLICE,$84450.00
3,"AARON, KIMBERLEI R",Chief Contract Expediter,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",Civil Engineer Iv,WATER MGMNT,$106836.00


In [96]:
len(chicago.Department) # gives only number of records

32063

In [97]:
chicago.Department.str.len()

0        11.0
1         6.0
2         6.0
3        16.0
4        11.0
         ... 
32058     6.0
32059     6.0
32060     6.0
32061     4.0
32062     NaN
Name: Department, Length: 32063, dtype: float64

## The .str.replace() Method
remember to assign to the column, it does not directly replace

In [98]:
'Hello World'.replace('l','!')

'He!!o Wor!d'

In [99]:
chicago.tail(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
32060,"ZYRKOWSKI, CARLO E",Police Officer,POLICE,$87384.00
32061,"ZYSKOWSKI, DARIUSZ",Chief Data Base Analyst,DoIT,$113664.00
32062,,,,


In [100]:
chicago = chicago.dropna()
chicago.tail()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
32057,"ZYGADLO, MICHAEL J",Frm Of Machinists - Automotive,GENERAL SERVICES,$99528.00
32058,"ZYGOWICZ, PETER J",Police Officer,POLICE,$87384.00
32059,"ZYMANTAS, MARK E",Police Officer,POLICE,$84450.00
32060,"ZYRKOWSKI, CARLO E",Police Officer,POLICE,$87384.00
32061,"ZYSKOWSKI, DARIUSZ",Chief Data Base Analyst,DoIT,$113664.00


In [101]:
chicago.Department

0             WATER MGMNT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4             WATER MGMNT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: category
Categories (35, object): ['ADMIN HEARNG', 'ANIMAL CONTRL', 'AVIATION', 'BOARD OF ELECTION', ..., 'STREETS & SAN', 'TRANSPORTN', 'TREASURER', 'WATER MGMNT']

In [102]:
chicago.Department.replace('MGMNT','MANAGEMENT')# not working, have to call str.

0             WATER MGMNT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4             WATER MGMNT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: category
Categories (35, object): ['ADMIN HEARNG', 'ANIMAL CONTRL', 'AVIATION', 'BOARD OF ELECTION', ..., 'STREETS & SAN', 'TRANSPORTN', 'TREASURER', 'WATER MGMNT']

In [103]:
chicago.Department = chicago.Department.str.replace('MGMNT','MANAGEMENT')# not overwrite, have to assign
chicago.Department

0        WATER MANAGEMENT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4        WATER MANAGEMENT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: object

In [104]:
chicago['Employee Annual Salary'].str.replace('$','') # type is still object

  chicago['Employee Annual Salary'].str.replace('$','') # type is still object


0         90744.00
1         84450.00
2         84450.00
3         89880.00
4        106836.00
           ...    
32057     99528.00
32058     87384.00
32059     84450.00
32060     87384.00
32061    113664.00
Name: Employee Annual Salary, Length: 32062, dtype: object

In [105]:
chicago['Employee Annual Salary'].str.replace('$','').astype('float')

  chicago['Employee Annual Salary'].str.replace('$','').astype('float')


0         90744.0
1         84450.0
2         84450.0
3         89880.0
4        106836.0
           ...   
32057     99528.0
32058     87384.0
32059     84450.0
32060     87384.0
32061    113664.0
Name: Employee Annual Salary, Length: 32062, dtype: float64

In [106]:
chicago['Employee Annual Salary']= chicago['Employee Annual Salary'].str.replace('$','').astype('float')
chicago.head()

  chicago['Employee Annual Salary']= chicago['Employee Annual Salary'].str.replace('$','').astype('float')


Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",Water Rate Taker,WATER MANAGEMENT,90744.0
1,"AARON, JEFFERY M",Police Officer,POLICE,84450.0
2,"AARON, KARINA",Police Officer,POLICE,84450.0
3,"AARON, KIMBERLEI R",Chief Contract Expediter,GENERAL SERVICES,89880.0
4,"ABAD JR, VICENTE M",Civil Engineer Iv,WATER MANAGEMENT,106836.0


In [107]:
chicago['Employee Annual Salary'].nlargest(10)

8184     300000.0
7954     216210.0
25532    202728.0
8924     197736.0
8042     197724.0
19208    195000.0
3706     187680.0
18556    187680.0
29466    187680.0
13754    185364.0
Name: Employee Annual Salary, dtype: float64

## Filtering with String Methods .contains() .startswith() .endswith()

In [108]:
chicago = chicago.dropna()
chicago['Position Title'].str.lower().str.contains('water')

0         True
1        False
2        False
3        False
4        False
         ...  
32057    False
32058    False
32059    False
32060    False
32061    False
Name: Position Title, Length: 32062, dtype: bool

In [109]:
mask = chicago['Position Title'].str.lower().str.contains('water')
chicago[mask].head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",Water Rate Taker,WATER MANAGEMENT,90744.0
554,"ALUISE, VINCENT G",Foreman Of Water Pipe Construction,WATER MANAGEMENT,102440.0
671,"ANDER, PERRY A",Water Chemist Ii,WATER MANAGEMENT,82044.0
685,"ANDERSON, ANDREW J",District Superintendent Of Water Distribution,WATER MANAGEMENT,109272.0
702,"ANDERSON, DONALD",Foreman Of Water Pipe Construction,WATER MANAGEMENT,102440.0


In [110]:
mask2 = chicago['Position Title'].str.lower().str.startswith('water')
chicago[mask2].head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",Water Rate Taker,WATER MANAGEMENT,90744.0
671,"ANDER, PERRY A",Water Chemist Ii,WATER MANAGEMENT,82044.0
1054,"ASHLEY, KARMA T",Water Chemist Ii,WATER MANAGEMENT,82044.0
1079,"ATKINS, JOANNA M",Water Chemist Ii,WATER MANAGEMENT,82044.0
1181,"AZEEM, MOHAMMED A",Water Chemist Ii,WATER MANAGEMENT,53172.0


In [111]:
mask3 = chicago['Position Title'].str.lower().str.endswith('ist')
chicago[mask3].head() #cannot use chicago.mask3

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
184,"AFROZ, NAYYAR",Psychiatrist,HEALTH,99840.0
308,"ALARCON, LUIS J",Loan Processing Specialist,COMMUNITY DEVELOPMENT,81948.0
422,"ALLAIN, CAROLYN",Senior Telecommunications Specialist,DoIT,89880.0
472,"ALLEN, ROBERT",Machinist,WATER MANAGEMENT,94328.0
705,"ANDERSON, EDWARD M",Sr Procurement Specialist,PROCUREMENT,91476.0


## More String Methods .strip() .lstrip() .rstrip()

In [112]:
chicago.tail()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
32057,"ZYGADLO, MICHAEL J",Frm Of Machinists - Automotive,GENERAL SERVICES,99528.0
32058,"ZYGOWICZ, PETER J",Police Officer,POLICE,87384.0
32059,"ZYMANTAS, MARK E",Police Officer,POLICE,84450.0
32060,"ZYRKOWSKI, CARLO E",Police Officer,POLICE,87384.0
32061,"ZYSKOWSKI, DARIUSZ",Chief Data Base Analyst,DoIT,113664.0


In [114]:
print('      hello world'.lstrip()) #remove left space of a string
print('hello world      '.rstrip()) #remove right space of a string
print('   hello world'.strip()) #remove left and right space of a string

hello world
hello world
hello world


In [117]:
chicago.Name.str.lstrip().str.rstrip() #same as str.strip()

0            AARON,  ELVIA J
1          AARON,  JEFFERY M
2             AARON,  KARINA
3        AARON,  KIMBERLEI R
4        ABAD JR,  VICENTE M
                ...         
32057    ZYGADLO,  MICHAEL J
32058     ZYGOWICZ,  PETER J
32059      ZYMANTAS,  MARK E
32060    ZYRKOWSKI,  CARLO E
32061    ZYSKOWSKI,  DARIUSZ
Name: Name, Length: 32062, dtype: object

In [119]:
chicago.Name = chicago.Name.str.strip()
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",Water Rate Taker,WATER MANAGEMENT,90744.0
1,"AARON, JEFFERY M",Police Officer,POLICE,84450.0
2,"AARON, KARINA",Police Officer,POLICE,84450.0
3,"AARON, KIMBERLEI R",Chief Contract Expediter,GENERAL SERVICES,89880.0
4,"ABAD JR, VICENTE M",Civil Engineer Iv,WATER MANAGEMENT,106836.0


## String Methods on Index and Columns

In [123]:
chicago = pd.read_csv('chicago.csv',index_col = 'Name').dropna(how ='all')
chicago.Department = chicago.Department.astype('category')
chicago.tail()

Unnamed: 0_level_0,Position Title,Department,Employee Annual Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00
"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00
"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00
"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00
"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00


In [127]:
chicago.index= chicago.index.str.strip().str.title()
chicago.index

Index(['Aaron,  Elvia J', 'Aaron,  Jeffery M', 'Aaron,  Karina',
       'Aaron,  Kimberlei R', 'Abad Jr,  Vicente M', 'Abarca,  Anabel',
       'Abarca,  Emmanuel', 'Abascal,  Reece E', 'Abbasi,  Christopher',
       'Abbatacola,  Robert J',
       ...
       'Zwit,  Jeffrey J', 'Zwolfer,  Matthew W', 'Zych,  Mateusz',
       'Zydek,  Bryan', 'Zygadlo,  John P', 'Zygadlo,  Michael J',
       'Zygowicz,  Peter J', 'Zymantas,  Mark E', 'Zyrkowski,  Carlo E',
       'Zyskowski,  Dariusz'],
      dtype='object', name='Name', length=32062)

In [128]:
chicago.head()

Unnamed: 0_level_0,Position Title,Department,Employee Annual Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Aaron, Elvia J",WATER RATE TAKER,WATER MGMNT,$90744.00
"Aaron, Jeffery M",POLICE OFFICER,POLICE,$84450.00
"Aaron, Karina",POLICE OFFICER,POLICE,$84450.00
"Aaron, Kimberlei R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
"Abad Jr, Vicente M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [131]:
chicago.columns = chicago.columns.str.upper()
chicago.columns

Index(['POSITION TITLE', 'DEPARTMENT', 'EMPLOYEE ANNUAL SALARY'], dtype='object')

In [132]:
chicago.head()

Unnamed: 0_level_0,POSITION TITLE,DEPARTMENT,EMPLOYEE ANNUAL SALARY
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Aaron, Elvia J",WATER RATE TAKER,WATER MGMNT,$90744.00
"Aaron, Jeffery M",POLICE OFFICER,POLICE,$84450.00
"Aaron, Karina",POLICE OFFICER,POLICE,$84450.00
"Aaron, Kimberlei R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
"Abad Jr, Vicente M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


## Split Strings by Characters with .str.split() Method

In [135]:
"Hello my name is Elaine".split()

['Hello', 'my', 'name', 'is', 'Elaine']

In [142]:
chicago = pd.read_csv('chicago.csv').dropna(how ='all')
chicago.Department = chicago.Department.astype('category')
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [144]:
chicago.Name.str.split(',')

0            [AARON,   ELVIA J]
1          [AARON,   JEFFERY M]
2             [AARON,   KARINA]
3        [AARON,   KIMBERLEI R]
4        [ABAD JR,   VICENTE M]
                  ...          
32057    [ZYGADLO,   MICHAEL J]
32058     [ZYGOWICZ,   PETER J]
32059      [ZYMANTAS,   MARK E]
32060    [ZYRKOWSKI,   CARLO E]
32061    [ZYSKOWSKI,   DARIUSZ]
Name: Name, Length: 32062, dtype: object

In [148]:
chicago.Name.str.split(',').str.get(0).str.title().value_counts()

Williams        293
Johnson         244
Smith           241
Brown           185
Jones           183
               ... 
Liwanag           1
Scully            1
Lewin             1
Szymusiak         1
Strzechowski      1
Name: Name, Length: 13829, dtype: int64

In [155]:
chicago['Position Title'].str.split(" ").str.get(0).value_counts()

POLICE             10856
FIREFIGHTER-EMT     1509
SERGEANT            1186
POOL                 918
FIREFIGHTER          810
                   ...  
COMM                   1
ASSOC                  1
SUBPOENA               1
EMPLOYEE               1
DECK                   1
Name: Position Title, Length: 320, dtype: int64

## More Practice with Splits

In [156]:
chicago = pd.read_csv('chicago.csv').dropna(how ='all')
chicago.Department = chicago.Department.astype('category')
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [158]:
chicago.Name.str.split(',').str.get(0).value_counts().head()

WILLIAMS    293
JOHNSON     244
SMITH       241
BROWN       185
JONES       183
Name: Name, dtype: int64

In [159]:
chicago.Name.str.split(',').str.get(1)

0              ELVIA J
1            JEFFERY M
2               KARINA
3          KIMBERLEI R
4            VICENTE M
             ...      
32057        MICHAEL J
32058          PETER J
32059           MARK E
32060          CARLO E
32061          DARIUSZ
Name: Name, Length: 32062, dtype: object

In [160]:
chicago.Name.str.split(',').str.get(1).str.split(' ')

0            [, , ELVIA, J]
1          [, , JEFFERY, M]
2              [, , KARINA]
3        [, , KIMBERLEI, R]
4          [, , VICENTE, M]
                ...        
32057      [, , MICHAEL, J]
32058        [, , PETER, J]
32059         [, , MARK, E]
32060        [, , CARLO, E]
32061         [, , DARIUSZ]
Name: Name, Length: 32062, dtype: object

In [161]:
chicago.Name.str.split(',').str.get(1).str.strip().str.split(' ')

0            [ELVIA, J]
1          [JEFFERY, M]
2              [KARINA]
3        [KIMBERLEI, R]
4          [VICENTE, M]
              ...      
32057      [MICHAEL, J]
32058        [PETER, J]
32059         [MARK, E]
32060        [CARLO, E]
32061         [DARIUSZ]
Name: Name, Length: 32062, dtype: object

In [162]:
chicago.Name.str.split(',').str.get(1).str.strip().str.split(' ').str.get(0)

0            ELVIA
1          JEFFERY
2           KARINA
3        KIMBERLEI
4          VICENTE
           ...    
32057      MICHAEL
32058        PETER
32059         MARK
32060        CARLO
32061      DARIUSZ
Name: Name, Length: 32062, dtype: object

## The expand and n Parameters of the str.split() Method

In [175]:
chicago.Name.str.split(',')

0            [AARON,   ELVIA J]
1          [AARON,   JEFFERY M]
2             [AARON,   KARINA]
3        [AARON,   KIMBERLEI R]
4        [ABAD JR,   VICENTE M]
                  ...          
32057    [ZYGADLO,   MICHAEL J]
32058     [ZYGOWICZ,   PETER J]
32059      [ZYMANTAS,   MARK E]
32060    [ZYRKOWSKI,   CARLO E]
32061    [ZYSKOWSKI,   DARIUSZ]
Name: Name, Length: 32062, dtype: object

In [176]:
chicago.Name.str.split(',', expand = True)

Unnamed: 0,0,1
0,AARON,ELVIA J
1,AARON,JEFFERY M
2,AARON,KARINA
3,AARON,KIMBERLEI R
4,ABAD JR,VICENTE M
...,...,...
32057,ZYGADLO,MICHAEL J
32058,ZYGOWICZ,PETER J
32059,ZYMANTAS,MARK E
32060,ZYRKOWSKI,CARLO E


In [178]:
chicago[['First Name','Last Name']]= chicago.Name.str.split(',', expand = True)
chicago

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,First Name,Last Name
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,AARON,ELVIA J
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00,AARON,JEFFERY M
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00,AARON,KARINA
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,AARON,KIMBERLEI R
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,ABAD JR,VICENTE M
...,...,...,...,...,...,...
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,ZYGADLO,MICHAEL J
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,ZYGOWICZ,PETER J
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,ZYMANTAS,MARK E
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,ZYRKOWSKI,CARLO E


In [180]:
chicago['Position Title'].str.split(' ', expand=True)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,WATER,RATE,TAKER,,,,,,
1,POLICE,OFFICER,,,,,,,
2,POLICE,OFFICER,,,,,,,
3,CHIEF,CONTRACT,EXPEDITER,,,,,,
4,CIVIL,ENGINEER,IV,,,,,,
...,...,...,...,...,...,...,...,...,...
32057,FRM,OF,MACHINISTS,-,AUTOMOTIVE,,,,
32058,POLICE,OFFICER,,,,,,,
32059,POLICE,OFFICER,,,,,,,
32060,POLICE,OFFICER,,,,,,,


In [181]:
chicago['Position Title'].str.split(' ', expand=True, n=1) # create a datafram

Unnamed: 0,0,1
0,WATER,RATE TAKER
1,POLICE,OFFICER
2,POLICE,OFFICER
3,CHIEF,CONTRACT EXPEDITER
4,CIVIL,ENGINEER IV
...,...,...
32057,FRM,OF MACHINISTS - AUTOMOTIVE
32058,POLICE,OFFICER
32059,POLICE,OFFICER
32060,POLICE,OFFICER
