## Change the data frame to a form more convenient for processing 

In [1]:
import pandas as pd

In [2]:
chicago = pd.read_csv("chicago.csv", index_col = "Name").dropna(how = "all")
chicago.head(3)

Unnamed: 0_level_0,Position Title,Department,Employee Annual Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [3]:
chicago.index.names = ["NAME"]

In [4]:
chicago.rename(columns = {"Position Title":"POSITION","Department":"DEPARTMENT","Employee Annual Salary":"SALARY"}, inplace = True)

In [5]:
chicago.head(3)

Unnamed: 0_level_0,POSITION,DEPARTMENT,SALARY
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [6]:
chicago.index = chicago.index.str.strip().str.title()
chicago.tail(3)

Unnamed: 0_level_0,POSITION,DEPARTMENT,SALARY
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Zymantas, Mark E",POLICE OFFICER,POLICE,$84450.00
"Zyrkowski, Carlo E",POLICE OFFICER,POLICE,$87384.00
"Zyskowski, Dariusz",CHIEF DATA BASE ANALYST,DoIT,$113664.00


In [7]:
for col in chicago.columns:
    chicago[col] = chicago[col].str.strip().str.title()
chicago.tail(3)

Unnamed: 0_level_0,POSITION,DEPARTMENT,SALARY
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Zymantas, Mark E",Police Officer,Police,$84450.00
"Zyrkowski, Carlo E",Police Officer,Police,$87384.00
"Zyskowski, Dariusz",Chief Data Base Analyst,Doit,$113664.00


In [8]:
chicago["SALARY"] = chicago["SALARY"].str.replace("$", "").astype(float)

In [9]:
chicago.head(3)

Unnamed: 0_level_0,POSITION,DEPARTMENT,SALARY
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Aaron, Elvia J",Water Rate Taker,Water Mgmnt,90744.0
"Aaron, Jeffery M",Police Officer,Police,84450.0
"Aaron, Karina",Police Officer,Police,84450.0


## Group by DEPARTMENT and list the highest paid people by group 

In [10]:
chicago.nunique()

POSITION      1093
DEPARTMENT      35
SALARY        1156
dtype: int64

In [11]:
departments = chicago.groupby("DEPARTMENT")

In [12]:
df = pd.DataFrame(columns = chicago.columns)

In [13]:
for department, data in departments:
    highest = data.nlargest(1,"SALARY")
    df = pd.concat(objs = [df, pd.DataFrame(highest)])
df

  df = pd.concat(objs = [df, pd.DataFrame(highest)])


Unnamed: 0,POSITION,DEPARTMENT,SALARY
"Jackowiak, Patricia",Dir Of Administrative Hearings,Admin Hearng,156420.0
"Yamaji, Peter S",Veterinarian,Animal Contrl,123144.0
"Evans, Ginger S",Commissioner Of Aviation,Aviation,300000.0
"Bateman, Kelly Anne",Deputy Chief Administrative Officer,Board Of Election,129840.0
"Berlin, Steven",Executive Dir - Board Of Ethics,Board Of Ethics,131688.0
"Holt, Alexandra D",Budget Dir,Budget & Mgmt,169992.0
"Frydland, Judith",Commissioner Of Buildings,Buildings,157092.0
"Guerra, Maria",Commissioner Of Bus Afairs And Consumer Prot,Business Affairs,157092.0
"Mendoza, Susana A",City Clerk,City Clerk,133545.0
"Kaiden, Marla M",Chief Administrative Officer,City Council,160248.0
