### Creates CSVs for each group of trades as specified in the code below

In [1]:
import pandas as pd
import numpy
import matplotlib.pyplot as plt

In [2]:
# Peek at a single monthly report to see how the trades are labelled in the
# raw data before deciding how to group them.

filename = '2017-18data/WorkforceUtilizationSummaryReport2017-2018201708.csv'
big_df = pd.read_csv(filename)

# Distinct values that appear in the CONSTRUCTION_TRADE column.
trades_list = big_df.CONSTRUCTION_TRADE.unique()

trades_list

array(['INSULATOR (PIPES & TANKS)', 'BACKHOE/FRONT-END LOADER',
       'DRIVER / GROUNDMAN CDL', 'EQUIPMENT OPERATOR (Class B CDL)',
       'LABORER', 'LABORER: HAZARDOUS WASTE/ASBESTOS REMOVER',
       'ROLLER/SPREADER/MULCHING MACHINE',
       'TRAILERS FOR EARTH MOVING EQUIPMENT', 'CARPENTER',
       'IRONWORKER/WELDER', 'ELECTRICIAN',
       'BRICK/STONE/ARTIFICIAL MASONRY', 'LABORER: MASON TENDER',
       'PAINTER / TAPER (BRUSH,REPAINT)', 'SHEETMETAL WORKER',
       'PIPEFITTER & STEAMFITTER', 'PLUMBERS & GASFITTERS',
       'DEMO: WRECKING LABORER', 'PIPEFITTER',
       'FIRE ALARM REPAIR / MAINTENANCE',
       'HVAC (TESTING AND BALANCING - AIR)',
       'HVAC (TESTING AND BALANCING - WATER)', 'ROOFER',
       'ADS/SUBMERSIBLE PILOT', 'JOURNEYMAN LINEMAN',
       'EQUIPMENT OPERATOR (Class A CDL)', 'IRONWORKER',
       'CEMENT MASONRY/PLASTERING',
       'FIELD ENG.PARTY CHIEF-BLDG,SITE,HVY/HWY',
       'PUMP OPERATOR (CONCRETE)'], dtype=object)

In [3]:
# Mapping from the raw CONSTRUCTION_TRADE labels found in the reports to a
# smaller set of broad categories, grouped by similarity for ease of
# readability and analysis. Labels that do not appear as a key here are left
# unchanged when the mapping is applied.
#
# One entry per line so that duplicate keys are easy to spot (the original
# literal listed 'IRONWORKER/WELDER' twice; a dict literal silently keeps
# only the last occurrence).
replacements = {
    # NOTE(review): 'INSUALTOR' looks like a typo for 'INSULATOR'. It is kept
    # as-is because the label ends up in the exported CSV file names; confirm
    # nothing downstream relies on it before correcting the spelling.
    'INSULATOR (PIPES & TANKS)': 'INSUALTOR',
    'BACKHOE/FRONT-END LOADER': 'LOADER',
    'DRIVER / GROUNDMAN CDL': 'DRIVER',
    'EQUIPMENT OPERATOR (Class B CDL)': 'EQUIP_OPERATOR',
    'EQUIPMENT OPERATOR (Class A CDL)': 'EQUIP_OPERATOR',
    'PUMP OPERATOR (CONCRETE)': 'EQUIP_OPERATOR',
    'LASER BEAM OPERATOR': 'EQUIP_OPERATOR',
    'LABORER: HAZARDOUS WASTE/ASBESTOS REMOVER': 'LABORER',
    'LABORER: MASON TENDER': 'LABORER',
    'DEMO: WRECKING LABORER': 'LABORER',
    'LABORER: CEMENT FINISHER TENDER': 'LABORER',
    'ROLLER/SPREADER/MULCHING MACHINE': 'ROLLER',
    'TRAILERS FOR EARTH MOVING EQUIPMENT': 'TRAILERS',
    'IRONWORKER/WELDER': 'IRONWORKER',
    'SHEETMETAL WORKER': 'IRONWORKER',
    'BRICK/STONE/ARTIFICIAL MASONRY': 'MASONRY',
    'CEMENT MASONRY/PLASTERING': 'MASONRY',
    'PAINTER / TAPER (BRUSH,REPAINT)': 'PAINTER',
    'PIPEFITTER & STEAMFITTER': 'PIPEFITTER',
    'PLUMBERS & GASFITTERS': 'PLUMBER',
    'FIRE ALARM REPAIR / MAINTENANCE': 'MAINTENANCE',
    'HVAC (TESTING AND BALANCING - AIR)': 'HVAC',
    'HVAC (TESTING AND BALANCING - WATER)': 'HVAC',
    'ADS/SUBMERSIBLE PILOT': 'PILOT',
    'JOURNEYMAN LINEMAN': 'LINEMAN',
    'FIELD ENG.PARTY CHIEF-BLDG,SITE,HVY/HWY': 'ENGINEER',
    'PAINTER / TAPER (BRUSH, NEW)': 'PAINTER',
    'LABORER: CARPENTER TENDER': 'LABORER',
    'SPRINKLER FITTER': 'MAINTENANCE',
    'ASBESTOS REMOVER - PIPE / MECH. EQUIPT': 'ASBESTOS',
    'ASBESTOS INSULATOR (PIPES & TANKS)': 'ASBESTOS',
    'HOISTING ENGINEER/CRANES/GRADALLS': 'ENGINEER',
    'OPERATING ENGINEERS': 'ENGINEER',
    'MARBLE MASONS, TILELAYERS & TERRAZZO MECH': 'MASONRY',
    'FENCE & GUARD RAIL ERECTOR': 'MAINTENANCE',
    'GLAZIER (GLASS PLANK/AIR BARRIER/INTERIOR SYSTEMS)': 'GLAZIER',
    'PAINTER (SPRAY OR SANDBLAST,REPAINT)': 'PAINTER',
    'FLOORCOVERER': 'MAINTENANCE',
    'ELEVATOR CONSTRUCTOR': 'CONSTRUCTOR',
    'HVAC (DUCTWORK)': 'HVAC',
    'LABORER: TREE REMOVER': 'LABORER',
    'HVAC (ELECTRICAL CONTROLS)': 'HVAC',
    'PAINTER (SPRAY OR SANDBLAST, NEW)': 'PAINTER',
    'ELEVATOR CONSTRUCTOR HELPER': 'CONSTRUCTOR',
    'HVAC MECHANIC': 'HVAC',
    'BRICK/PLASTER/CEMENT MASON': 'MASONRY',
    'TELECOMMUNICATION TECHNICIAN': 'TECHNICIAN',
    'TELEDATA WIREMAN/INSTALLER/TECHNICIAN': 'TECHNICIAN',
    'FIELD ENG.INST.PERSON-BLDG,SITE,HVY/HWY': 'ENGINEER',
    'MARBLE & TILE FINISHERS': 'MASONRY',
    'PAINTER BRUSH NEW': 'PAINTER',
    'SPECIALIZED EARTH MOVING EQUIP < 35 TONS': 'EQUIP_OPERATOR',
    'LABORER: MULTI-TRADE TENDER': 'LABORER',
    'OTHER POWER DRIVEN EQUIPMENT - CLASS II': 'EQUIP_OPERATOR',
    'MECHANICS MAINTENANCE': 'MAINTENANCE',
    'FORK LIFT/CHERRY PICKER': 'EQUIP_OPERATOR',
    'OILER (OTHER THAN TRUCK CRANES,GRADALLS)': 'OILER',
    'DEMO: CONCRETE CUTTER/SAWYER': 'EQUIP_OPERATOR',
    'MILLWRIGHT (Zone 1)': 'MILLWRIGHT',
    'TRACTORS/PORTABLE STEAM GENERATORS': 'EQUIP_OPERATOR',
    'PAINTER BRUSH REPAINT': 'PAINTER',
    'FIELD ENG.ROD PERSON-BLDG,SITE,HVY/HWY': 'ENGINEER',
    'BULLDOZER/GRADER/SCRAPER': 'EQUIP_OPERATOR',
    'MARBLE- TILE-TERRAZZO MECHANIC': 'MASONRY',
    'FIRE ALARM INSTALLER': 'MAINTENANCE',
    'GENERATOR/LIGHTING PLANT/HEATERS': 'EQUIP_OPERATOR',
    'ROOFER SLATE / TILE / PRECAST CONCRETE': 'MASONRY',
    'ASPHALT RAKER': 'RAKER',
    'CAISSON & UNDERPINNING LABORER': 'LABORER',
    # NOTE(review): 'VAC-HAUL' reads like a vacuum-truck trade rather than an
    # HVAC one; the original grouping is preserved here -- confirm it is
    # intentional.
    'VAC-HAUL': 'HVAC',
    'PIPELAYER': 'PIPEFITTER',
    'SPECIALIZED EARTH MOVING EQUIP > 35 TONS': 'EQUIP_OPERATOR',
}

# Start the combined DataFrame from the August 2017 report, with its trades
# mapped to the broad categories defined in `replacements`.
filename = '2017-18data/WorkforceUtilizationSummaryReport2017-2018201708.csv'
big_df = pd.read_csv(filename)
# Assign the mapped column back instead of calling
# `replace(..., inplace=True)` on the column selection: that chained in-place
# form triggers a FutureWarning on recent pandas and does not modify the
# DataFrame at all once Copy-on-Write is enabled.
big_df['CONSTRUCTION_TRADE'] = big_df['CONSTRUCTION_TRADE'].replace(replacements)

years = ['2017']
months = ['10', '11']

# For each remaining 2017 month, read the report, replace the values in the
# CONSTRUCTION_TRADE column with the appropriate broad category, and collect
# the frame. All frames are concatenated once at the end rather than growing
# big_df inside the loop.
monthly_frames = [big_df]
for y in years:
    for m in months:
        filename = '2017-18data/WorkforceUtilizationSummaryReport2017-2018' + y + m +'.csv'
        df = pd.read_csv(filename)
        df['CONSTRUCTION_TRADE'] = df['CONSTRUCTION_TRADE'].replace(replacements)
        monthly_frames.append(df)

# Original row indexes are kept (no ignore_index), so index labels repeat
# across the monthly reports.
big_df = pd.concat(monthly_frames)

# The same steps are repeated in the cells below for the other years.

In [4]:
# Append the 2018 monthly reports to big_df. Only the months listed here are
# read from the 2017-18 data folder.
years = ['2018']
months = ['01', '02', '05', '06', '08', '10', '11', '12']

# Collect the mapped monthly frames and concatenate once, instead of
# re-concatenating big_df on every iteration.
monthly_frames = [big_df]
for y in years:
    for m in months:
        filename = '2017-18data/WorkforceUtilizationSummaryReport2017-2018' + y + m +'.csv'
        df = pd.read_csv(filename)
        # Assign back rather than using replace(..., inplace=True) on the
        # column selection; the chained in-place form warns on recent pandas
        # and is a no-op once Copy-on-Write is enabled.
        df['CONSTRUCTION_TRADE'] = df['CONSTRUCTION_TRADE'].replace(replacements)
        monthly_frames.append(df)

big_df = pd.concat(monthly_frames)

In [5]:
# Append every monthly report for 2019 and 2020 to big_df. These years live
# in per-year folders ('2019data/', '2020data/') with the year and month in
# the file name.
years = ['2019', '2020']
months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

# Collect the mapped monthly frames and concatenate once, instead of
# re-concatenating big_df on every iteration.
monthly_frames = [big_df]
for y in years:
    for m in months:
        filename = y + 'data/WorkforceUtilizationSummaryReport' + y + m +'.csv'
        df = pd.read_csv(filename)
        # Assign back rather than using replace(..., inplace=True) on the
        # column selection; the chained in-place form warns on recent pandas
        # and is a no-op once Copy-on-Write is enabled.
        df['CONSTRUCTION_TRADE'] = df['CONSTRUCTION_TRADE'].replace(replacements)
        monthly_frames.append(df)

big_df = pd.concat(monthly_frames)

In [6]:
# Display the combined DataFrame built by the cells above.
big_df

Unnamed: 0,MONTH,YEAR,PROJECT,PROJECT_CODE,CONTRACTOR,CONSTRUCTION_TRADE,CRAFT_LEVEL,TOTAL_EMPLOYEE,CAUCASIAN,AFRICAN_AMERICAN,HISPANIC,ASIAN,NATIVE_AMERICAN,OTHER,NOT_SPECIFIED,TOTAL_FEMALE,TOTAL_MALE,HOURS_WORKED_PER_MONTH
0,8,2017,AEP1407E UT1 C AEP Utility Contract-Cape Cod,AEP 1407E UT1,Rise engineering,INSUALTOR,Journeymen,16.0,0.0,0.0,8.0,8.0,0.0,0.0,0.0,0.0,16.0,HOURS_PER_MONTH
1,8,2017,AEP1407E UT1 C AEP Utility Contract-Cape Cod,AEP 1407E UT1,Rise engineering,INSUALTOR,Apprentice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,HOURS_PER_MONTH
2,8,2017,CME0902 DC1 C Chief Medical Examiner Office We...,CME0902 DC1 CM,Crestview construction & trucking inc.,LOADER,Journeymen,247.5,247.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,247.5,HOURS_PER_MONTH
3,8,2017,CME0902 DC1 C Chief Medical Examiner Office We...,CME0902 DC1 CM,Crestview construction & trucking inc.,LOADER,Apprentice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,HOURS_PER_MONTH
4,8,2017,CME0902 DC1 C Chief Medical Examiner Office We...,CME0902 DC1 CM,Crestview construction & trucking inc.,DRIVER,Journeymen,127.0,127.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,127.0,HOURS_PER_MONTH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
373,12,2020,TRC1702 HC1 C Suffolk County Courthouse Elevat...,TRC1702 HC1 C,3 phase elevator,CONSTRUCTOR,Apprentice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,HOURS_PER_MONTH
374,12,2020,TRC1702 HC1 C Suffolk County Courthouse Elevat...,TRC1702 HC1 C,3 phase elevator,CONSTRUCTOR,Journeymen,380.0,380.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,380.0,HOURS_PER_MONTH
375,12,2020,TRC1702 HC1 C Suffolk County Courthouse Elevat...,TRC1702 HC1 C,3 phase elevator,CONSTRUCTOR,Apprentice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,HOURS_PER_MONTH
376,12,2020,TRC1702 HC1 C Suffolk County Courthouse Elevat...,TRC1702 HC1 C,"Annese electrical services, inc.",ELECTRICIAN,Journeymen,11.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,HOURS_PER_MONTH


In [7]:
# After the mapping, the trades across all years collapse into a much
# shorter, more readable set of categories.

trades_list = big_df.CONSTRUCTION_TRADE.unique()

trades_list

array(['INSUALTOR', 'LOADER', 'DRIVER', 'EQUIP_OPERATOR', 'LABORER',
       'ROLLER', 'TRAILERS', 'CARPENTER', 'IRONWORKER', 'ELECTRICIAN',
       'MASONRY', 'PAINTER', 'PIPEFITTER', 'PLUMBER', 'MAINTENANCE',
       'HVAC', 'ROOFER', 'PILOT', 'LINEMAN', 'ENGINEER', 'OILER',
       'CONSTRUCTOR', 'MILLWRIGHT', 'TECHNICIAN', 'GLAZIER', 'ASBESTOS',
       'DIVER', 'RAKER'], dtype=object)

In [8]:
# Group the records of the combined DataFrame by their (mapped) trade.
grouped = big_df.groupby('CONSTRUCTION_TRADE')

# Write one CSV per trade. Iterating the groupby directly (rather than looping
# over a separately computed trades_list and calling get_group) keeps this
# cell self-contained and only visits keys that actually form a group, so a
# missing trade value cannot raise here.
for trade, df in grouped:
    # Trades that were not covered by the mapping may still contain '/',
    # which would be read as a directory separator in the output path.
    safe_trade = str(trade).replace('/', '-')
    filename = r'WorkforceUtilizationReport-' + safe_trade + '.csv'
    df.to_csv(filename, index = False)

In [9]:
# Number of distinct trade categories held in trades_list.
len(trades_list)

28