# Introduction

In this notebook we clean the "laws_and_incentives" dataset and count the number of state-level policies for each state.

# Set up

In [1]:
import pandas as pd
import csv
from datetime import datetime

# Read Data

In [2]:
# Load the raw laws-and-incentives export into a DataFrame and preview it.
path = '../Raw_Data/laws_and_incentives (Apr 7 2022).csv'
df = pd.read_csv(filepath_or_buffer=path)
df.head(n=5)

Unnamed: 0,Law Id,State,Title,Text,Enacted Date,Amended Date,Recent?,Sequence Number,Type,Agency,Significant Update Date,Expired Date,Archived Date,Repealed Date,Topic,Technology Categories,Incentive Categories,Regulation Categories,User Categories,References
0,284,US,Congestion Mitigation and Air Quality (CMAQ) I...,The CMAQ Program provides funding to state dep...,2005-08-10 00:00:00 UTC,2021-11-15 00:00:00 UTC,False,54.0,Incentives,U.S. Department of Transportation,2021-11-29 21:00:16 UTC,,,,,BIOD|ETH|ELEC|HY|IR|NG|PHEV|LPG,GNT,,STATION|FLEET|GOV,https://www.congress.gov/public-laws/117th-con...
1,288,US,Clean Cities Coalition Network,The mission of Clean Cities Coalition Network ...,,,False,21.0,Programs,U.S. Department of Energy,,,,,,AFTMKTCONV|BIOD|ETH|ELEC|EFFEC|HEV|HY|IR|NG|NE...,GNT|OTHER,,STATION|AFP|PURCH|MAN|FLEET|GOV|IND,
2,317,US,State Energy Program (SEP) Funding,The SEP provides grants to states to assist in...,,2021-11-15 00:00:00 UTC,False,26.0,Incentives,U.S. Department of Energy,2021-11-29 21:33:02 UTC,,,,,BIOD|ETH|ELEC|EFFEC|HY|NG|PHEV|LPG,GNT,,OTHER,https://www.congress.gov/public-laws/117th-con...
3,323,US,Clean School Bus,The U.S. Environmental Protection Agency\'s (E...,,2021-11-15 00:00:00 UTC,True,33.0,Incentives,U.S. Environmental Protection Agency,2022-03-24 15:26:06 UTC,,,,,AFTMKTCONV|BIOD|ETH|ELEC|EFFEC|HY|IR|NG|PHEV|LPG,GNT,,FLEET|GOV,https://www.congress.gov/public-laws/117th-con...
4,324,US,Clean Construction and Agriculture,Clean Construction is a voluntary program that...,,,False,34.0,Programs,U.S. Environmental Protection Agency,,,,,,BIOD|ELEC|HEV|HY|NG|PHEV|LPG,GNT,,PURCH|FLEET|OTHER,


# Cleaning

**Drop unused columns**

In [3]:
# List every column of the raw frame so we can pick the ones needed
# downstream; only 'State' and 'Type' are kept by the next cell.
df.columns

Index(['Law Id', 'State', 'Title', 'Text', 'Enacted Date', 'Amended Date',
       'Recent?', 'Sequence Number', 'Type', 'Agency',
       'Significant Update Date', 'Expired Date', 'Archived Date',
       'Repealed Date', 'Topic', 'Technology Categories',
       'Incentive Categories', 'Regulation Categories', 'User Categories',
       'References'],
      dtype='object')

In [4]:
# Keep only the two columns the analysis needs (State, Type).
# Selecting the wanted columns, rather than dropping the 18 unwanted ones by
# name, makes this cell safe to re-run: the drop-based version raised KeyError
# on a second execution because the listed columns were already gone, and it
# would silently keep any new column added to the raw export.
df = df[['State', 'Type']]

**Keep only state-level policies**

In [5]:
# Rows whose State is 'US' are excluded; every other row is kept.
is_not_federal = df['State'].ne('US')
df_state_policy = df.loc[is_not_federal]
df_state_policy.head(n=5)

Unnamed: 0,State,Type
18,AZ,State Incentives
19,AZ,State Incentives
20,CA,State Incentives
21,CA,Utility/Private Incentives
22,CA,Laws and Regulations


**Convert state abbreviation to full name**

In [6]:
# Lookup table from two-letter code to full name, grouped by first letter.
# Besides the 50 states it covers DC, five territories (AS, GU, MP, PR, VI)
# and the pseudo-code 'NA' ("National").
States = {
    'AK': 'Alaska', 'AL': 'Alabama', 'AR': 'Arkansas', 'AS': 'American Samoa', 'AZ': 'Arizona',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut',
    'DC': 'District of Columbia', 'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia', 'GU': 'Guam',
    'HI': 'Hawaii',
    'IA': 'Iowa', 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana',
    'KS': 'Kansas', 'KY': 'Kentucky',
    'LA': 'Louisiana',
    'MA': 'Massachusetts', 'MD': 'Maryland', 'ME': 'Maine', 'MI': 'Michigan', 'MN': 'Minnesota',
    'MO': 'Missouri', 'MP': 'Northern Mariana Islands', 'MS': 'Mississippi', 'MT': 'Montana',
    'NA': 'National', 'NC': 'North Carolina', 'ND': 'North Dakota', 'NE': 'Nebraska',
    'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NV': 'Nevada', 'NY': 'New York',
    'OH': 'Ohio', 'OK': 'Oklahoma', 'OR': 'Oregon',
    'PA': 'Pennsylvania', 'PR': 'Puerto Rico',
    'RI': 'Rhode Island',
    'SC': 'South Carolina', 'SD': 'South Dakota',
    'TN': 'Tennessee', 'TX': 'Texas',
    'UT': 'Utah',
    'VA': 'Virginia', 'VI': 'Virgin Islands', 'VT': 'Vermont',
    'WA': 'Washington', 'WI': 'Wisconsin', 'WV': 'West Virginia', 'WY': 'Wyoming',
}

In [7]:
# Swap each abbreviation in the State column for its full name; values that
# are not keys of States are left unchanged.
full_names = df_state_policy['State'].replace(States)
df_state_policy = df_state_policy.assign(State=full_names)
df_state_policy.head(n=5)

Unnamed: 0,State,Type
18,Arizona,State Incentives
19,Arizona,State Incentives
20,California,State Incentives
21,California,Utility/Private Incentives
22,California,Laws and Regulations


**Unique policy types in this data**

In [8]:
# Distinct values found in the Type column.
df_state_policy.loc[:, 'Type'].unique()

array(['State Incentives', 'Utility/Private Incentives',
       'Laws and Regulations'], dtype=object)

**Count the total number of policies for each state**

In [9]:
# One row per state; Total is the number of non-null Type entries for that state.
df_total = df_state_policy.groupby('State').agg(Total=('Type', 'count'))
df_total.head(n=5)

Unnamed: 0_level_0,Total
State,Unnamed: 1_level_1
Alabama,8
Alaska,6
Arizona,24
Arkansas,6
California,133


# Output Cleaned Data Version

In [10]:
# Write the per-state totals to the processed-data folder; the State index is
# written as the first CSV column.
path = '../Processed_Data/policy_state.csv'
df_total.to_csv(path_or_buf=path)