## Cleaning
This notebook cleans the DEQ DataFrame. Because I want to keep project and locality info separate, this DataFrame required much less cleaning.

In [None]:
import pandas as pd
from IPython.core.display import HTML

# Read both inputs in a fixed order: the raw DEQ permit table first, then the
# county table.
deq_df, county_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../csv_collection/deq_data.csv',
        '../csv_collection/cleaned_counties.csv',
    )
)

# Preview the first three rows of each frame, DEQ first.
display(deq_df.head(3), county_df.head(3))


Unnamed: 0,Permit Name,Megawatts,Total Acres,Disturbance Zone Acres,Section 130 Permit,Mitigation Plan,County,Brownfield Site,NOI Received,Application Received,PBR Authorization Letter,Construction Commenced,Commercial Operation Commenced
0,1650 Cumberland Site 2,2.0,100,12,YES,No,Cumberland County,No,10/24/23,10/24/23,10/25/23,,
1,1650 Cumberland Solar Facility,3.0,101,20,YES,,Cumberland County,No,10/21/22,10/20/22,11/3/22,,
2,1671 Cumberland Solar Facility,3.0,66,36,YES,,Prince Edward County,No,10/21/22,11/3/22,,,


Unnamed: 0,city/county,biden_votes,biden_%,trump_votes,trump_%,other_party_votes,other_party_%,margin_votes,margin_%,total_votes,population,area,pop_density_sqmi,affiliation_2020,median_household_income,bachelors_or_over_%,age_over_50_%
0,Accomack County,7578,44.68,9172,54.07,212,1.25,-1594,-9.39,16962,33411.0,455.0,73.430769,red,57500.0,21.8,47.3
1,Albemarle County,42466,65.68,20804,32.18,1387,2.14,21662,33.5,64657,117313.0,723.0,162.258645,blue,102617.0,60.6,38.3
2,Alexandria City,66240,80.28,14544,17.63,1724,2.09,51696,62.65,82508,,,,blue,113638.0,65.8,30.2


## Cleaning the DEQ dataset

In [2]:
# Remove the 'Section 130 Permit' column, then preview the first two rows.
deq_df = deq_df.drop('Section 130 Permit', axis='columns')
display(deq_df.head(2))

Unnamed: 0,Permit Name,Megawatts,Total Acres,Disturbance Zone Acres,Mitigation Plan,County,Brownfield Site,NOI Received,Application Received,PBR Authorization Letter,Construction Commenced,Commercial Operation Commenced
0,1650 Cumberland Site 2,2.0,100,12,No,Cumberland County,No,10/24/23,10/24/23,10/25/23,,
1,1650 Cumberland Solar Facility,3.0,101,20,,Cumberland County,No,10/21/22,10/20/22,11/3/22,,


In [3]:
# Only concerned with utility scale solar farms, which means 20 MW and over.
MIN_UTILITY_SCALE_MW = 20

# Rename to snake_case *before* filtering so the filter can use the new
# 'megawatts' label. `rename` skips labels that are not present, so running
# this cell a second time is a no-op instead of raising KeyError on the old
# 'Megawatts' label.
# 'County' becomes 'city/county', the same column name county_df uses.
deq_df = (
    deq_df
    .rename(columns={
        'Permit Name': 'permit_name',
        'Megawatts': 'megawatts',
        'Total Acres': 'total_acres',
        'Disturbance Zone Acres': 'disturbance_zone_acres',
        'Mitigation Plan': 'mitigation_plan',
        'County': 'city/county',
        'Brownfield Site': 'brownfield_site',
        'NOI Received': 'noi_received',
        'Application Received': 'application_received',
        'PBR Authorization Letter': 'pbr_authorization_letter',
        'Construction Commenced': 'construction_commenced',
        'Commercial Operation Commenced': 'commercial_operation_commenced'
    })
    # Rows with a missing megawatts value compare False and are dropped too.
    .loc[lambda permits: permits['megawatts'] >= MIN_UTILITY_SCALE_MW]
)
display(deq_df.head(3))

Unnamed: 0,permit_name,megawatts,total_acres,disturbance_zone_acres,mitigation_plan,city/county,brownfield_site,noi_received,application_received,pbr_authorization_letter,construction_commenced,commercial_operation_commenced
4,360 Solar Center Project,52.0,800,450,YES,Chesterfield County,No,7/19/22,4/29/24,7/15/24,,
6,Altavista Solar LLC (FKA Dragonfly),80.0,1200,0,YES,Campbell County,No,9/12/18,5/31/19,8/8/19,3/11/20,6/4/21
7,"Alton Post Office Solar, LLC",75.0,768,501,No,Halifax County,No,10/6/17,9/15/20,12/7/20,9/20/23,12/24/24


In [4]:
# Verifying it looks correct:
# info() writes its summary straight to stdout and returns None, so it is
# called bare; wrapping it in print() appended a stray "None" line.
deq_df.info()
# Render the whole frame as one HTML table so every remaining row can be
# checked by eye (to_html() does not truncate rows by default).
display(HTML(deq_df.to_html()))

<class 'pandas.core.frame.DataFrame'>
Index: 79 entries, 4 to 214
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   permit_name                     79 non-null     object 
 1   megawatts                       79 non-null     float64
 2   total_acres                     79 non-null     object 
 3   disturbance_zone_acres          73 non-null     object 
 4   mitigation_plan                 76 non-null     object 
 5   city/county                     79 non-null     object 
 6   brownfield_site                 79 non-null     object 
 7   noi_received                    79 non-null     object 
 8   application_received            79 non-null     object 
 9   pbr_authorization_letter        79 non-null     object 
 10  construction_commenced          45 non-null     object 
 11  commercial_operation_commenced  39 non-null     object 
dtypes: float64(1), object(11)
memory usage: 8.

Unnamed: 0,permit_name,megawatts,total_acres,disturbance_zone_acres,mitigation_plan,city/county,brownfield_site,noi_received,application_received,pbr_authorization_letter,construction_commenced,commercial_operation_commenced
4,360 Solar Center Project,52.0,800,450.0,YES,Chesterfield County,No,7/19/22,4/29/24,7/15/24,,
6,Altavista Solar LLC (FKA Dragonfly),80.0,1200,0.0,YES,Campbell County,No,9/12/18,5/31/19,8/8/19,3/11/20,6/4/21
7,"Alton Post Office Solar, LLC",75.0,768,501.0,No,Halifax County,No,10/6/17,9/15/20,12/7/20,9/20/23,12/24/24
12,Axton Solar,66.0,600,547.0,YES,Henry County,No,8/28/19,3/23/22,6/17/22,10/3/22,12/28/23
14,"Bartonsville Energy Facility II, LLC",50.0,478,407.0,YES,Frederick County,No,2/17/22,6/21/22,9/23/22,9/26/22,
15,"Bartonsville Energy Facility, LLC",80.0,1160,869.0,No,Frederick County,No,3/18/20,8/16/21,11/1/21,,
16,Bedford Solar Center,70.0,566,410.0,No,Chesapeake City,No,11/20/19,1/14/20,4/9/20,7/30/20,11/23/21
18,"Belcher Solar, LLC",88.2,1305,0.0,YES,Louisa County,No,10/28/16,6/20/17,8/8/17,1/14/20,6/30/21
19,Bella Terra Solar,100.0,1056,558.0,YES,Pulaski County,No,12/6/21,2/17/23,10/24/23,,
20,Blue Rock Solar,100.0,596,1127.0,No,Buckingham County,No,9/19/23,12/27/24,2/6/25,,


In [5]:
# The frame passed the visual check, so persist it as a CSV.
# index=False keeps the row index out of the written file.
deq_df.to_csv(
    path_or_buf='../csv_collection/deq_data_cleaned.csv',
    index=False,
)
