In [10]:
import pandas as pd

# Reads one ACS S1701 export per survey year into its own DataFrame.
def _read_acs_table(csv_path):
    """Load one yearly CSV export.

    skiprows=1 drops the first line of the file, so the column names are
    taken from the file's second line.
    """
    return pd.read_csv(csv_path, skiprows=1)


data_2010 = _read_acs_table('ACSST1Y2010.S1701-Data.csv')
data_2011 = _read_acs_table('ACSST1Y2011.S1701-Data.csv')
data_2012 = _read_acs_table('ACSST1Y2012.S1701-Data.csv')
data_2013 = _read_acs_table('ACSST1Y2013.S1701-Data.csv')
data_2014 = _read_acs_table('ACSST1Y2014.S1701-Data.csv')
data_2015 = _read_acs_table('ACSST1Y2015.S1701-Data.csv')
data_2016 = _read_acs_table('ACSST1Y2016.S1701-Data.csv')
data_2017 = _read_acs_table('ACSST1Y2017.S1701-Data.csv')
data_2018 = _read_acs_table('ACSST1Y2018.S1701-Data.csv')
data_2019 = _read_acs_table('ACSST1Y2019.S1701-Data.csv')
# Note: no 2020 file is loaded; the sequence goes straight from 2019 to 2021.
data_2021 = _read_acs_table('ACSST1Y2021.S1701-Data.csv')
data_2022 = _read_acs_table('ACSST1Y2022.S1701-Data.csv')
data_2023 = _read_acs_table('ACSST1Y2023.S1701-Data.csv')

# Stacks the yearly tables, oldest first, into one DataFrame.
# ignore_index=True discards each file's own row labels and renumbers from 0.
yearly_frames = [
    data_2010, data_2011, data_2012, data_2013, data_2014,
    data_2015, data_2016, data_2017, data_2018, data_2019,
    data_2021, data_2022, data_2023,
]
all_data = pd.concat(yearly_frames, ignore_index=True)

# Maps every source column needed for the research questions to a shorter,
# easier-to-read label. The keys double as the list of columns to keep.
column_labels = {
    'Below poverty level!!Estimate!!Population for whom poverty status is determined': 'Below Poverty Level (Estimate)',
    'Total!!Estimate!!AGE!!Under 18 years': 'Age Under 18 years (Estimate)',
    'Total!!Estimate!!AGE!!18 to 64 years': 'Age 18 to 64 years (Estimate)',
    'Total!!Estimate!!SEX!!Male': 'Male (Estimate)',
    'Total!!Estimate!!SEX!!Female': 'Female (Estimate)',
    'Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!One race!!White': 'White (Estimate)',
    'Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!One race!!Black or African American': 'Black or African American (Estimate)',
    'Total!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Less than high school graduate': 'Less than High School Graduate (Estimate)',
}

# Source column names to keep, in the order they appear in the mapping.
relevant_columns = list(column_labels)

# Takes an independent copy of the selected columns (this avoids the
# SettingWithCopyWarning received previously), then applies the readable labels.
filtered_data = all_data[relevant_columns].copy()
filtered_data = filtered_data.rename(columns=column_labels)

# Adds a 'Year' column that labels every row with the survey year of the file
# it was loaded from.
years = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2021, 2022, 2023]

# Yearly frames in the same order they were passed to pd.concat, so that
# position i here corresponds to years[i].
frames_in_concat_order = [
    data_2010, data_2011, data_2012, data_2013, data_2014,
    data_2015, data_2016, data_2017, data_2018, data_2019,
    data_2021, data_2022, data_2023,
]

# pd.concat stacks the frames one after another, so each year has to be
# repeated once per row of its own frame (2010, 2010, ..., 2011, 2011, ...).
# Tiling the whole list instead (2010, 2011, ..., 2023, 2010, ...) only labels
# rows correctly when every file contributes exactly one row, and raises a
# length mismatch when the row count is not a multiple of len(years).
rows_per_year = [len(frame) for frame in frames_in_concat_order]
filtered_data['Year'] = pd.Series(years).repeat(rows_per_year).to_numpy()

# Writes the cleaned table to a new CSV file; index=False leaves out the row index.
output_csv = 'cleaned_ACS_data.csv'
filtered_data.to_csv(output_csv, index=False)

# Prints the first few rows of the cleaned table as a quick visual check.
preview = filtered_data.head()
print(preview)


   Below Poverty Level (Estimate)  Age Under 18 years (Estimate)  \
0                      46215956.0                     73024577.0   
1                      48452035.0                     72802773.0   
2                      48760123.0                     72605436.0   
3                      48810868.0                     72382173.0   
4                      48208387.0                     72386485.0   

   Age 18 to 64 years (Estimate)  Male (Estimate)  Female (Estimate)  \
0                    189378803.0      147399161.0        154135860.0   
1                    190889167.0      148591813.0        155186380.0   
2                    191640799.0      149759499.0        156326564.0   
3                    192461043.0      150918837.0        157277946.0   
4                    193600625.0      152143433.0        158756477.0   

   White (Estimate)  Black or African American (Estimate)  \
0       224197343.0                            37200236.0   
1       225678820.0                 