In [4]:
import pandas as pd
import re
import os

def process_excel_file(excel_file):
    """Reshape the second sheet of one census workbook into a wide DataFrame.

    The filename must contain ``census<YYYY><CODE>`` where ``<YYYY>`` is four
    digits and ``<CODE>`` is two uppercase letters followed by digits
    (e.g. ``census2024FL26.xlsx``). Both parts are attached to the result as
    the string columns ``Year`` and ``Code``.

    Steps: read sheet index 1, drop rows containing NaN, drop rows whose
    first-column label is one of the excluded category headers, transpose so
    the remaining first-column labels become column names, append ``Year`` /
    ``Code``, then discard the first three columns of the transposed frame.

    Args:
        excel_file: Path of the workbook to read.

    Returns:
        The reshaped DataFrame, or ``None`` if anything failed. Failures are
        reported on stdout as ``Error processing <file>: <reason>`` rather
        than raised, so a batch loop can continue with the next file.
    """
    try:
        # Validate the filename before touching the workbook: without a match
        # there is no year/code to attach, and failing here gives a clear
        # message instead of an unbound-variable error further down.
        name_match = re.search(r'census(\d{4})([A-Z]{2}\d+)', excel_file)
        if name_match is None:
            raise ValueError(
                "filename does not match 'census<YYYY><CODE>' "
                "(e.g. census2024FL26.xlsx)"
            )
        year, code = name_match.groups()

        # Read the second sheet (index 1) and keep only fully populated rows.
        df = pd.read_excel(excel_file, sheet_name=1).dropna()

        # Labels in the first column whose rows are excluded from the output.
        values_to_remove = [
            'Male',
            'Female',
            'One race',
            'Two or more races',
            'Foreign born',
            'Not a U.S. citizen',
            'Never married',
            'Now married, except separated',
            'Divorced or separated',
            'Widowed',
            'Population 1 year and over for whom poverty status is determined',
            'Below 100 percent of the poverty level',
            'Population 1 year and over in housing units',
            'Householder lived in owner-occupied housing units',
            'Householder lived in renter-occupied housing units',
            'Residence 1 year ago',
        ]
        df = df[~df.iloc[:, 0].isin(values_to_remove)]

        # Transpose so each remaining first-column label becomes a column; the
        # former column headers end up in a column named 'Feature'.
        df_transposed = (
            df.set_index(df.columns[0])
            .T
            .reset_index()
            .rename(columns={'index': 'Feature'})
        )

        # Tag every row with the year and code parsed from the filename.
        df_transposed['Year'] = year
        df_transposed['Code'] = code
        print(year + '  ' + code)

        # Drop the first three columns ('Feature' plus the next two).
        return df_transposed.iloc[:, 3:]
    except Exception as e:
        # Best-effort by design: report the failure and let the caller skip
        # this file.
        print(f"Error processing {excel_file}: {e}")
        return None


# Collect every XLSX file in the current working directory. os.listdir()
# returns names in arbitrary order, so sort them to keep the row order of the
# combined frame reproducible across runs and machines.
xlsx_files = sorted(f for f in os.listdir() if f.endswith('.xlsx'))

# Successfully processed frames, one per workbook.
dfs = []

# Process each XLSX file; files that fail are reported by
# process_excel_file (which returns None) and skipped here.
for file in xlsx_files:
  df_processed = process_excel_file(file)
  if df_processed is not None:
    print(df_processed)
    dfs.append(df_processed)


# Concatenate all the processed DataFrames into a single DataFrame.
# When nothing was processed, fall back to an empty frame that still has the
# 'Year' and 'Code' columns so this cell and the cells that select those
# columns or export to CSV do not fail with a NameError/KeyError.
if dfs:
    combined_df = pd.concat(dfs)
else:
    combined_df = pd.DataFrame(columns=['Year', 'Code'])
combined_df




2024  FL26
Unnamed: 0 1 to 4 years 5 to 17 years 18 to 24 years 25 to 34 years  \
0                32,804       106,996         64,039         98,063   

Unnamed: 0 35 to 44 years 45 to 54 years 55 to 64 years 65 to 74 years  \
0                 103,776        117,372        112,609         88,246   

Unnamed: 0 75 years and over Median age (years)  ... $25,000 to $34,999  \
0                     77,765               44.5  ...             80,673   

Unnamed: 0 $35,000 to $49,999 $50,000 to $64,999 $65,000 to $74,999  \
0                      92,226             62,508             23,208   

Unnamed: 0 $75,000 or more Median income (dollars)  \
0                  103,033                  33,051   

Unnamed: 0 100 to 149 percent of the poverty level  \
0                                           72,642   

Unnamed: 0 At or above 150 percent of the poverty level  Year  Code  
0                                               610,995  2024  FL26  

[1 rows x 41 columns]
2024  FL27
Unnamed: 0 

Unnamed: 0,1 to 4 years,5 to 17 years,18 to 24 years,25 to 34 years,35 to 44 years,45 to 54 years,55 to 64 years,65 to 74 years,75 years and over,Median age (years),...,"$25,000 to $34,999","$35,000 to $49,999","$50,000 to $64,999","$65,000 to $74,999","$75,000 or more",Median income (dollars),100 to 149 percent of the poverty level,At or above 150 percent of the poverty level,Year,Code
0,32804,106996,64039,98063,103776,117372,112609,88246,77765,44.5,...,80673,92226,62508,23208,103033,33051,72642,610995,2024,FL26
0,32560,94029,57245,110746,104061,112058,95727,67403,66816,42.3,...,68014,65343,45949,19441,109331,30203,72320,557591,2024,FL27
0,34729,113741,69687,91248,103100,109602,104946,75164,55622,41.3,...,72353,86172,64721,27144,94844,35767,67668,592664,2024,FL28


In [5]:
# prompt: print only the year and code features from the df

# Select all rows and just the two identifying columns, then print as text.
print(combined_df.loc[:, ['Year', 'Code']])

Unnamed: 0  Year  Code
0           2024  FL26
0           2024  FL27
0           2024  FL28


In [6]:
# prompt: make the combined_df a csv

# Requires `combined_df` to exist already, i.e. the cell that concatenates the
# processed workbooks must have been run first.
# index=False leaves the DataFrame's row index out of the written file.
combined_df.to_csv(path_or_buf='combined_data.csv', index=False)