In [9]:
import pandas as pd
import re

# Read the 'Student Addresses' sheet; skiprows=1 drops the instruction row
# that sits above the real header.
file_path = r'2022-2023 School Year Reports\Boston College.xlsx'
df = pd.read_excel(
    file_path,
    sheet_name='Student Addresses',
    skiprows=1,
)

# Missing street names become empty strings so the per-row string handling
# below never receives NaN.
street_names = df['6b. \nStreet Name']
df['6b. \nStreet Name'] = street_names.fillna('')

# Street suffixes recognised when splitting column 6b. They are matched as
# whole, case-sensitive words so that names which merely contain a suffix
# ("Lanesborough", "Courtland") are left intact.
_STREET_SUFFIXES = (
    'Road', 'Street', 'Avenue', 'Boulevard', 'Lane', 'Drive',
    'Place', 'Court', 'Terrace', 'Way', 'Circle',
)
_SUFFIX_PATTERN = re.compile(r'\b(?:' + '|'.join(_STREET_SUFFIXES) + r')\b')
_TRAILING_UNIT_PATTERN = re.compile(r'\d+$')

# Compared with == against column 9, so the text (including the word
# "formally") must stay exactly as it is spelled here.
_PRIVATE_HOUSING_LABEL = (
    "City of Boston off-campus not-at-home "
    "(formally referred to as Private Housing)"
)


def process_address(row):
    """Split a row's street name into name, suffix and unit number.

    The value of '6b. \\nStreet Name' is parsed as follows:

    * The right-most whole-word street suffix (Road, Street, Avenue, ...)
      is moved to '6c. \\nStreet Suffix  '. Only that single occurrence is
      removed, so "Court Street" yields name "Court" and suffix "Street".
    * Digits at the very end of what remains are moved to '6d.\\n Unit #'.
      Only those trailing digits are removed; the same digits appearing
      earlier in the name (e.g. "12th") are kept.
    * Whitespace in the remaining street name is collapsed and trimmed.

    In addition, when '9. \\nAt-Home or Not-at-Home' equals the long
    "City of Boston off-campus not-at-home ..." label it is replaced by
    'Not at home'.

    Args:
        row: A mapping-like record (for example the pandas Series passed by
            ``DataFrame.apply(..., axis=1)``) with a string under
            '6b. \\nStreet Name' and a value under
            '9. \\nAt-Home or Not-at-Home'.

    Returns:
        The same ``row`` object, modified in place, with the 6b, 6c, 6d and
        (where applicable) 9 fields updated. 6c and 6d are set to '' when no
        suffix or unit number is found.
    """
    street_name = row['6b. \nStreet Name']
    street_suffix = ''
    unit_number = ''

    # Take the last suffix word: the suffix follows the name, and an earlier
    # hit is more likely part of the name itself ("Court Street").
    suffix_matches = list(_SUFFIX_PATTERN.finditer(street_name))
    if suffix_matches:
        last_match = suffix_matches[-1]
        street_suffix = last_match.group()
        street_name = (
            street_name[:last_match.start()]
            + ' '
            + street_name[last_match.end():]
        )

    # Collapse the gap left by the removed suffix and trim the ends so the
    # end-anchored unit pattern below sees the real last token.
    street_name = ' '.join(street_name.split())

    # Trailing digits are treated as the unit number whether or not a suffix
    # was found. Slice them off instead of str.replace, which would also
    # delete the same digits elsewhere in the name.
    unit_match = _TRAILING_UNIT_PATTERN.search(street_name)
    if unit_match:
        unit_number = unit_match.group()
        street_name = street_name[:unit_match.start()].strip()

    # Write the parsed pieces back to columns 6b, 6c and 6d.
    row['6b. \nStreet Name'] = street_name
    row['6c. \nStreet Suffix  '] = street_suffix
    row['6d.\n Unit #'] = unit_number

    # Shorten the verbose private-housing label in column 9.
    if row['9. \nAt-Home or Not-at-Home'] == _PRIVATE_HOUSING_LABEL:
        row['9. \nAt-Home or Not-at-Home'] = 'Not at home'

    return row

# Run process_address over the frame row by row (axis=1 passes each row).
df = df.apply(process_address, axis=1)

# Write the processed rows to a separate workbook, leaving out the index column.
output_file = 'updated_boston_college_addresses.xlsx'
df.to_excel(
    output_file,
    index=False,
)

print(f"Updated dataset saved to {output_file}")


Updated dataset saved to updated_boston_college_addresses.xlsx
