In [2]:
import pandas as pd

def fill_mean(row):
    """
    Return ``row`` with its missing entries replaced by the row's mean.

    The fill value is whatever ``row.mean()`` yields; pandas skips missing
    values when computing it. If the mean itself is missing (for example,
    every entry is missing), the row comes back with its gaps unchanged.
    The input is not modified; a new object is returned.
    """
    return row.fillna(value=row.mean())

def perform_imputation(file_path):
    """
    Perform row-wise mean imputation on a CSV file and save the result.

    The file is read with pandas. For each row, the mean of its numerical
    cells is computed and written into that row's missing numerical cells.
    Rows whose numerical cells are all missing are left unchanged.

    Every column of the input is kept, in its original order; columns that
    are not numerical are passed through untouched. Numerical columns without
    missing values keep their dtype, so integer columns are not rewritten as
    floats.

    NOTE(review): every numerical column takes part in the row mean, so any
    numeric identifier-like columns in the file influence the filled values.
    Confirm this is intended for the data being processed.

    Args:
        file_path: Path to the input CSV (a string or an ``os.PathLike``).

    Returns:
        The output path as a string: the input path with ``_imputed``
        inserted just before the file extension.
    """
    import os  # function-scope import keeps this block self-contained

    # Load the CSV file
    df = pd.read_csv(file_path)

    # Only numerical columns are imputed; the mean is taken across each row
    numerical_cols = df.select_dtypes(include=['number']).columns
    row_means = df[numerical_cols].mean(axis=1)

    # Work on a copy so column order and non-numerical columns are preserved
    df_imputed = df.copy()
    for col in numerical_cols:
        column = df_imputed[col]
        # Skip complete columns so their dtype (e.g. int64) is left alone
        if column.isna().any():
            # A Series fill value is aligned on the index, i.e. per row
            df_imputed[col] = column.fillna(row_means)

    # Insert the suffix before the real extension only. A plain str.replace
    # would also rewrite '.csv' inside directory names, and would return the
    # input path unchanged (overwriting the source) when there is no '.csv'.
    root, ext = os.path.splitext(os.fspath(file_path))
    output_file_path = f"{root}_imputed{ext}"

    # Save the imputed data to a new CSV file
    df_imputed.to_csv(output_file_path, index=False)

    return output_file_path

# Source CSV whose missing numerical values should be imputed
file_path = 'resources/Metro_median_sale_price_uc_sfrcondo_month.csv'

# Run the imputation, then report where the imputed copy was written
output_file_path = perform_imputation(file_path)
print(f"Imputed CSV file saved to: {output_file_path}")


Imputed CSV file saved to: resources/Metro_median_sale_price_uc_sfrcondo_month_imputed.csv
