In [None]:
import pandas as pd

# Columns the cleaning steps rely on; their presence means the header row
# was already parsed correctly by pd.read_excel.
REQUIRED_COLUMNS = ('Year_Birth', 'Income', 'Marital_Status')


def find_product_columns(data):
    """Return the labels of the product expenditure columns (prefix 'Mnt').

    Non-string labels (e.g. an empty header cell that became NaN/None) are
    skipped instead of raising AttributeError on ``.startswith``.
    """
    return [
        col for col in data.columns
        if isinstance(col, str) and col.startswith('Mnt')
    ]


def prepare_data(raw, required_columns=REQUIRED_COLUMNS):
    """Normalise the raw sheet and add the derived 'Total_Spent' column.

    Steps:
      1. If the required columns are missing, treat the first data row as the
         real header (promote it to column labels and drop it from the data).
         When the header was already parsed correctly the frame is left as is,
         so the first record is never mistaken for a header.
      2. Coerce 'Year_Birth' and 'Income' to numeric (invalid values -> NaN).
      3. Add 'Total_Spent' as the row-wise sum of all 'Mnt*' columns.
      4. Drop rows whose 'Marital_Status' is 'YOLO'.

    Args:
        raw: DataFrame as returned by ``pd.read_excel``. It is not modified.
        required_columns: labels that must exist for the header to be
            considered valid.

    Returns:
        A new DataFrame with the steps above applied.

    Raises:
        KeyError: if 'Year_Birth', 'Income' or 'Marital_Status' is still
            missing after the header check.
    """
    header_ok = all(col in raw.columns for col in required_columns)
    if header_ok or raw.empty:
        data = raw.copy()
    else:
        # The real header sits in the first data row: promote it.
        data = raw.iloc[1:].copy()
        data.columns = raw.iloc[0]

    data['Year_Birth'] = pd.to_numeric(data['Year_Birth'], errors='coerce')
    data['Income'] = pd.to_numeric(data['Income'], errors='coerce')

    # Sum over a numeric copy: after header promotion the columns are object
    # dtype, and a stray text cell must not break or distort the total.
    # The stored 'Mnt*' columns themselves are left exactly as loaded.
    spending = data[find_product_columns(data)].apply(
        pd.to_numeric, errors='coerce'
    )
    data['Total_Spent'] = spending.sum(axis=1)

    return data[data['Marital_Status'] != 'YOLO']


def filter_plausible_rows(data, threshold_income=5000, threshold_spent=1000):
    """Keep rows with a plausible birth year and income/spending combination.

    A row is kept when 'Year_Birth' is 1900 or later AND either 'Income' is
    above ``threshold_income`` or 'Total_Spent' is at most
    ``threshold_spent``.

    NaN comparisons evaluate to False, so rows with a missing birth year are
    dropped, and rows with a missing income are kept only when their spending
    is within ``threshold_spent``.

    Args:
        data: DataFrame with numeric 'Year_Birth', 'Income' and 'Total_Spent'.
        threshold_income: income at or below this is considered low.
        threshold_spent: spending above this is considered too high for a
            low income.

    Returns:
        The filtered DataFrame.
    """
    born_in_range = data['Year_Birth'] >= 1900
    reasonable_income = data['Income'] > threshold_income
    reasonable_spending = data['Total_Spent'] <= threshold_spent
    return data[born_in_range & (reasonable_income | reasonable_spending)]


if __name__ == "__main__":
    # Load the dataset from an Excel file
    file_path = 'Camp_Market.xlsx'  # Replace with your file path
    data = prepare_data(pd.read_excel(file_path))

    # Product expenditure columns that were summed into 'Total_Spent'
    product_columns = find_product_columns(data)

    # Define thresholds for cleaning
    threshold_income = 5000  # Income at or below this is considered low
    threshold_spent = 1000   # Spending above this is too high for low income

    # Keep people born in or after 1900, and remove those with low income
    # but high spending
    filtered_data = filter_plausible_rows(
        data, threshold_income, threshold_spent
    )

    # Save the cleaned data to a new Excel file
    cleaned_file_path = 'Cleaned_Camp_Market.xlsx'  # You can change the file name as needed
    filtered_data.to_excel(cleaned_file_path, index=False)

    print("Cleaned data saved to:", cleaned_file_path)