# In [7]:
import pandas as pd

# Preprocessing pipeline for the property dataset: load the CSV, normalise the
# column names, impute numeric gaps, take an example subset, one-hot encode the
# non-numeric columns and write the result back to disk.
#
# Each step is a small function so it can be reused and tested on an in-memory
# DataFrame; the file I/O only happens when this code is executed directly
# (script or notebook cell), not when it is imported.


def clean_column_names(columns: pd.Index) -> pd.Index:
    """Return *columns* with tidy, identifier-like names.

    Leading/trailing whitespace is stripped, inner spaces become underscores,
    and every character that is neither a word character nor whitespace is
    removed.
    """
    stripped = columns.str.strip()
    # Literal replacement: state regex=False explicitly so the result does not
    # depend on the pandas version's default for `regex`.
    underscored = stripped.str.replace(' ', '_', regex=False)
    # Raw string: '\w' and '\s' in a plain literal are invalid escape
    # sequences and trigger a SyntaxWarning on recent Python versions.
    return underscored.str.replace(r'[^\w\s]', '', regex=True)


def impute_numeric_means(frame: pd.DataFrame) -> pd.DataFrame:
    """Fill missing values in the numeric columns of *frame* with column means.

    The frame is modified in place and also returned for convenience.
    Non-numeric columns are left untouched, so their missing values remain.
    """
    numeric_cols = frame.select_dtypes(include=['number']).columns
    if not numeric_cols.empty:
        column_means = frame[numeric_cols].mean()
        frame[numeric_cols] = frame[numeric_cols].fillna(column_means)
    return frame


def filter_listings(frame: pd.DataFrame, location: str, property_name: str) -> pd.DataFrame:
    """Return the rows whose 'Location' and 'Property_Name' match exactly.

    Raises KeyError if either column is absent from *frame*.
    """
    matches = (frame['Location'] == location) & (frame['Property_Name'] == property_name)
    return frame[matches]


def one_hot_encode(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with every non-numeric column one-hot encoded.

    Uses pd.get_dummies with its defaults, so a missing value in an encoded
    column yields a row of zeros across that column's indicator columns.
    """
    categorical_cols = frame.select_dtypes(exclude=['number']).columns
    # NOTE(review): this encodes *every* non-numeric column; free-text or
    # high-cardinality columns would each add one indicator per distinct value.
    return pd.get_dummies(frame, columns=list(categorical_cols))


# Kept at module scope (not inside a main() function) so that `df` and
# `filtered_data` stay available as globals to later code, as before.
if __name__ == "__main__":
    # Step 1: Import the dataset and clean column names
    df = pd.read_csv("Mumbai_Property.csv")
    df.columns = clean_column_names(df.columns)

    # Step 2: Handle missing values
    # Numeric columns are imputed with their means; non-numeric columns are
    # not imputed here.
    df = impute_numeric_means(df)

    # Step 3: Filter and subset the data
    # NOTE(review): 'New York' / 'Single Family Home' look like template
    # placeholders; for a file named Mumbai_Property.csv they presumably match
    # no rows -- replace with real values from the dataset. The subset is not
    # used by the remaining steps below.
    filtered_data = filter_listings(df, 'New York', 'Single Family Home')

    # Step 4: Handle categorical variables by one-hot encoding
    df = one_hot_encode(df)

    # The dataset now has cleaned column names, mean-imputed numeric columns
    # and one-hot encoded non-numeric columns. Save it to a new CSV file:
    df.to_csv("Cleaned_RealEstate_Prices.csv", index=False)
