# Data Export

This notebook handles:
- Preparing cleaned data for export
- Saving the processed data to a CSV file

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the preprocessed data into `df`.
# NOTE(review): this is a hard-coded absolute path (looks like a Colab-style
# /content location) — confirm it exists in the runtime where this notebook runs.
df = pd.read_csv('/content/data_prices_cleaned.csv')

# Apply the same preprocessing steps
def clean_numeric_column(series):
    """Convert a column of loosely formatted numbers into a numeric Series.

    Every value is rendered as text, all whitespace is removed (regular
    spaces as well as non-breaking / narrow no-break spaces, which are
    commonly used as thousands separators), a decimal comma is turned into a
    decimal point, and the result is parsed with ``pd.to_numeric``.

    Anything that still does not parse as a number -- labels such as
    'À Vendre' or 'Location', a bare '+', or the text forms of missing
    values ('nan', 'None') -- becomes NaN.

    Args:
        series: pandas Series holding numbers, numeric strings or junk text.

    Returns:
        A numeric Series with the same length and index as ``series``;
        unparseable entries are NaN.
    """
    text = series.astype(str)
    # Strip every kind of whitespace, not just the ASCII space. NBSP (U+00A0)
    # and narrow NBSP (U+202F) are listed explicitly so the pattern does not
    # depend on how the regex engine defines \s.
    text = text.str.replace('[\\s\u00a0\u202f]+', '', regex=True)
    text = text.str.replace(',', '.', regex=False)
    # errors='coerce' maps every non-numeric token to NaN, so no explicit
    # list of placeholder strings is needed.
    return pd.to_numeric(text, errors='coerce')

# Parse the raw text columns into numbers (unparseable entries become NaN).
for raw_col in ['superficie', 'chambres', 'salles_de_bains', 'price']:
    df[raw_col] = clean_numeric_column(df[raw_col])

df = df.rename(columns={'superficie': 'size', 'chambres': 'room_count', 'salles_de_bains': 'bathroom_count'})

# Keep only apartments for sale located in one of the listed states.
grand_tunis_states = ['Ben Arous', 'Tunis', 'La Manouba', 'Ariana']
df = df[(df['transaction'] == 'sale') & (df['category'] == 'Appartements') & (df['state'].isin(grand_tunis_states))].copy()

# Rescale price by 1/1000; every price threshold below is in the rescaled units.
df['price'] = df['price'] / 1000

# Plausibility filters. A comparison against NaN is False, so rows missing
# size, price, room_count or bathroom_count fail the range checks and are dropped.
df = df[(df['size'] < 500) & (df['size'] >= 24)]
df = df[df['price'] > 20]
df = df[~(df['price'] / df['size'] > 6)]
df = df[~((df['size'] > 70) & (df['price'] < 70))]
# The price comparison needs its own parentheses: `&` binds tighter than `>`,
# so `(size < 90) & (price) > 1000` compared the result of the `&` with 1000
# instead of combining `size < 90` with `price > 1000`.
# With the price/size cap of 6 above, rows matching this rule (ratio > 11) are
# already excluded; the rule is kept as an explicit guard.
df = df[~((df['size'] < 90) & (df['price'] > 1000))]
df = df[(df['room_count'] > 0) & (df['room_count'] < 10)]
df = df[df['bathroom_count'] >= 0]

# Drop the columns that are not used further in this notebook.
df = df.drop(columns=['contact', 'category', 'location', 'descriptions', 'currency' , 'date','transaction','titles','shops','profiles'])
# Reassign instead of using inplace=True so the cell reads as a plain pipeline.
df = df.dropna(subset=['price', 'size', 'room_count', 'bathroom_count'])

print(f"Data loaded: {df.shape}")

In [None]:
# Show the filtered DataFrame; as the cell's last expression it is rendered as the cell output.
df

## Prepare Data for Export

In [None]:
# Section banner for the export-preparation step.
print(
    "\n" + "=" * 80,
    "STEP 9: PREPARING DATA FOR EXPORT",
    "=" * 80,
    sep="\n",
)

# Keep only the export columns, then relabel them in one pass:
# 'state' is exported as 'city' and the original 'city' as 'region'.
df_export = (
    df.loc[:, ['room_count', 'bathroom_count', 'size', 'price', 'state', 'city']]
    .copy()
    .rename(columns={'state': 'city', 'city': 'region'})
)

print("Displaying the first 5 rows of the export-ready DataFrame:")
display(df_export.head(5))

## Save to CSV

In [None]:
# Write the export-ready DataFrame to disk, leaving the index out of the file.
output_csv_path = 'processed_apartment_data.csv'
df_export.to_csv(path_or_buf=output_csv_path, index=False)

# Confirm where the file went and how much data it holds.
print(
    f"\nData successfully saved to '{output_csv_path}'",
    f"Exported DataFrame shape: {df_export.shape}",
    sep="\n",
)