# Correlation Analysis and Export
## House Prices in Grand Tunis - Data Mining Project

This notebook contains the final correlation analysis, feature relationships study, and data export for the cleaned dataset.

## Import Libraries and Load Cleaned Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Read the cleaned apartments CSV into `df`, the frame every later cell works on.
cleaned_csv_path = '../data/processed/source_1/apartments_cleaned.csv'
df = pd.read_csv(cleaned_csv_path)

# Quick sanity check: report (rows, columns) and preview the first rows.
print(f"Loaded dataset shape: {df.shape}")
display(df.head())

## Correlation Matrix of Numerical Features

In [None]:
# Restrict the frame to numeric columns; non-numeric columns are left out of
# the correlation computation.
numeric_df_property_prices = df.select_dtypes(include='number')

# Pairwise correlation between every pair of numeric columns
# (Pearson is pandas' default method; it is spelled out here for the reader).
correlation_matrix = numeric_df_property_prices.corr(method='pearson')
display(correlation_matrix)

## Visualizing Correlation Matrix with Matplotlib and Seaborn

In [None]:
# Draw the correlation matrix as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,        # write each coefficient inside its cell
    fmt='.2f',         # two decimal places for the annotations
    cmap='coolwarm',
    linewidths=.5,     # thin separators between cells
    ax=ax,
)
ax.set_title('Correlation Matrix of Numeric Features in df_property_prices')
plt.show()

# Export the dataset and print a short textual summary of it.

# Save to CSV (index=False keeps the DataFrame's row index out of the file).
df.to_csv('../data/processed/source_1/apartments_cleaned_final.csv', index=False)
print("Final cleaned dataset exported successfully!")
print(f"Dataset shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

# Summary statistics
print("\n=== FINAL DATASET SUMMARY ===")
print(f"Total number of apartments: {len(df)}")
print(f"Price range: {df['price'].min():.2f} - {df['price'].max():.2f} kTND")
print(f"Average price: {df['price'].mean():.2f} kTND")
print(f"Median price: {df['price'].median():.2f} kTND")
# "\u00b2" is SUPERSCRIPT TWO; the previous literal was mojibake ("mÂ²") from a
# UTF-8 / Latin-1 mix-up. The escape keeps the unit intact across re-encodings.
print(f"Size range: {df['size'].min():.0f} - {df['size'].max():.0f} m\u00b2")
# str.join raises TypeError on non-string items, so drop missing values and
# coerce the rest to str before joining the distinct city names.
city_names = df['city'].dropna().astype(str).unique()
print(f"Cities covered: {', '.join(city_names)}")
print(f"Number of regions: {df['region'].nunique()}")
print("\nDataset ready for machine learning and further analysis!")