# Price Analysis
## House Prices in Grand Tunis - Data Mining Project

This notebook contains comprehensive price analysis including price distributions, relationships with property features, and price comparisons across different categories.

## Import Libraries and Load Cleaned Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Read the cleaned apartments CSV from the processed-data folder.
# The path is relative, so it resolves against the notebook's working directory.
CLEANED_DATA_PATH = '../data/processed/source_1/apartments_cleaned.csv'
df = pd.read_csv(CLEANED_DATA_PATH)

# Sanity check: report the (rows, columns) shape and preview the first rows.
print(f"Loaded dataset shape: {df.shape}")
display(df.head())

## Overall Price Distribution

In [None]:
# Show the raw summary statistics first so the log-scale plot below can be
# read against actual price values.
print("Descriptive statistics for 'price' column:")
display(df['price'].describe())

# log1p(x) = log(1 + x): compresses the price scale and stays defined at 0.
overall_log_price = np.log1p(df['price'])

fig, ax = plt.subplots(figsize=(10, 6))
# A single box has no hue groups, so a palette does not apply. seaborn
# deprecates `palette` without `hue`; take the one colour viridis yields for a
# single category and pass it through `color` instead.
sns.boxplot(y=overall_log_price, color=sns.color_palette('viridis', 1)[0], ax=ax)
ax.set_title('Overall Distribution of Property Prices (Log-transformed)')
ax.set_ylabel('Log(Price + 1)')
fig.tight_layout()
plt.show()

## Price Distribution by Region

In [None]:
# Carry the transformed price as a real column so that x, y and hue all come
# from the same frame. Mixing a separately built Series with a re-sorted
# `data` frame leaves the row order (and thus the box order) up to index
# alignment rather than to the sort.
region_prices = df.assign(log_price=np.log1p(df['price']))

# Explicit box order: regions ranked by their highest listing price, highest
# first. This is the order a descending price sort yields by first appearance.
region_order = (
    df.groupby('region')['price']
    .max()
    .sort_values(ascending=False)
    .index
    .tolist()
)

fig, ax = plt.subplots(figsize=(12, 8))
# `palette` without `hue` is deprecated in seaborn; colouring by the same
# variable as the category axis keeps one colour per region.
sns.boxplot(
    data=region_prices,
    x='log_price',
    y='region',
    order=region_order,
    hue='region',
    hue_order=region_order,
    palette='plasma',
    dodge=False,  # hue duplicates the y variable, so boxes must not be split
    ax=ax,
)
# The hue legend would only repeat the y-axis labels; drop it if one was drawn.
region_legend = ax.get_legend()
if region_legend is not None:
    region_legend.remove()

ax.set_title('Price Distribution per Region (Log-transformed)')
ax.set_xlabel('Log(Price + 1)')
ax.set_ylabel('Region')
fig.tight_layout()
plt.show()

## Price Distribution by City

In [None]:
# Carry the transformed price as a real column so that x, y and hue all come
# from the same frame. Mixing a separately built Series with a re-sorted
# `data` frame leaves the row order (and thus the box order) up to index
# alignment rather than to the sort.
city_prices = df.assign(log_price=np.log1p(df['price']))

# Explicit box order: cities ranked by their highest listing price, highest
# first. This is the order a descending price sort yields by first appearance.
city_order = (
    df.groupby('city')['price']
    .max()
    .sort_values(ascending=False)
    .index
    .tolist()
)

fig, ax = plt.subplots(figsize=(12, 8))
# `palette` without `hue` is deprecated in seaborn; colouring by the same
# variable as the category axis keeps one colour per city.
sns.boxplot(
    data=city_prices,
    x='log_price',
    y='city',
    order=city_order,
    hue='city',
    hue_order=city_order,
    palette='mako',
    dodge=False,  # hue duplicates the y variable, so boxes must not be split
    ax=ax,
)
# The hue legend would only repeat the y-axis labels; drop it if one was drawn.
city_legend = ax.get_legend()
if city_legend is not None:
    city_legend.remove()

ax.set_title('Price Distribution per City (Log-transformed)')
ax.set_xlabel('Log(Price + 1)')
ax.set_ylabel('City')
fig.tight_layout()
plt.show()

## Price Distribution by Property Features

In [None]:
# Box plot of log-transformed price, one box per bathroom count.
fig, ax = plt.subplots(figsize=(10, 6))
# Built from df['price'], so it shares df's index and lines up with bathroom_count.
bathroom_log_price = np.log1p(df['price'])
sns.boxplot(
    data=df,
    x='bathroom_count',
    y=bathroom_log_price,
    palette='cubehelix',
    ax=ax,
)
ax.set_title('Price Distribution by Bathroom Count (Log-transformed)')
ax.set_xlabel('Number of Bathrooms')
ax.set_ylabel('Log(Price + 1)')
fig.tight_layout()
plt.show()

# Box plot of log-transformed price, one box per room count.
fig, ax = plt.subplots(figsize=(10, 6))
# Built from df['price'], so it shares df's index and lines up with room_count.
room_log_price = np.log1p(df['price'])
sns.boxplot(
    data=df,
    x='room_count',
    y=room_log_price,
    palette='magma',
    ax=ax,
)
ax.set_title('Price Distribution by Room Count (Log-transformed)')
ax.set_xlabel('Number of Rooms')
ax.set_ylabel('Log(Price + 1)')
fig.tight_layout()
plt.show()

# Untransformed price statistics (count, mean, std, quartiles, min/max) per
# room count, to complement the log-scale box plot.
print("Descriptive statistics of price by room count (in kTND):")
room_price_stats = df.groupby('room_count')['price'].describe()
display(room_price_stats)

# Scatter of log-transformed price against size, with a fitted regression
# line drawn in red over semi-transparent points.
fig, ax = plt.subplots(figsize=(10, 6))
size_log_price = np.log1p(df['price'])
sns.regplot(
    data=df,
    x='size',
    y=size_log_price,
    scatter_kws={'alpha': 0.6},
    line_kws={'color': 'red'},
    ax=ax,
)
ax.set_title('Price Distribution per Size (Log-transformed Price)')
ax.set_xlabel('Size (square meters)')
ax.set_ylabel('Log(Price + 1)')
fig.tight_layout()
plt.show()

## Details of Apartments Below 80 kTND (80,000 TND)

In [None]:
# Per the section heading, 80.0 in the price column corresponds to 80,000 TND.
# Build the filtered frame once and derive the count from it.
apartments_below_80k_df = df.loc[df['price'].lt(80.0)]
num_apartments_below_80k = apartments_below_80k_df.shape[0]
print(f"Number of apartments with price below 80,000 TND: {num_apartments_below_80k}")

display(apartments_below_80k_df)

## Details of Apartments Above 1 MTND (1,000,000 TND)

In [None]:
# Select listings priced strictly above 1000 in the price column's unit,
# which the print message below describes as 1 MTND.
is_above_1m = df['price'].gt(1000)
apartments_above_1M_df = df.loc[is_above_1m]

num_apartments_above_1M = apartments_above_1M_df.shape[0]
print(f"Number of apartments with price above 1 MTND: {num_apartments_above_1M}")

display(apartments_above_1M_df)