In [9]:
"""
تحليل المخاطر والتحذيرات للاستثمار العقاري في دبي
الهدف: تحديد المناطق الخطرة، التحذير من الفقاعات، وتقييم المخاطر
"""

'\nتحليل المخاطر والتحذيرات للاستثمار العقاري في دبي\nالهدف: تحديد المناطق الخطرة، التحذير من الفقاعات، وتقييم المخاطر\n'

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")


In [None]:

# Load the merged real-estate / tourism dataset
print("Loading data...")
csv_path = "C:/Users/tf/Desktop/مشروع 1/Data after cleaning/real_estate_tourism_merged.csv"

# Basic cleaning: rows missing any of these core fields are discarded
required_columns = [
    'area_name_en',
    'avg_meter_price',
    'tourism_activity',
    'transactions_count',
    'year_month',
]

# Read, drop incomplete rows, parse the period column to datetime and order
# the rows by area and then chronologically within each area.
df = (
    pd.read_csv(csv_path)
    .dropna(subset=required_columns)
    .assign(year_month=lambda frame: pd.to_datetime(frame['year_month']))
    .sort_values(['area_name_en', 'year_month'])
)

area_total = df['area_name_en'].nunique()
print(f"Records loaded: {len(df)}")
print(f"Areas detected: {area_total}")


In [None]:

# Smooth prices and tourism activity with a per-area rolling mean
def _rolling_mean(series):
    """Mean over a 6-row window; needs at least 3 rows, otherwise NaN."""
    return series.rolling(6, min_periods=3).mean()


# (source column, smoothed column) pairs, processed in this order
for source_col, smooth_col in (
    ('avg_meter_price', 'price_smooth'),
    ('tourism_activity', 'tourism_smooth'),
):
    df[smooth_col] = df.groupby('area_name_en')[source_col].transform(_rolling_mean)


In [None]:

# Lag the smoothed tourism series by three rows within each area
tourism_by_area = df.groupby('area_name_en')['tourism_smooth']
df['tourism_lag_3'] = tourism_by_area.shift(3)


In [None]:

# Composite risk analysis: each area accumulates points for four warning
# signs (maximum possible score: 25 + 25 + 20 + 15 = 85).

# Explicit column list so the result frame keeps its schema (and can be
# sorted / filtered) even when no area passes the minimum-rows filter.
risk_columns = [
    'area',
    'risk_score',
    'price_volatility',
    'tourism_corr_lagged',
    'avg_price',
    'avg_transactions',
    'notes',
]

# Mean of the smoothed price over all rows of df. It does not depend on the
# area, so it is computed once instead of on every loop iteration.
market_price = df['price_smooth'].mean()

risk_rows = []

for area in df['area_name_en'].unique():
    # dropna() keeps only rows with no missing value in any column; this also
    # removes the leading rows where the rolling mean / 3-row lag are NaN.
    area_df = df[df['area_name_en'] == area].dropna()

    # Skip areas with fewer than 18 complete rows
    if len(area_df) < 18:
        continue

    risk_score = 0
    notes = []

    # Volatility = std / mean of the smoothed price. Guarded like the
    # stability analysis: a non-positive mean yields 0 instead of inf/NaN.
    price_mean = area_df['price_smooth'].mean()
    price_std = area_df['price_smooth'].std()
    price_vol = price_std / price_mean if price_mean > 0 else 0.0

    if price_vol > 0.4:
        risk_score += 25
        notes.append("High price volatility")

    # Correlation between the smoothed price and the lagged tourism series.
    # A NaN correlation fails the comparison and therefore adds no points.
    corr_lag = area_df['price_smooth'].corr(area_df['tourism_lag_3'])
    if abs(corr_lag) > 0.6:
        risk_score += 25
        notes.append("Lagged tourism sensitivity")

    # Liquidity: mean of transactions_count over the area's rows
    avg_tx = area_df['transactions_count'].mean()
    if avg_tx < 2:
        risk_score += 20
        notes.append("Low liquidity")

    # Pricing more than 40% above the market-wide mean
    if price_mean > market_price * 1.4:
        risk_score += 15
        notes.append("Above market pricing")

    risk_rows.append({
        'area': area,
        'risk_score': risk_score,
        'price_volatility': round(price_vol, 3),
        'tourism_corr_lagged': round(corr_lag, 3),
        'avg_price': round(price_mean, 2),
        'avg_transactions': round(avg_tx, 2),
        'notes': " | ".join(notes)
    })

risk_df = (
    pd.DataFrame(risk_rows, columns=risk_columns)
    .sort_values('risk_score', ascending=False)
)

high_risk_count = int((risk_df['risk_score'] >= 50).sum())
print(f"High risk areas detected: {high_risk_count}")


In [None]:

# Lagged tourism dependency: per-area correlation between the smoothed price
# and the tourism series lagged by three rows.

# Explicit column list so dependency_df keeps its schema even when no area
# passes the minimum-rows filter (later cells index these columns by name).
dependency_columns = ['area', 'tourism_dependency_lagged', 'avg_price']

dependency_rows = []

for area in df['area_name_en'].unique():
    # Only rows with no missing value in any column are used
    area_df = df[df['area_name_en'] == area].dropna()

    # Skip areas with fewer than 18 complete rows
    if len(area_df) < 18:
        continue

    corr = area_df['price_smooth'].corr(area_df['tourism_lag_3'])

    dependency_rows.append({
        'area': area,
        'tourism_dependency_lagged': round(corr, 3),
        'avg_price': round(area_df['price_smooth'].mean(), 2)
    })

dependency_df = pd.DataFrame(dependency_rows, columns=dependency_columns)
print("Dependency analysis completed")


In [None]:

# Price stability analysis: classify each area by the coefficient of
# variation (std / mean, in percent) of its smoothed price.

# Explicit column list so the result frame keeps its schema (and can be
# sorted) even when no area passes the minimum-rows filter.
stability_columns = [
    'area',
    'price_volatility_%',
    'stability_class',
    'avg_price',
    'transactions',
]

# (exclusive upper bound in %, label), checked in order; anything that matches
# no band is labelled "Volatile".
stability_bands = [
    (15, "Very Stable"),
    (25, "Stable"),
    (40, "Moderate"),
]

stability_rows = []

for area in df['area_name_en'].unique():
    # Only rows with no missing value in any column are used
    area_df = df[df['area_name_en'] == area].dropna()

    # Skip areas with fewer than 24 complete rows
    if len(area_df) < 24:
        continue

    price_mean = area_df['price_smooth'].mean()
    price_std = area_df['price_smooth'].std()
    # A non-positive mean yields 0 rather than dividing by it
    price_cv = (price_std / price_mean) * 100 if price_mean > 0 else 0

    stability = next(
        (label for upper, label in stability_bands if price_cv < upper),
        "Volatile",
    )

    stability_rows.append({
        'area': area,
        'price_volatility_%': round(price_cv, 2),
        'stability_class': stability,
        'avg_price': round(price_mean, 2),
        'transactions': int(area_df['transactions_count'].sum())
    })

stability_df = (
    pd.DataFrame(stability_rows, columns=stability_columns)
    .sort_values('price_volatility_%')
)
print(f"Stability analysis completed: {len(stability_df)} areas")


In [None]:

# Charts: 2x2 grid (risk ranking, tourism-sensitivity scatter, stability pie;
# the fourth panel is left blank).
fig, axes = plt.subplots(2, 2, figsize=(15, 11))

# Top-left: the 15 highest-scoring areas (risk_df is sorted by score, descending)
top_risk = risk_df.head(15)
axes[0, 0].barh(top_risk['area'], top_risk['risk_score'], color='#e74c3c')
axes[0, 0].invert_yaxis()  # highest score at the top of the chart
axes[0, 0].set_title("Top High Risk Areas")
axes[0, 0].set_xlabel("Risk Score")
axes[0, 0].grid(axis='x', alpha=0.3)

# Top-right: lagged tourism correlation against average smoothed price
axes[0, 1].scatter(
    dependency_df['tourism_dependency_lagged'],
    dependency_df['avg_price'],
    alpha=0.6,
    color='#3498db'
)
# Vertical reference line at zero correlation
axes[0, 1].axvline(x=0, color='red', linestyle='--', alpha=0.5)
axes[0, 1].set_title("Lagged Tourism Sensitivity vs Avg Price")
axes[0, 1].set_xlabel("Lagged Tourism Dependency")
axes[0, 1].set_ylabel("Average Price")
axes[0, 1].grid(True, alpha=0.3)

# Bottom-left: share of areas per stability class.
# value_counts() orders classes by frequency, so a positional colour list
# would give a class a different colour depending on the data. Colours are
# therefore looked up per class, and slices follow a fixed class order.
stability_palette = {
    'Very Stable': '#27ae60',
    'Stable': '#3498db',
    'Moderate': '#f39c12',
    'Volatile': '#e74c3c',
}
class_counts = stability_df['stability_class'].value_counts()
stability_counts = class_counts.reindex(
    [label for label in stability_palette if label in class_counts.index]
)
colors = [stability_palette[label] for label in stability_counts.index]

axes[1, 0].pie(
    stability_counts.values,
    labels=stability_counts.index,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90
)
axes[1, 0].set_title("Price Stability Distribution")

# Bottom-right panel is intentionally unused
axes[1, 1].axis('off')

plt.tight_layout()

print("Analysis completed")

# Render explicitly so the figure also appears when run outside a notebook
plt.show()
