# Analyse Yad2 — Stats & Modélisation
Ce notebook charge les données nettoyées, produit des statistiques descriptives, un **t-test de Welch** sur les prix (Tel Aviv vs Jérusalem) et une **régression linéaire simple** du prix en fonction de la surface.

In [ ]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from pathlib import Path
# Cleaned listings CSV, resolved relative to the notebook's working directory.
DATA = Path('..').joinpath('data', 'processed', 'listings_clean.csv')
df = pd.read_csv(DATA)
# Preview the first rows as the cell's rich output.
df.head()

In [ ]:
# Summary statistics for the loaded listings; include='all' covers every column,
# not only the numeric ones.
df.describe(include='all')

In [ ]:
# Welch's t-test (unequal variances) comparing listing prices in Tel Aviv vs Jerusalem.
# Cities are matched by case-insensitive substring; rows with a missing city are excluded.
# Missing prices are dropped first: ttest_ind propagates NaN by default, so a single
# missing price would turn both the statistic and the p-value into NaN, and the
# reported sample sizes would count rows that carry no price.
tv = df.loc[df['city'].str.contains('Tel Aviv', case=False, na=False), 'price_shekels'].dropna()
jr = df.loc[df['city'].str.contains('Jerusalem', case=False, na=False), 'price_shekels'].dropna()
if len(tv) > 2 and len(jr) > 2:
    t, p = stats.ttest_ind(tv, jr, equal_var=False)
    # Cast to plain floats so the printed dict shows numbers, consistent with the
    # regression output, rather than NumPy scalar reprs.
    print({'t_stat': float(t), 'p_value': float(p), 'n_tv': len(tv), 'n_jr': len(jr)})
else:
    print('Échantillons insuffisants pour le t-test')

In [ ]:
# Simple linear regression of price on surface area (least-squares fit via np.polyfit).
# numpy (np) and matplotlib (plt) come from the notebook's import cell, so they are
# not re-imported here.
x = df['area_sqm'].to_numpy(dtype=float)
y = df['price_shekels'].to_numpy(dtype=float)
# Keep only rows where both values are finite: NaN or inf in either column would
# break the least-squares fit.
mask = np.isfinite(x) & np.isfinite(y)
x_fit, y_fit = x[mask], y[mask]
# A degree-1 fit needs at least two distinct x values; with fewer, the fit is
# ill-conditioned and min()/max() would raise on an empty array.
if np.unique(x_fit).size < 2:
    print('Échantillons insuffisants pour la régression')
else:
    coef = np.polyfit(x_fit, y_fit, deg=1)  # coef[0] is the slope, coef[1] the intercept
    print({'intercept': float(coef[1]), 'slope': float(coef[0])})
    fig, ax = plt.subplots()
    ax.scatter(x_fit, y_fit, s=10)
    # Draw the fitted line across the observed range of surface areas.
    xs = np.linspace(x_fit.min(), x_fit.max(), 100)
    ys = coef[0]*xs + coef[1]
    ax.plot(xs, ys)
    ax.set(xlabel='Surface (m²)', ylabel='Prix (₪)', title='Régression linéaire simple')
    plt.show()
