In [1]:
import sqlite3
import pandas as pd

# Charger le dataset nettoyé
df = pd.read_csv("../data/processed/listings_clean.csv")

# Créer / connecter à la base SQLite
conn = sqlite3.connect("../data/processed/airbnb_analysis.db")

# Exporter le dataframe vers SQLite
df.to_sql("listings", conn, if_exists="replace", index=False)

print("Base de données créée avec succès.")
print("Nombre de lignes importées :", len(df))

conn.close()


Base de données créée avec succès.
Nombre de lignes importées : 33887


In [3]:
import sqlite3
import pandas as pd

conn = sqlite3.connect("../data/processed/airbnb_analysis.db")

# Voir les tables
tables = pd.read_sql("SELECT name FROM sqlite_master WHERE type='table';", conn)
tables



Unnamed: 0,name
0,listings


KPI 1 — Nombre d’annonces par ville

In [4]:
query = """
SELECT city, COUNT(*) AS nb_listings
FROM listings
GROUP BY city
ORDER BY nb_listings DESC;
"""
pd.read_sql(query, conn)


Unnamed: 0,city,nb_listings
0,Madrid,18833
1,Barcelona,15054


KPI 2 — Prix moyen + médiane par ville

In [5]:
query_mean = """
SELECT city,
       ROUND(AVG(price), 2) AS avg_price
FROM listings
GROUP BY city;
"""
mean_df = pd.read_sql(query_mean, conn)

query_median = """
WITH ranked AS (
  SELECT
    city,
    price,
    ROW_NUMBER() OVER (PARTITION BY city ORDER BY price) AS rn,
    COUNT(*) OVER (PARTITION BY city) AS cnt
  FROM listings
)
SELECT city,
       AVG(price) AS median_price
FROM ranked
WHERE rn IN ((cnt + 1) / 2, (cnt + 2) / 2)
GROUP BY city;
"""
median_df = pd.read_sql(query_median, conn)

mean_df.merge(median_df, on="city")


Unnamed: 0,city,avg_price,median_price
0,Barcelona,158.26,129.0
1,Madrid,134.16,110.0


KPI 3 — Prix moyen par type de logement et par ville

In [6]:
query = """
SELECT city, room_type,
       ROUND(AVG(price), 2) AS avg_price,
       COUNT(*) AS nb_listings
FROM listings
GROUP BY city, room_type
ORDER BY city, avg_price DESC;
"""
pd.read_sql(query, conn)


Unnamed: 0,city,room_type,avg_price,nb_listings
0,Barcelona,Hotel room,223.94,50
1,Barcelona,Entire home/apt,191.6,10288
2,Barcelona,Private room,85.01,4610
3,Barcelona,Shared room,76.21,106
4,Madrid,Entire home/apt,157.63,13561
5,Madrid,Hotel room,151.1,41
6,Madrid,Private room,73.94,5084
7,Madrid,Shared room,46.42,147


KPI 4 — Top 10 quartiers les plus chers (par ville)

In [7]:
query = """
SELECT city, neighbourhood,
       ROUND(AVG(price), 2) AS avg_price,
       COUNT(*) AS nb_listings
FROM listings
GROUP BY city, neighbourhood
HAVING nb_listings >= 30
ORDER BY avg_price DESC
LIMIT 10;
"""
pd.read_sql(query, conn)


Unnamed: 0,city,neighbourhood,avg_price,nb_listings
0,Barcelona,Diagonal Mar i el Front Marítim del Poblenou,241.86,126
1,Barcelona,la Dreta de l'Eixample,223.32,1902
2,Madrid,Recoletos,216.59,260
3,Barcelona,la Vila Olímpica del Poblenou,213.86,132
4,Madrid,Castellana,208.83,160
5,Madrid,Goya,187.02,313
6,Barcelona,Sant Antoni,185.49,794
7,Barcelona,l'Antiga Esquerra de l'Eixample,185.34,787
8,Barcelona,el Fort Pienc,178.73,385
9,Madrid,Cortes,176.94,855


KPI 5 — Corrélation simple (prix vs disponibilité) en SQL

In [8]:
query = """
SELECT city,
       ROUND(AVG(price),2) AS avg_price,
       ROUND(AVG(availability_365),2) AS avg_availability,
       COUNT(*) AS n
FROM listings
GROUP BY city;
"""
pd.read_sql(query, conn)


Unnamed: 0,city,avg_price,avg_availability,n
0,Barcelona,158.26,227.26,15054
1,Madrid,134.16,212.92,18833


In [9]:
conn.close()
print("Connexion SQLite fermée.")


Connexion SQLite fermée.
