# Exploration des capacit√©s GeoDjango avec les donn√©es GPS T-Drive

Ce notebook explore les fonctionnalit√©s spatiales de GeoDjango en utilisant le dataset T-Drive de trajectoires de taxis.

**Objectifs:**
- Charger et structurer des donn√©es GPS
- Cr√©er des objets g√©om√©triques GeoDjango
- Effectuer des op√©rations spatiales
- Analyser et visualiser les trajectoires

## 1. Configuration et Imports

In [3]:
import os
import sys
from pathlib import Path
import django
from django.conf import settings

# Only set up Django if not already configured
if not settings.configured:
    BASE_DIR = Path.cwd().parent  # server/
    sys.path.append(str(BASE_DIR))
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
    django.setup()

print("‚úì Django configur√© avec succ√®s")


‚úì Django configur√© avec succ√®s


In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from django.contrib.gis.geos import Point, LineString, Polygon, MultiPoint
from django.contrib.gis.measure import D
from django.contrib.gis.db.models.functions import Distance, Area, Length
from django.db.models import Count, Avg, Max, Min, F, Sum
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration des graphiques
%matplotlib inline
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

print("‚úì Biblioth√®ques import√©es")

  machar = _get_machar(dtype)


‚úì Biblioth√®ques import√©es


In [5]:
# Import des mod√®les Django (√† adapter selon votre structure)
# from apps.mobility.models import GPSTrace, Trip
# from apps.poi.models import POI

print("‚úì Mod√®les import√©s (d√©commenter selon vos mod√®les)")

‚úì Mod√®les import√©s (d√©commenter selon vos mod√®les)


## 2. Chargement des Donn√©es T-Drive

In [None]:
import pandas as pd
import requests
from pathlib import Path
import re
import zipfile
import io

def load_tdi_trajectory_data_from_bib(bib_path, data_dir=None):
    """
    1. Parse the .bib file to extract the download URL.
    2. Download the dataset if not already present.
    3. Load trajectory data into a DataFrame.
    
    Args:
        bib_path (str or Path): Path to the .bib file.
        data_dir (str or Path, optional): Directory to store downloaded data. Defaults to '~/data/tdrive'.
    
    Returns:
        pd.DataFrame
    """
    bib_path = Path(bib_path)
    
    if data_dir is None:
        data_dir = Path.home() / "data" / "tdrive"
    data_dir.mkdir(parents=True, exist_ok=True)
    
    # Step 1: Parse the .bib file for the URL
    url = None
    with open(bib_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line.startswith('url'):
                # Extract URL between { and }
                match = re.search(r'{(.+)}', line)
                if match:
                    url = match.group(1)
                break

    if url is None:
        raise ValueError(f"No URL found in {bib_path}")
    
    # Step 2: Download the file if not already present
    filename = url.split('/')[-1]
    file_path = data_dir / filename
    if not file_path.exists():
        print(f"‚¨áÔ∏è Downloading dataset from {url} ...")
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            with open(file_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"‚úÖ Downloaded to {file_path}")
        else:
            raise RuntimeError(f"Failed to download file, status code: {r.status_code}")
    else:
        print(f"üìÇ Dataset already exists at {file_path}")
    
    # Step 3: Extract if it's a zip
    if zipfile.is_zipfile(file_path):
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
            print(f"üì¶ Extracted zip to {data_dir}")
    
    # Step 4: Load the actual trajectory file (assumes CSV or TXT with taxi_id, timestamp, lon, lat)
    # Note: Adapt filename here if T-Drive structure differs
    # We'll look for the first CSV or TXT in the folder
    trajectory_files = list(data_dir.glob("**/*.txt")) + list(data_dir.glob("**/*.csv"))
    if not trajectory_files:
        raise RuntimeError(f"No trajectory files found in {data_dir}")
    
    traj_file = trajectory_files[0]
    print(f"üìä Loading trajectory data from {traj_file}")
    
    # Attempt to load CSV/TXT; you may need to adapt delimiter/columns for your dataset
    df = pd.read_csv(traj_file, delimiter=',', header=None, names=['taxi_id','timestamp','longitude','latitude'])
    
    # Convert types
    df['longitude'] = pd.to_numeric(df['longitude'], errors='coerce')
    df['latitude'] = pd.to_numeric(df['latitude'], errors='coerce')
    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
    df = df.dropna()
    df = df.sort_values(['taxi_id','timestamp'])
    
    print(f"‚úÖ Loaded {len(df)} GPS points from {df['taxi_id'].nunique()} taxis")
    return df


In [11]:
# Chargement des donn√©es
bib_file = BASE_DIR / "data" / "bib" / "t-drive-trajectory-data-sample.bib"
df_gps = load_tdi_trajectory_data(bib_file)

if df_gps.empty:
    print(f"‚ö†Ô∏è Aucune donn√©e GPS trouv√©e dans {bib_file}")
else:
    print(f"‚úì Donn√©es charg√©es: {len(df_gps)} points GPS")
    print(f"‚úì Nombre de taxis: {df_gps['taxi_id'].nunique()}")
    print(f"‚úì P√©riode: {df_gps['timestamp'].min()} √† {df_gps['timestamp'].max()}")
    print(f"\nAper√ßu des donn√©es:")
    display(df_gps.head(10))


‚ö†Ô∏è No trajectory data found in /home/paulh/TAI/server/data/bib/t-drive-trajectory-data-sample.bib
‚ö†Ô∏è Aucune donn√©e GPS trouv√©e dans /home/paulh/TAI/server/data/bib/t-drive-trajectory-data-sample.bib


In [None]:
# Statistiques descriptives
print("Informations sur le dataset:")
print(df_gps.info())
print("\nStatistiques:")
df_gps.describe()

## 3. Cr√©ation d'Objets G√©om√©triques GeoDjango

### 3.1 Points GPS

In [None]:
# Cr√©ation de Points GeoDjango (SRID 4326 = WGS84)
df_gps['point'] = df_gps.apply(
    lambda row: Point(row['longitude'], row['latitude'], srid=4326),
    axis=1
)

print(f"‚úì {len(df_gps)} Points cr√©√©s (SRID 4326 - WGS84)")
print(f"\nExemple de Point:")
print(df_gps['point'].iloc[0])
print(f"Type: {df_gps['point'].iloc[0].geom_type}")
print(f"Coordonn√©es: {df_gps['point'].iloc[0].coords}")

### 3.2 LineStrings (Trajectoires)

In [None]:
# Cr√©ation de LineString par trajectoire de taxi
trajectories = {}
for taxi_id in df_gps['taxi_id'].unique()[:5]:  # Premiers 5 taxis
    taxi_data = df_gps[df_gps['taxi_id'] == taxi_id].sort_values('timestamp')
    coords = [(row['longitude'], row['latitude']) for _, row in taxi_data.iterrows()]
    
    if len(coords) >= 2:
        trajectories[taxi_id] = LineString(coords, srid=4326)
        
print(f"‚úì {len(trajectories)} trajectoires LineString cr√©√©es")
print(f"\nExemple de LineString (Taxi {list(trajectories.keys())[0]}):")
first_traj = list(trajectories.values())[0]
print(f"Type: {first_traj.geom_type}")
print(f"Nombre de points: {len(first_traj.coords)}")
print(f"Premier point: {first_traj.coords[0]}")
print(f"Dernier point: {first_traj.coords[-1]}")

### 3.3 Polygon (Zone d'√©tude)

In [None]:
# Cr√©ation d'un Polygon - Bounding box autour des donn√©es
min_lon, max_lon = df_gps['longitude'].min(), df_gps['longitude'].max()
min_lat, max_lat = df_gps['latitude'].min(), df_gps['latitude'].max()

study_area = Polygon((
    (min_lon, min_lat),
    (max_lon, min_lat),
    (max_lon, max_lat),
    (min_lon, max_lat),
    (min_lon, min_lat)
), srid=4326)

print(f"‚úì Zone d'√©tude cr√©√©e: {study_area.geom_type}")
print(f"  Bounds: [{min_lon:.4f}, {min_lat:.4f}] √† [{max_lon:.4f}, {max_lat:.4f}]")
print(f"  Nombre de sommets: {len(study_area.coords[0])}")
print(f"  SRID: {study_area.srid}")

## 4. Op√©rations Spatiales GeoDjango

### 4.1 Calcul de Distances

In [None]:
sample_points = df_gps.head(10)['point'].tolist()

if len(sample_points) >= 2:
    # Distance en degr√©s (syst√®me WGS84)
    dist_degrees = sample_points[0].distance(sample_points[1])
    print(f"Distance entre 2 premiers points:")
    print(f"  En degr√©s: {dist_degrees:.6f}¬∞")
    
    # Transformation en projection m√©trique pour distance r√©elle
    p1_metric = sample_points[0].transform(3857, clone=True)  # Web Mercator
    p2_metric = sample_points[1].transform(3857, clone=True)
    dist_meters = p1_metric.distance(p2_metric)
    print(f"  En m√®tres: {dist_meters:.2f} m")
    print(f"  En kilom√®tres: {dist_meters/1000:.3f} km")

### 4.2 Longueur de Trajectoires

In [None]:
print("Longueur des trajectoires:")
print("-" * 50)

for taxi_id, trajectory in list(trajectories.items())[:3]:
    # Longueur en degr√©s
    length_deg = trajectory.length
    
    # Transformation en projection m√©trique
    traj_metric = trajectory.transform(3857, clone=True)
    length_m = traj_metric.length
    length_km = length_m / 1000
    
    print(f"Taxi {taxi_id}:")
    print(f"  Longueur: {length_km:.2f} km")
    print(f"  Nombre de points: {len(trajectory.coords)}")
    print()

### 4.3 Tests de Contenance (Contains/Within)

In [None]:
# Test si les points sont dans la zone d'√©tude
points_in_area = sum(1 for point in sample_points if study_area.contains(point))
print(f"Points dans la zone d'√©tude: {points_in_area}/{len(sample_points)}")

# Test inverse: le point est-il dans le polygone?
test_point = sample_points[0]
print(f"\nLe point {test_point.coords} est-il dans la zone?")
print(f"  study_area.contains(point): {study_area.contains(test_point)}")
print(f"  point.within(study_area): {test_point.within(study_area)}")

### 4.4 Buffer (Zones Tampons)

In [None]:
# Cr√©ation d'un buffer autour du point central
center_point = Point(
    df_gps['longitude'].mean(), 
    df_gps['latitude'].mean(), 
    srid=4326
)

# Buffer de ~500m (approximatif en degr√©s)
buffer_500m = center_point.buffer(0.005)

print(f"Point central: {center_point.coords}")
print(f"\nBuffer cr√©√©:")
print(f"  Type: {buffer_500m.geom_type}")
print(f"  Rayon: ~500m")
print(f"  Nombre de sommets: {len(buffer_500m.coords[0])}")

### 4.5 Intersections Spatiales

In [None]:
# Trajectoires intersectant le buffer
print("Analyse des intersections:")
print("-" * 50)

intersecting_trajectories = 0
for taxi_id, traj in trajectories.items():
    if traj.intersects(buffer_500m):
        intersecting_trajectories += 1
        print(f"‚úì Taxi {taxi_id} traverse la zone centrale")
    else:
        print(f"‚úó Taxi {taxi_id} ne traverse pas la zone centrale")

print(f"\nTotal: {intersecting_trajectories}/{len(trajectories)} trajectoires intersectant le buffer")

## 5. Exemples de Requ√™tes Django ORM Spatiales

Ces exemples montrent comment utiliser GeoDjango ORM pour des requ√™tes spatiales avanc√©es dans votre application Django.

In [None]:
# EXEMPLE 1: Trouver tous les points GPS dans un rayon
print("""# Requ√™te 1: Points GPS √† proximit√©
center = Point(longitude, latitude, srid=4326)
nearby_traces = GPSTrace.objects.filter(
    location__distance_lte=(center, D(km=1))
).annotate(
    distance=Distance('location', center)
).order_by('distance')[:10]
""")

In [None]:
# EXEMPLE 2: Trajectoires intersectant une zone
print("""# Requ√™te 2: Trajectoires dans une zone
zone = Polygon(coordinates, srid=4326)
trips_in_zone = Trip.objects.filter(
    trajectory__intersects=zone
).count()
""")

In [None]:
# EXEMPLE 3: POIs √† proximit√© d'une trajectoire
print("""# Requ√™te 3: POIs proches d'une trajectoire
trajectory = LineString(coords, srid=4326)
nearby_pois = POI.objects.filter(
    location__distance_lte=(trajectory, D(m=500))
).annotate(
    distance=Distance('location', trajectory)
).order_by('distance')
""")

In [None]:
# EXEMPLE 4: Longueur totale par v√©hicule
print("""# Requ√™te 4: Statistiques de distance par v√©hicule
from django.contrib.gis.db.models.functions import Length

trip_stats = Trip.objects.annotate(
    length_km=Length('trajectory') / 1000
).values('vehicle_id').annotate(
    total_km=Sum('length_km'),
    avg_km=Avg('length_km'),
    trip_count=Count('id')
).order_by('-total_km')
""")

In [None]:
# EXEMPLE 5: Bounding Box Query
print("""# Requ√™te 5: Points dans un rectangle
bbox = Polygon.from_bbox((min_lon, min_lat, max_lon, max_lat))
points_in_bbox = GPSTrace.objects.filter(
    location__within=bbox
).count()
""")

In [None]:
# EXEMPLE 6: Plus proche voisin
print("""# Requ√™te 6: POI le plus proche pour chaque trace
from django.db.models import OuterRef, Subquery

nearest_poi = POI.objects.filter(
    location__distance_lte=(OuterRef('location'), D(m=100))
).order_by(
    Distance('location', OuterRef('location'))
)[:1]

traces_with_nearest_poi = GPSTrace.objects.annotate(
    nearest_poi_id=Subquery(nearest_poi.values('id')),
    nearest_poi_distance=Subquery(
        nearest_poi.annotate(
            dist=Distance('location', OuterRef('location'))
        ).values('dist')
    )
)
""")

## 6. Analyse Statistique Spatiale

### 6.1 Calcul des Vitesses

In [None]:
# Calcul des vitesses entre points cons√©cutifs
df_gps_sorted = df_gps.sort_values(['taxi_id', 'timestamp']).copy()
df_gps_sorted['time_diff'] = df_gps_sorted.groupby('taxi_id')['timestamp'].diff().dt.total_seconds()
df_gps_sorted['dist_diff'] = 0.0

print("Calcul des distances entre points cons√©cutifs...")
for i in range(1, len(df_gps_sorted)):
    if df_gps_sorted.iloc[i]['taxi_id'] == df_gps_sorted.iloc[i-1]['taxi_id']:
        p1 = df_gps_sorted.iloc[i-1]['point'].transform(3857, clone=True)
        p2 = df_gps_sorted.iloc[i]['point'].transform(3857, clone=True)
        df_gps_sorted.iloc[i, df_gps_sorted.columns.get_loc('dist_diff')] = p1.distance(p2)

# Calcul des vitesses en km/h
df_gps_sorted['speed_kmh'] = (df_gps_sorted['dist_diff'] / df_gps_sorted['time_diff']) * 3.6
df_gps_sorted = df_gps_sorted[df_gps_sorted['speed_kmh'].notna()]
df_gps_sorted = df_gps_sorted[df_gps_sorted['speed_kmh'] < 200]  # Filtrage valeurs aberrantes

print("‚úì Vitesses calcul√©es")

In [None]:
# Statistiques de vitesse
print("STATISTIQUES DE VITESSE")
print("=" * 50)
print(f"Vitesse moyenne: {df_gps_sorted['speed_kmh'].mean():.2f} km/h")
print(f"Vitesse m√©diane: {df_gps_sorted['speed_kmh'].median():.2f} km/h")
print(f"Vitesse min: {df_gps_sorted['speed_kmh'].min():.2f} km/h")
print(f"Vitesse max: {df_gps_sorted['speed_kmh'].max():.2f} km/h")
print(f"√âcart-type: {df_gps_sorted['speed_kmh'].std():.2f} km/h")

# Percentiles
print("\nPercentiles:")
for p in [25, 50, 75, 90, 95]:
    print(f"  {p}e percentile: {df_gps_sorted['speed_kmh'].quantile(p/100):.2f} km/h")

### 6.2 Distribution Spatiale

In [None]:
print("DISTRIBUTION SPATIALE")
print("=" * 50)
print(f"Longitude: [{min_lon:.6f}, {max_lon:.6f}]")
print(f"Latitude: [{min_lat:.6f}, {max_lat:.6f}]")
print(f"√âtendue: {(max_lon - min_lon):.6f}¬∞ √ó {(max_lat - min_lat):.6f}¬∞")
print(f"\nCentre approximatif:")
print(f"  Longitude: {df_gps['longitude'].mean():.6f}")
print(f"  Latitude: {df_gps['latitude'].mean():.6f}")

### 6.3 Distribution Temporelle