<a href="https://colab.research.google.com/github/ADEV1010/TrafficNavigator/blob/main/DataHackathon_TrafficNavigator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tqdm



In [2]:
!pip install pytz requests-cache

Collecting requests-cache
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting cattrs>=22.2 (from requests-cache)
  Downloading cattrs-24.1.0-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache)
  Downloading url_normalize-1.4.3-py2.py3-none-any.whl.metadata (3.1 kB)
Downloading requests_cache-1.2.1-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/61.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cattrs-24.1.0-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading url_normalize-1.4.3-py2.py3-none-any.whl (6.8 kB)
Installing collected packages: url-normalize, cattrs, requests-cache
Successfully installed cattrs-24.1.0 requests-cache-1.2.1 url-normalize-1.4.3


In [3]:
import asyncio
import aiohttp
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import json
from aiohttp import ClientSession, TCPConnector
from asyncio import Semaphore
import nest_asyncio
from tqdm import tqdm
import pytz
import requests
import requests_cache

In [4]:
# Apply nest_asyncio to allow nested event loops
# (needed so run_until_complete()/asyncio.run() can be called from a notebook
# cell; must run before any of the run_* helpers below are invoked).
nest_asyncio.apply()

# Set up caching for weather API requests
# Cached responses are stored under the name 'weather_cache' and expire after
# 3600 seconds, so weather values can be up to one hour old.
# NOTE(review): install_cache presumably applies to every `requests` call in
# this kernel, not only the weather ones - confirm against requests_cache docs.
requests_cache.install_cache('weather_cache', expire_after=3600)

In [5]:
import os

# Credentials are read from the environment when available so that real keys
# do not have to live in the notebook. The literals are kept only as a
# backward-compatible fallback.
# NOTE(review): these fallback keys are committed in plain text - rotate them
# and drop the defaults once the environment variables are configured.
API_KEY = os.environ.get('TOMTOM_API_KEY', 'SkudnJXWgFsH0cP0VEWQNRtxD5oBcASd')
WEATHER_API_KEY = os.environ.get('OPENWEATHER_API_KEY', '28edebb4a2f283c1805f913584ad2905')
BASE_URL = "https://api.tomtom.com/traffic/services/4/flowSegmentData/absolute/10/json"
# HTTPS so the `appid` query parameter is not sent in clear text.
WEATHER_URL = "https://api.openweathermap.org/data/2.5/weather"

# Define a list of coordinates covering the area of interest
COORDINATES = [
    (40.7128, -74.0060),  # New York City
    (40.7831, -73.9712),  # Upper Manhattan
    (40.6782, -73.9442),  # Brooklyn
    (40.7282, -73.7949),  # Queens
    (40.8448, -73.8648),  # Bronx
    (40.5795, -74.1502),  # Staten Island
]

# Semaphore to limit concurrent requests
MAX_CONCURRENT_REQUESTS = 20
semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)

# Time zone for New York City
NYC_TZ = pytz.timezone('America/New_York')

In [6]:
async def fetch_traffic_data(session, latitude, longitude):
    """Request flow-segment data for one coordinate from the traffic API.

    Args:
        session: An open aiohttp ``ClientSession`` used to issue the request.
        latitude: Latitude of the query point.
        longitude: Longitude of the query point.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200 status,
        timeout, or any aiohttp client/connection error).
    """
    params = {
        'key': API_KEY,
        'point': f"{latitude},{longitude}"
    }
    # The module-level semaphore caps the number of in-flight requests.
    async with semaphore:
        try:
            async with session.get(BASE_URL, params=params, timeout=10) as response:
                if response.status == 200:
                    return await response.json()
                print(f"Error: {response.status} for coordinates {latitude}, {longitude}")
                return None
        except asyncio.TimeoutError:
            print(f"Timeout for coordinates {latitude}, {longitude}")
            return None
        except aiohttp.ClientError as exc:
            # Connection resets, DNS failures, malformed payloads, etc. must not
            # propagate: a single failing coordinate would otherwise abort the
            # whole batch awaited by the caller.
            print(f"Request failed for coordinates {latitude}, {longitude}: {exc}")
            return None

In [7]:
def get_weather(lat, lon):
    """Fetch current weather for a coordinate.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.

    Returns:
        A dict with ``temperature``, ``humidity`` and ``weather_condition``
        on success, or ``None`` when the request fails, times out, returns a
        non-200 status, or the payload lacks the expected fields.
    """
    params = {
        'lat': lat,
        'lon': lon,
        'appid': WEATHER_API_KEY,
        'units': 'metric'
    }
    try:
        # A timeout is essential here: this synchronous call is made from code
        # driven by the asyncio loop, so a hung socket would stall everything.
        response = requests.get(WEATHER_URL, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            return {
                'temperature': data['main']['temp'],
                'humidity': data['main']['humidity'],
                'weather_condition': data['weather'][0]['main']
            }
        else:
            print(f"Error fetching weather data: {response.status_code}")
            return None
    except Exception as e:
        # Best-effort enrichment: weather is optional, so report and continue.
        print(f"Exception while fetching weather data: {e}")
        return None

In [8]:
def process_traffic_data(data, latitude, longitude):
    """Turn one raw traffic response into a flat feature record.

    This is the first version of the function; a later cell redefines it with
    an additional ``time_of_day`` field.

    Args:
        data: Decoded traffic response containing ``flowSegmentData``, or a
            falsy value when the fetch failed.
        latitude: Latitude the response was requested for.
        longitude: Longitude the response was requested for.

    Returns:
        A dict of traffic, calendar and weather fields, or ``None`` when
        ``data`` is falsy. Weather fields stay ``None`` if the lookup fails.
    """
    if not data:
        return None

    segment = data['flowSegmentData']
    now = datetime.now(NYC_TZ)
    weather = get_weather(latitude, longitude)

    record = {
        'timestamp': now.isoformat(),
        'latitude': latitude,
        'longitude': longitude,
        'current_speed': segment['currentSpeed'],
        'free_flow_speed': segment['freeFlowSpeed'],
        'confidence': segment['confidence'],
        'road_closure': segment['roadClosure'],
        'hour': now.hour,
        'day_of_week': now.weekday(),
        # Saturday (5) and Sunday (6) count as weekend.
        'is_weekend': int(now.weekday() >= 5),
        'is_rush_hour': int(now.hour in [7, 8, 9, 16, 17, 18]),
        'temperature': None,
        'humidity': None,
        'weather_condition': None
    }

    # Overwrite the placeholder weather fields only when a lookup succeeded.
    record.update(weather or {})

    return record

In [9]:
async def fetch_and_process(session, coords):
    """Fetch traffic data for one ``(lat, lon)`` pair and convert it to a record.

    Returns whatever ``process_traffic_data`` returns for the fetched payload
    (``None`` when the fetch produced nothing).
    """
    latitude, longitude = coords
    payload = await fetch_traffic_data(session, latitude, longitude)
    record = process_traffic_data(payload, latitude, longitude)
    return record

In [10]:
async def main_async():
    """Collect one batch of traffic records, enrich them and persist them.

    This is the first version of the function; a later cell redefines it with
    additional derived features.

    Fetches every coordinate in ``COORDINATES`` concurrently, derives
    ``speed_ratio`` and ``congestion_level``, merges the batch with any data
    already in 'traffic_data.csv', recomputes the group averages over the
    merged frame and writes it back to 'traffic_data.csv'.

    Returns:
        A list of record dicts for the rows collected in this call, or an
        empty list when nothing could be collected.
    """
    connector = TCPConnector(limit=MAX_CONCURRENT_REQUESTS)
    async with ClientSession(connector=connector) as session:
        tasks = [fetch_and_process(session, coords) for coords in COORDINATES]
        results = []
        for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Fetching data"):
            result = await f
            if result:
                results.append(result)

    if not results:
        print("No data was collected. Exiting.")
        return []

    # Convert results to DataFrame for easier processing
    df = pd.DataFrame(results)

    # Calculate derived features
    df['speed_ratio'] = df['current_speed'] / df['free_flow_speed']
    # include_lowest=True keeps a ratio of exactly 0 (standstill traffic) in the
    # 'Heavy' bin; without it the left-open first interval would yield NaN.
    df['congestion_level'] = pd.cut(df['speed_ratio'],
                                    bins=[0, 0.5, 0.75, 1, float('inf')],
                                    labels=['Heavy', 'Moderate', 'Light', 'Free Flow'],
                                    include_lowest=True)

    # Calculate historical averages (assuming we have previous data)
    historical_data = load_historical_data()
    if historical_data is not None:
        # ignore_index avoids duplicate row labels in the merged frame.
        df = pd.concat([historical_data, df], ignore_index=True)

    df['avg_speed_hour'] = df.groupby(['hour'])['current_speed'].transform('mean')
    df['avg_speed_day'] = df.groupby(['day_of_week'])['current_speed'].transform('mean')
    df['avg_speed_location'] = df.groupby(['latitude', 'longitude'])['current_speed'].transform('mean')

    # Save the updated DataFrame
    df.to_csv('traffic_data.csv', index=False)

    print(f"Processed data for {len(results)} locations.")
    # The new batch sits at the end of the merged frame.
    return df.tail(len(results)).to_dict('records')

In [11]:
def load_historical_data():
    """Load previously saved traffic data from 'traffic_data.csv'.

    Returns:
        The stored data as a DataFrame, or ``None`` when the file does not
        exist or exists but is empty (e.g. left behind by an interrupted
        write), so callers can start from a fresh frame.
    """
    try:
        return pd.read_csv('traffic_data.csv')
    except FileNotFoundError:
        print("No historical data found. Starting fresh.")
        return None
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it like a missing file.
        print("Historical data file is empty. Starting fresh.")
        return None

In [12]:
def run_once():
    """Execute a single collection pass and hand back its records.

    Drives ``main_async`` to completion on the current event loop and returns
    its result unchanged.
    """
    event_loop = asyncio.get_event_loop()
    collected = event_loop.run_until_complete(main_async())
    return collected

In [13]:
def run_continuously(num_iterations=5, interval=60):
    """Repeat the collection pass on a fixed cadence, then inspect the CSV.

    Args:
        num_iterations: How many collection passes to perform.
        interval: Target number of seconds between the starts of two passes;
            the time a pass took is subtracted from the pause.
    """
    for iteration in range(1, num_iterations + 1):
        started = time.time()

        batch = run_once()

        print(f"\nSample of collected data (first entry):")
        if not batch:
            print("No data collected in this iteration.")
        else:
            print(json.dumps(batch[0], indent=2))

        elapsed_time = time.time() - started
        # Never sleep a negative amount when a pass overran the interval.
        sleep_time = max(0, interval - elapsed_time)

        print(f"\nIteration {iteration}/{num_iterations}: Processed data in {elapsed_time:.2f} seconds. Sleeping for {sleep_time:.2f} seconds.")

        # No pause is needed once the final pass has finished.
        if iteration < num_iterations:
            time.sleep(sleep_time)

    print("\nData collection completed. Checking 'traffic_data.csv'...")
    try:
        stored = pd.read_csv('traffic_data.csv')
    except FileNotFoundError:
        print("'traffic_data.csv' not found. Please check if the file was created.")
    except Exception as e:
        print(f"An error occurred while reading 'traffic_data.csv': {e}")
    else:
        try:
            print(f"Successfully read 'traffic_data.csv'. It contains {len(stored)} rows.")
            print("\nFirst few rows of the data:")
            print(stored.head().to_string())
        except Exception as e:
            print(f"An error occurred while reading 'traffic_data.csv': {e}")

In [14]:
#run_continuously(num_iterations=3, interval=30)

In [15]:
# Run a single collection pass and show the records it returned.
# main_async() also writes the merged data to 'traffic_data.csv' as a side effect.
results = run_once()
print(results)

Fetching data: 100%|██████████| 6/6 [00:01<00:00,  3.71it/s]

No historical data found. Starting fresh.
Processed data for 6 locations.
[{'timestamp': '2024-09-03T15:38:32.556914-04:00', 'latitude': 40.6782, 'longitude': -73.9442, 'current_speed': 17, 'free_flow_speed': 29, 'confidence': 1, 'road_closure': False, 'hour': 15, 'day_of_week': 1, 'is_weekend': 0, 'is_rush_hour': 0, 'temperature': 22.97, 'humidity': 32, 'weather_condition': 'Clear', 'speed_ratio': 0.5862068965517241, 'congestion_level': 'Moderate', 'avg_speed_hour': 19.0, 'avg_speed_day': 19.0, 'avg_speed_location': 17.0}, {'timestamp': '2024-09-03T15:38:32.692019-04:00', 'latitude': 40.7282, 'longitude': -73.7949, 'current_speed': 18, 'free_flow_speed': 24, 'confidence': 1, 'road_closure': False, 'hour': 15, 'day_of_week': 1, 'is_weekend': 0, 'is_rush_hour': 0, 'temperature': 22.89, 'humidity': 34, 'weather_condition': 'Clouds', 'speed_ratio': 0.75, 'congestion_level': 'Moderate', 'avg_speed_hour': 19.0, 'avg_speed_day': 19.0, 'avg_speed_location': 18.0}, {'timestamp': '2024-09-03T15




In [16]:
def process_traffic_data(data, latitude, longitude):
    """Turn one raw traffic response into a flat feature record.

    Args:
        data: Decoded traffic response expected to contain a
            ``flowSegmentData`` mapping, or a falsy value when the fetch failed.
        latitude: Latitude the response was requested for.
        longitude: Longitude the response was requested for.

    Returns:
        A dict of traffic, calendar (hour, weekday, rush-hour flag, time of
        day) and weather fields, or ``None`` when ``data`` is falsy or carries
        no ``flowSegmentData``. Weather fields stay ``None`` if the weather
        lookup fails.
    """
    if not data:
        return None

    # A 200 response without the expected section must not raise: an exception
    # here would abort the whole batch being awaited by the caller.
    flow_data = data.get('flowSegmentData') if isinstance(data, dict) else None
    if not flow_data:
        print(f"No flow segment data for coordinates {latitude}, {longitude}")
        return None

    current_time = datetime.now(NYC_TZ)
    weather_data = get_weather(latitude, longitude)

    processed_data = {
        'timestamp': current_time.isoformat(),
        'latitude': latitude,
        'longitude': longitude,
        'current_speed': flow_data['currentSpeed'],
        'free_flow_speed': flow_data['freeFlowSpeed'],
        'confidence': flow_data['confidence'],
        'road_closure': flow_data['roadClosure'],
        'hour': current_time.hour,
        'day_of_week': current_time.weekday(),
        # Saturday (5) and Sunday (6) count as weekend.
        'is_weekend': 1 if current_time.weekday() >= 5 else 0,
        'is_rush_hour': 1 if current_time.hour in [7, 8, 9, 16, 17, 18] else 0,
        'time_of_day': get_time_of_day(current_time.hour),
        'temperature': None,
        'humidity': None,
        'weather_condition': None
    }

    # Overwrite the placeholder weather fields only when a lookup succeeded.
    if weather_data:
        processed_data.update(weather_data)

    return processed_data

In [17]:
def get_time_of_day(hour):
    """Map an hour of the day to a coarse period label.

    Returns 'morning' for 5-11, 'afternoon' for 12-16, 'evening' for 17-20
    and 'night' for everything else.
    """
    # (label, first hour included, first hour excluded)
    periods = (
        ('morning', 5, 12),
        ('afternoon', 12, 17),
        ('evening', 17, 21),
    )
    for label, start, stop in periods:
        if start <= hour < stop:
            return label
    return 'night'

In [18]:
def _most_frequent(values):
    """Return the most frequent non-null value of a Series, or None if it has none."""
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


async def main_async():
    """Collect one batch of traffic records, derive features and persist them.

    Fetches every coordinate in ``COORDINATES`` concurrently, derives
    ``speed_ratio`` and ``congestion_level``, merges the batch with any data
    already in 'traffic_data.csv', recomputes all aggregate features over the
    merged frame and writes it back to 'traffic_data.csv'.

    Returns:
        A list of record dicts for the rows collected in this call, or an
        empty list when nothing could be collected.
    """
    connector = TCPConnector(limit=MAX_CONCURRENT_REQUESTS)
    async with ClientSession(connector=connector) as session:
        tasks = [fetch_and_process(session, coords) for coords in COORDINATES]
        results = []
        for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Fetching data"):
            result = await f
            if result:
                results.append(result)

    if not results:
        print("No data was collected. Exiting.")
        return []

    # Convert results to DataFrame for easier processing
    df = pd.DataFrame(results)

    # Calculate derived features
    df['speed_ratio'] = df['current_speed'] / df['free_flow_speed']
    # include_lowest=True keeps a ratio of exactly 0 (standstill traffic) in the
    # 'Heavy' bin; without it the left-open first interval would yield NaN.
    df['congestion_level'] = pd.cut(df['speed_ratio'],
                                    bins=[0, 0.5, 0.75, 1, float('inf')],
                                    labels=['Heavy', 'Moderate', 'Light', 'Free Flow'],
                                    include_lowest=True)

    # Load historical data
    historical_data = load_historical_data()
    if historical_data is not None:
        # ignore_index avoids duplicate row labels in the merged frame.
        df = pd.concat([historical_data, df], ignore_index=True)

    # Calculate time-based averages
    df['avg_speed_hour'] = df.groupby(['hour'])['current_speed'].transform('mean')
    df['avg_speed_day'] = df.groupby(['day_of_week'])['current_speed'].transform('mean')
    df['avg_speed_time_of_day'] = df.groupby(['time_of_day'])['current_speed'].transform('mean')

    # Calculate location-based averages
    df['avg_speed_location'] = df.groupby(['latitude', 'longitude'])['current_speed'].transform('mean')

    # Calculate weather-based averages (if weather data is available)
    if 'weather_condition' in df.columns and df['weather_condition'].notna().any():
        df['avg_speed_weather'] = df.groupby(['weather_condition'])['current_speed'].transform('mean')

    # Calculate historical congestion patterns
    # The helper checks the *mode result* for emptiness: a group whose labels
    # are all NaN is non-empty but has no mode, which used to raise IndexError.
    df['historical_congestion'] = df.groupby(
        ['latitude', 'longitude', 'day_of_week', 'hour']
    )['congestion_level'].transform(_most_frequent)

    # Calculate speed variance
    df['speed_variance'] = df.groupby(['latitude', 'longitude'])['current_speed'].transform('var')

    # Calculate peak hour flag
    df['is_peak_hour'] = ((df['hour'] >= 7) & (df['hour'] <= 9)) | ((df['hour'] >= 16) & (df['hour'] <= 18))

    # Calculate speed difference from average
    df['speed_diff_from_avg'] = df['current_speed'] - df['avg_speed_location']

    # Save the updated DataFrame
    df.to_csv('traffic_data.csv', index=False)

    print(f"Processed data for {len(results)} locations.")
    # The new batch sits at the end of the merged frame.
    return df.tail(len(results)).to_dict('records')


In [19]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import LabelEncoder

In [20]:
import asyncio
import aiohttp
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import json
from aiohttp import ClientSession, TCPConnector
from asyncio import Semaphore
import nest_asyncio
from tqdm import tqdm
import pytz
import requests
import requests_cache
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import LabelEncoder

In [21]:
def train_models(X, y_speed, y_congestion):
    """Fit a speed regressor and a congestion classifier on the same features.

    Holds out 20% of the rows (fixed seed 42), trains two 100-tree random
    forests and prints the held-out RMSE and accuracy.

    Args:
        X: Feature matrix.
        y_speed: Speed targets aligned with ``X``.
        y_congestion: Encoded congestion labels aligned with ``X``.

    Returns:
        ``(speed_regressor, congestion_classifier)``.
    """
    (features_train, features_test,
     speed_train, speed_test,
     congestion_train, congestion_test) = train_test_split(
        X, y_speed, y_congestion, test_size=0.2, random_state=42)

    # Regression forest for the speed target.
    speed_model = RandomForestRegressor(n_estimators=100, random_state=42)
    speed_model.fit(features_train, speed_train)
    speed_rmse = np.sqrt(mean_squared_error(speed_test, speed_model.predict(features_test)))

    # Classification forest for the congestion label.
    congestion_model = RandomForestClassifier(n_estimators=100, random_state=42)
    congestion_model.fit(features_train, congestion_train)
    congestion_accuracy = accuracy_score(congestion_test, congestion_model.predict(features_test))

    print(f"Speed Prediction RMSE: {speed_rmse:.2f}")
    print(f"Congestion Prediction Accuracy: {congestion_accuracy:.2f}")

    return speed_model, congestion_model

In [22]:

def predict_traffic(models, X):
    """Run both models on ``X``.

    Args:
        models: A ``(speed_model, congestion_model)`` pair; each must expose
            ``predict``.
        X: Feature matrix passed unchanged to both models.

    Returns:
        ``(speed_predictions, congestion_predictions)``.
    """
    speed_model, congestion_model = models
    return speed_model.predict(X), congestion_model.predict(X)

In [23]:
async def fetch_new_data():
    """Fetch and process every coordinate in ``COORDINATES`` concurrently.

    Returns:
        The non-empty records in completion order; failed lookups are dropped.
    """
    async with ClientSession(connector=TCPConnector(limit=MAX_CONCURRENT_REQUESTS)) as session:
        pending = [fetch_and_process(session, point) for point in COORDINATES]
        completed = [await finished for finished in asyncio.as_completed(pending)]
    return [record for record in completed if record]


In [24]:
def make_predictions(models, new_data, scaler, le):
    """Score freshly collected records with the trained models.

    Args:
        models: ``(speed_model, congestion_model)`` pair.
        new_data: List of record dicts as produced by the collection step.
        scaler: Fitted scaler applied to the feature matrix.
        le: Fitted label encoder used to decode congestion predictions.

    Returns:
        A DataFrame of the records with ``predicted_speed`` and
        ``predicted_congestion`` columns added. ``prepare_data_for_model``
        also adds ``speed_ratio`` / ``congestion_level`` to this frame in
        place when they are missing.
    """
    speed_model, congestion_model = models
    frame = pd.DataFrame(new_data)

    # Only the feature matrix is needed; targets and the freshly fitted
    # encoder returned alongside it are discarded.
    feature_matrix = prepare_data_for_model(frame)[0]
    scaled = scaler.transform(feature_matrix)

    predicted_speed = speed_model.predict(scaled)
    predicted_codes = congestion_model.predict(scaled)

    frame['predicted_speed'] = predicted_speed
    frame['predicted_congestion'] = le.inverse_transform(predicted_codes)
    return frame

In [25]:
from sklearn.preprocessing import StandardScaler

In [26]:
def prepare_data_for_model(df):
    """Build the feature matrix and targets used by the models.

    When ``congestion_level`` is missing, ``speed_ratio`` and
    ``congestion_level`` are added to ``df`` **in place** (callers rely on
    those columns being present afterwards).

    Args:
        df: Traffic records with at least ``current_speed`` and
            ``free_flow_speed``.

    Returns:
        ``(X, y_speed, y_congestion, le)`` where ``X`` holds whichever of the
        candidate feature columns exist in ``df``, ``y_speed`` is the
        ``current_speed`` column, ``y_congestion`` the integer-encoded
        congestion labels and ``le`` the ``LabelEncoder`` fitted on them.
    """
    # Create congestion_level if it doesn't exist
    if 'congestion_level' not in df.columns:
        df['speed_ratio'] = df['current_speed'] / df['free_flow_speed']
        # include_lowest=True keeps a ratio of exactly 0 (standstill traffic)
        # in the 'Heavy' bin instead of producing a NaN label.
        df['congestion_level'] = pd.cut(df['speed_ratio'],
                                        bins=[0, 0.5, 0.75, 1, float('inf')],
                                        labels=['Heavy', 'Moderate', 'Light', 'Free Flow'],
                                        include_lowest=True)

    # Select features for the model
    features = ['hour', 'day_of_week', 'is_weekend', 'is_rush_hour', 'is_peak_hour',
                'temperature', 'humidity', 'speed_ratio', 'avg_speed_hour',
                'avg_speed_day', 'avg_speed_location', 'avg_speed_time_of_day',
                'speed_variance', 'speed_diff_from_avg']

    # Ensure all selected features are present
    # NOTE(review): the feature set therefore depends on the columns of `df`;
    # training and prediction frames must expose the same columns.
    features = [f for f in features if f in df.columns]

    X = df[features]
    y_speed = df['current_speed']
    y_congestion = df['congestion_level']

    # Encode categorical variables
    le = LabelEncoder()
    y_congestion = le.fit_transform(y_congestion)

    return X, y_speed, y_congestion, le

In [27]:
!pip install networkx



In [28]:
import networkx as nx
import numpy as np

In [29]:
def calculate_edge_cost(G, node1, node2, predicted_speed_weight=0.7):
    """Estimate the cost of travelling the edge between two graph nodes.

    The cost is ``distance / effective_speed`` multiplied by a congestion
    penalty, where the effective speed blends the mean current and mean
    predicted speeds of the two endpoints.

    Args:
        G: Graph whose edge ``(node1, node2)`` has a ``weight`` attribute and
            whose nodes carry ``current_speed``, ``predicted_speed`` and
            ``congestion_level``.
        node1: First endpoint.
        node2: Second endpoint.
        predicted_speed_weight: Share (0..1) given to the predicted speed in
            the blend; the remainder goes to the current speed.

    Returns:
        The travel cost as a float; ``float('inf')`` when the blended speed is
        not positive, so the edge is effectively impassable instead of
        raising ``ZeroDivisionError`` or yielding a negative weight.
    """
    dist = G[node1][node2]['weight']
    attrs1 = G.nodes[node1]
    attrs2 = G.nodes[node2]
    current_speed = (attrs1['current_speed'] + attrs2['current_speed']) / 2
    predicted_speed = (attrs1['predicted_speed'] + attrs2['predicted_speed']) / 2

    # Combine current and predicted speeds
    effective_speed = current_speed * (1 - predicted_speed_weight) + predicted_speed * predicted_speed_weight

    # Stopped traffic (or NaN speeds) cannot be divided by; `not x > 0` is also
    # true for NaN.
    if not effective_speed > 0:
        return float('inf')

    # Adjust for congestion
    congestion_factor = 1
    if attrs1['congestion_level'] == 'Heavy' or attrs2['congestion_level'] == 'Heavy':
        congestion_factor = 2
    elif attrs1['congestion_level'] == 'Moderate' or attrs2['congestion_level'] == 'Moderate':
        congestion_factor = 1.5

    # Calculate time to travel this edge (named travel_time so the `time`
    # module is not shadowed).
    travel_time = (dist / effective_speed) * congestion_factor

    return travel_time


In [30]:
def build_graph(traffic_data):
    """Create a complete undirected graph from traffic records.

    Each record becomes a node keyed by ``(latitude, longitude)`` carrying its
    current speed, predicted speed and congestion level. Every pair of nodes
    is joined by an edge whose ``weight`` is the straight-line distance
    between the coordinate pairs (computed directly on the degree values).

    Args:
        traffic_data: Iterable of record dicts with ``latitude``,
            ``longitude``, ``current_speed``, ``predicted_speed`` and
            ``congestion_level`` keys.

    Returns:
        The populated ``networkx.Graph``.
    """
    graph = nx.Graph()
    for record in traffic_data:
        graph.add_node(
            (record['latitude'], record['longitude']),
            current_speed=record['current_speed'],
            predicted_speed=record['predicted_speed'],
            congestion_level=record['congestion_level'],
        )

    # Link every unordered pair of nodes exactly once.
    points = list(graph.nodes())
    for position, origin in enumerate(points):
        for target in points[position + 1:]:
            separation = np.sqrt((origin[0]-target[0])**2 + (origin[1]-target[1])**2)
            graph.add_edge(origin, target, weight=separation)

    return graph

In [31]:
def optimize_route(G, start, end):
    """Find the cheapest route between two nodes using ``calculate_edge_cost``.

    Args:
        G: Graph produced by ``build_graph``.
        start: Source node.
        end: Target node.

    Returns:
        ``(path, total_cost)`` where ``path`` is the list of nodes from
        ``start`` to ``end``; ``(None, None)`` when no path exists.
    """
    def cost_function(node1, node2, edge_data):
        return calculate_edge_cost(G, node1, node2)

    try:
        # One Dijkstra run yields both the path and its length; the previous
        # implementation ran the search twice for the same query.
        total_cost, path = nx.single_source_dijkstra(G, start, end, weight=cost_function)
        return path, total_cost
    except nx.NetworkXNoPath:
        print(f"No path found between {start} and {end}. Graph information:")
        print(f"Number of nodes: {G.number_of_nodes()}")
        print(f"Number of edges: {G.number_of_edges()}")
        print(f"Is the graph connected? {nx.is_connected(G)}")
        return None, None

In [32]:
import networkx as nx

In [None]:
import asyncio
import pandas as pd
import numpy as np
from datetime import datetime
import pytz
from sklearn.preprocessing import StandardScaler
import joblib

async def main():
    """Collect data, train the models, then predict and route in a loop.

    Phases:
      1. Collect ``num_iterations`` batches with ``fetch_new_data``.
      2. Derive ``speed_ratio`` / ``congestion_level``, scale the features,
         train both random forests and save them with ``save_models``.
      3. Loop forever: fetch a batch, predict, build a graph, optimise a route
         between the first and last node, write the predictions to a
         timestamped CSV and retrain once enough new rows have accumulated.

    Returns early (``None``) when the initial collection yields no data.
    The loop only ends on ``KeyboardInterrupt``; any other exception is
    printed and the loop retries after ``interval`` seconds.
    """
    # Initial setup
    num_iterations = 1
    interval = 20
    retrain_every = 100  # retrain after this many new rows since the last fit
    all_results = []
    NYC_TZ = pytz.timezone('America/New_York')

    # Initial data collection
    print("Starting initial data collection...")
    for i in range(num_iterations):
        results = await fetch_new_data()
        all_results.extend(results)
        print(f"Iteration {i+1}/{num_iterations}: Collected {len(results)} data points.")
        await asyncio.sleep(interval)

    # Without any rows the frame below has no columns and training would fail
    # with an opaque KeyError.
    if not all_results:
        print("\nNo data was collected during the initial phase; cannot train models. Exiting.")
        return

    print("\nInitial data collection completed. Processing data and training models...")

    # Convert all results to DataFrame
    df = pd.DataFrame(all_results)

    # Calculate derived features
    df['speed_ratio'] = df['current_speed'] / df['free_flow_speed']
    # include_lowest=True keeps a ratio of exactly 0 (standstill traffic) in the
    # 'Heavy' bin instead of producing a NaN label.
    df['congestion_level'] = pd.cut(df['speed_ratio'],
                                    bins=[0, 0.5, 0.75, 1, float('inf')],
                                    labels=['Heavy', 'Moderate', 'Light', 'Free Flow'],
                                    include_lowest=True)

    # Prepare data for the model
    X, y_speed, y_congestion, le = prepare_data_for_model(df)

    # Scale the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Train models
    rf_speed, rf_congestion = train_models(X_scaled, y_speed, y_congestion)

    # Save the initial models
    save_models(rf_speed, rf_congestion, scaler, le)
    rows_at_last_fit = len(df)

    print("\nStarting real-time prediction and route optimization...")

    while True:
        try:
            # Fetch new data
            new_data = await fetch_new_data()

            if new_data:
                # Make predictions
                predictions_df = make_predictions((rf_speed, rf_congestion), new_data, scaler, le)

                # Print some results
                print("\nLatest predictions:")
                print(predictions_df[['current_speed', 'predicted_speed', 'congestion_level', 'predicted_congestion']].head())

                # Build graph for route optimization
                G = build_graph(predictions_df.to_dict('records'))

                # Optimize route between the first and last node of the graph
                nodes = list(G.nodes())
                if len(nodes) >= 2:
                    start = nodes[0]
                    end = nodes[-1]

                    print(f"\nAttempting to find route from {start} to {end}")
                    path, total_cost = optimize_route(G, start, end)

                    if path:
                        print(f"Optimized route found:")
                        print(f"Path: {path}")
                        print(f"Estimated travel time: {total_cost:.2f} hours")
                    else:
                        print("Failed to find a path. Please check the graph connectivity.")
                else:
                    print("\nNot enough nodes for route optimization")

                # Save the results
                current_time = datetime.now(NYC_TZ).strftime("%Y%m%d_%H%M%S")
                predictions_df.to_csv(f'predictions_{current_time}.csv', index=False)

                # Update the main DataFrame
                df = pd.concat([df, predictions_df], ignore_index=True)
                df = df.drop_duplicates(subset=['latitude', 'longitude', 'timestamp'], keep='last')

                # Optionally, update the model periodically.
                # Compare against the size at the last fit: an exact
                # `len(df) % 100 == 0` test is skipped whenever a batch makes
                # the row count jump past a multiple of 100.
                if len(df) - rows_at_last_fit >= retrain_every:
                    print("\nUpdating model...")
                    X, y_speed, y_congestion, le = prepare_data_for_model(df)
                    X_scaled = scaler.fit_transform(X)
                    rf_speed, rf_congestion = train_models(X_scaled, y_speed, y_congestion)
                    save_models(rf_speed, rf_congestion, scaler, le)
                    rows_at_last_fit = len(df)
            else:
                print("\nNo new data received.")

            # Wait for the next interval
            print(f"\nWaiting for {interval} seconds before next data fetch...")
            await asyncio.sleep(interval)

        except KeyboardInterrupt:
            print("\nStopping real-time prediction and route optimization...")
            break
        except Exception as e:
            # Best-effort loop: report the failure and try again next interval.
            print(f"An error occurred: {e}")
            print("Waiting before retry...")
            await asyncio.sleep(interval)

def save_models(rf_speed, rf_congestion, scaler, le):
    """Persist both models, the scaler and the label encoder with joblib.

    Files are written to the current working directory under fixed names, in
    the order listed below.
    """
    artifacts = (
        (rf_speed, 'rf_speed_model.joblib'),
        (rf_congestion, 'rf_congestion_model.joblib'),
        (scaler, 'scaler.joblib'),
        (le, 'label_encoder.joblib'),
    )
    for artifact, filename in artifacts:
        joblib.dump(artifact, filename)
    print("Models saved successfully.")

# Run the main function
# NOTE(review): inside a notebook kernel `__name__` is presumably "__main__",
# so executing this cell starts main() and its endless prediction loop - confirm
# that is intended before running all cells.
if __name__ == "__main__":
    import nest_asyncio
    # Allow asyncio.run() to be used even though an event loop may already be
    # running in this process.
    nest_asyncio.apply()
    asyncio.run(main())

In [34]:
!pip install flask pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [35]:
!pip install osmnx

Collecting osmnx
  Downloading osmnx-1.9.4-py3-none-any.whl.metadata (4.9 kB)
Downloading osmnx-1.9.4-py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.5/107.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: osmnx
Successfully installed osmnx-1.9.4


In [36]:
import os
# Only provide the fallback token when the environment does not already define
# one, so a token configured outside the notebook is never overwritten.
# NOTE(review): this token is committed in plain text - rotate it and supply
# NGROK_AUTH_TOKEN through the environment / a secrets store instead.
os.environ.setdefault('NGROK_AUTH_TOKEN', '2lAYo1STfa8bopCQOAIsbEvseUz_5sxRJ28NUQCLhVZRR716k')

In [37]:
!pip install folium



In [38]:
!mkdir templates

In [39]:
!touch templates/index.html

In [40]:
!pip install flask pyngrok joblib pandas networkx numpy geopy folium requests



In [62]:
import logging
import os
import random
import traceback
from logging.handlers import RotatingFileHandler

import folium
import joblib
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
import requests
from flask import Flask, render_template, request, jsonify, send_from_directory
from geopy.distance import geodesic
from geopy.exc import GeocoderTimedOut, GeocoderUnavailable
from geopy.geocoders import Nominatim
from pyngrok import ngrok
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

# Flask application object; the logging setup and the routes below attach to it.
app = Flask(__name__)

def setup_logging():
    """Attach a rotating file handler to the Flask app logger.

    Log records at INFO and above go to 'app.log', rotated at 10000 bytes
    with three backups kept.
    """
    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    file_handler = RotatingFileHandler('app.log', maxBytes=10000, backupCount=3)
    file_handler.setFormatter(log_format)
    file_handler.setLevel(logging.INFO)

    app.logger.setLevel(logging.INFO)
    app.logger.addHandler(file_handler)

setup_logging()

# The ngrok token must be present in the environment; fail fast with a clear
# message instead of letting ngrok fail later.
ngrok_auth_token = os.getenv("NGROK_AUTH_TOKEN")
if not ngrok_auth_token:
    raise ValueError("NGROK_AUTH_TOKEN environment variable is not set")
ngrok.set_auth_token(ngrok_auth_token)

# Load the four artifacts written by save_models() from the working directory.
# A missing file is logged and re-raised because the app cannot predict
# without them.
# NOTE(review): joblib.load presumably unpickles the files - only load
# artifacts produced by this notebook.
try:
    rf_speed = joblib.load('rf_speed_model.joblib')
    rf_congestion = joblib.load('rf_congestion_model.joblib')
    scaler = joblib.load('scaler.joblib')
    le = joblib.load('label_encoder.joblib')
    app.logger.info("Models loaded successfully")
except FileNotFoundError as e:
    app.logger.error(f"Error loading model files: {e}")
    raise

# Shared Nominatim client used by geocode().
geolocator = Nominatim(user_agent="traffic_optimizer")

@retry(
    # Retry only transient geocoder failures. An unknown place (ValueError) is
    # permanent, so retrying it just adds several seconds of back-off.
    retry=retry_if_exception_type((GeocoderTimedOut, GeocoderUnavailable)),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    # Surface the original exception after the last attempt rather than a
    # generic RetryError, so the caller sees the actual cause.
    reraise=True,
)
def geocode(place_name):
    """Resolve a place name to ``(latitude, longitude)``.

    Args:
        place_name: Free-text place description passed to the geocoder.

    Returns:
        A ``(latitude, longitude)`` tuple.

    Raises:
        ValueError: The geocoder returned no match (raised immediately, not
            retried).
        GeocoderTimedOut, GeocoderUnavailable: The service kept failing after
            three attempts.
    """
    try:
        location = geolocator.geocode(place_name, timeout=10)
        if location:
            return location.latitude, location.longitude
        else:
            raise ValueError(f"Could not find coordinates for {place_name}")
    except (GeocoderTimedOut, GeocoderUnavailable) as e:
        app.logger.error(f"Geocoding service error: {str(e)}")
        raise

def get_weather_data(lat, lon):
    """Fetch current humidity and temperature for a coordinate.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.

    Returns:
        A dict with ``humidity`` and ``temperature``. When the request fails,
        times out or the payload lacks the expected fields, the defaults
        ``{'humidity': 50, 'temperature': 20}`` are returned instead.
    """
    # Prefer a key from the environment; the literal is only a
    # backward-compatible fallback.
    # NOTE(review): the fallback key is committed in plain text - rotate it.
    api_key = os.environ.get("OPENWEATHER_API_KEY", "28edebb4a2f283c1805f913584ad2905")
    # HTTPS so the key is not sent in clear text; `params` handles URL encoding.
    url = "https://api.openweathermap.org/data/2.5/weather"
    params = {'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'}
    try:
        # The timeout keeps a hung weather service from blocking the request
        # handler indefinitely.
        response = requests.get(url, params=params, timeout=10)
        data = response.json()
        return {
            'humidity': data['main']['humidity'],
            'temperature': data['main']['temp']
        }
    except Exception as e:
        # Best-effort enrichment: log and fall back to neutral defaults.
        app.logger.error(f"Error fetching weather data: {str(e)}")
        return {'humidity': 50, 'temperature': 20}  # Default values

def prepare_data(lat, lon):
    """Build the single-row feature frame used for a prediction at a point.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.

    Returns:
        A one-row DataFrame with the calendar features, current weather and a
        fixed ``speed_ratio`` of 1.0.
    """
    weather = get_weather_data(lat, lon)
    # Sample the clock once so all calendar features describe the same
    # instant, and use the New York time zone: the training records were
    # stamped with America/New_York time, whereas the server clock may be UTC.
    now = pd.Timestamp.now(tz='America/New_York')
    data = {
        'hour': now.hour,
        'day_of_week': now.dayofweek,
        'is_weekend': 1 if now.dayofweek >= 5 else 0,
        'is_rush_hour': 1 if now.hour in [7, 8, 9, 16, 17, 18] else 0,
        'humidity': weather['humidity'],
        'temperature': weather['temperature'],
        # No live speed is available at request time; 1.0 is a fixed placeholder.
        'speed_ratio': 1.0,
    }
    return pd.DataFrame([data])

def make_prediction(df):
    """Predict speed and congestion label for the first row of ``df``.

    The columns are selected and ordered according to the feature names
    stored on the loaded scaler.

    Returns:
        ``(predicted_speed, congestion_label)`` for the first row.
    """
    features = df[scaler.feature_names_in_]
    app.logger.info(f"Features being used for prediction: {features.columns}")

    scaled = scaler.transform(features)
    speed_predictions = rf_speed.predict(scaled)
    congestion_codes = rf_congestion.predict(scaled)

    # Decode the integer class back to its text label.
    congestion_label = le.inverse_transform([congestion_codes[0]])[0]
    return speed_predictions[0], congestion_label

def calculate_edge_cost(G, u, v, d):
    """Return the traversal time in seconds for one edge.

    Only the edge attribute dict ``d`` is used: ``length`` (metres, default 1)
    divided by ``speed`` (km/h, default 30) converted to metres per second.
    ``G``, ``u`` and ``v`` are accepted for signature compatibility.
    """
    length_m = d.get('length', 1)
    speed_kmh = d.get('speed', 30)
    metres_per_second = speed_kmh * 1000 / 3600
    return length_m / metres_per_second

def optimize_route(G, start, end):
    """Find the fastest drive between two ``(lat, lon)`` points on ``G``.

    Args:
        G: Road-network multigraph whose edges carry ``length`` and ``speed``.
        start: ``(latitude, longitude)`` of the origin.
        end: ``(latitude, longitude)`` of the destination.

    Returns:
        ``(path, total_time, total_distance)`` computed from the key-0 edge of
        each hop, or ``(None, None, None)`` when no path exists.
    """
    def edge_weight(u, v, data):
        # `data` maps edge keys to attribute dicts; the key-0 edge is used.
        return calculate_edge_cost(G, u, v, data[0])

    try:
        # nearest_nodes takes X (longitude) before Y (latitude).
        origin = ox.distance.nearest_nodes(G, start[1], start[0])
        destination = ox.distance.nearest_nodes(G, end[1], end[0])

        path = nx.shortest_path(G, origin, destination, weight=edge_weight)

        hops = list(zip(path[:-1], path[1:]))
        total_time = sum(calculate_edge_cost(G, a, b, G.get_edge_data(a, b, 0)) for a, b in hops)
        total_distance = sum(G.edges[a, b, 0]['length'] for a, b in hops)

        return path, total_time, total_distance
    except nx.NetworkXNoPath:
        app.logger.error(f"No path found between {start} and {end}.")
        return None, None, None

def get_color_for_congestion(congestion):
    """Pick the map line colour for a congestion label.

    'Light' is green, 'Moderate' is orange and anything else is red.
    """
    if congestion == 'Light':
        return 'green'
    if congestion == 'Moderate':
        return 'orange'
    return 'red'

def create_map(G, path, congestion_levels):
    """Render the route as a colour-coded folium map and save it as HTML.

    Args:
        G: Graph whose nodes carry ``x`` (longitude) and ``y`` (latitude).
        path: Ordered list of node ids describing the route.
        congestion_levels: One congestion label per hop of ``path``.

    Returns:
        The file name (not the full path) of the saved map, written to the
        app's ``templates`` directory as 'temp_map.html'.
    """
    def lat_lon(node):
        # folium expects (latitude, longitude) ordering.
        attrs = G.nodes[node]
        return attrs['y'], attrs['x']

    first_node, last_node = path[0], path[-1]
    m = folium.Map(location=list(lat_lon(first_node)), zoom_start=12)

    # One line segment per hop, coloured by that hop's congestion label.
    for hop, (node_a, node_b) in enumerate(zip(path[:-1], path[1:])):
        folium.PolyLine(
            locations=[lat_lon(node_a), lat_lon(node_b)],
            color=get_color_for_congestion(congestion_levels[hop]),
            weight=4,
            opacity=0.8
        ).add_to(m)

    folium.Marker(list(lat_lon(first_node)), popup='Start').add_to(m)
    folium.Marker(list(lat_lon(last_node)), popup='End').add_to(m)

    legend_html = '''
    <div style="position: fixed;
                bottom: 50px; left: 50px; width: 150px; height: 90px;
                border:2px solid grey; z-index:9999; font-size:12px;
                background-color:white; padding: 10px;">
    <b>Congestion Levels</b><br>
    <i style="color:green">●</i> Light<br>
    <i style="color:orange">●</i> Moderate<br>
    <i style="color:red">●</i> Heavy
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))

    map_file = os.path.join(app.root_path, 'templates', 'temp_map.html')
    m.save(map_file)
    return os.path.basename(map_file)

@app.route('/')
def index():
    """Serve the landing page rendered from the 'index.html' template."""
    page = render_template('index.html')
    return page

@app.route('/predict', methods=['POST'])
def predict():
    """Geocode both places, predict traffic at each, and plan a route between them.

    Expects a JSON body of the form ``{"start": "<place>", "end": "<place>"}``.

    Returns:
        On success, a JSON object with ``start`` and ``end`` (place, coordinates,
        predicted speed and congestion), ``path`` (graph node ids),
        ``travel_time`` (divided by 3600, i.e. hours if the route cost is in
        seconds), ``distance`` (divided by 1000, i.e. kilometres if edge lengths
        are in metres) and ``map_file`` (name to request from ``/map/<filename>``).
        On failure, a JSON object with an ``error`` message and HTTP status 400;
        unexpected exceptions additionally carry a ``traceback`` string.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body instead of
        # raising, so the problem can be reported with a clear message below.
        data = request.get_json(silent=True)
        app.logger.info(f"Received data: {data}")

        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object with "start" and "end".'}), 400

        missing = [
            field for field in ('start', 'end')
            if not isinstance(data.get(field), str) or not data[field].strip()
        ]
        if missing:
            return jsonify({'error': f"Missing or empty field(s): {', '.join(missing)}"}), 400

        start_place = data['start']
        end_place = data['end']

        start_lat, start_lon = geocode(start_place)
        end_lat, end_lon = geocode(end_place)

        start_df = prepare_data(start_lat, start_lon)
        end_df = prepare_data(end_lat, end_lon)

        start_speed, start_congestion = make_prediction(start_df)
        end_speed, end_congestion = make_prediction(end_df)

        # Load the road network.
        # NOTE(review): the graph only covers 10 km around the start point; confirm
        # how optimize_route behaves when the destination lies outside that radius.
        G = ox.graph_from_point((start_lat, start_lon), dist=10000, network_type='drive')

        # Placeholder traffic attributes: every edge gets a random speed and
        # congestion label. The model predictions above are only reported for the
        # two endpoints and are not applied to the edges.
        for _, _, d in G.edges(data=True):
            d['speed'] = random.uniform(20, 80)  # Random speed between 20 and 80 km/h
            d['congestion'] = random.choice(['Light', 'Moderate', 'Heavy'])

        app.logger.info("Optimizing route")
        path, travel_time, distance = optimize_route(G, (start_lat, start_lon), (end_lat, end_lon))

        if path is None:
            return jsonify({'error': 'No path found'}), 400

        # Congestion label of each consecutive edge along the route (edge key 0).
        congestion_levels = [G.edges[path[i], path[i+1], 0]['congestion'] for i in range(len(path)-1)]

        map_file = create_map(G, path, congestion_levels)

        result = {
            'start': {
                'place': start_place,
                'coordinates': [start_lat, start_lon],
                'speed': float(start_speed),
                'congestion': start_congestion
            },
            'end': {
                'place': end_place,
                'coordinates': [end_lat, end_lon],
                'speed': float(end_speed),
                'congestion': end_congestion
            },
            'path': path,
            'travel_time': travel_time / 3600,  # Convert seconds to hours
            'distance': distance / 1000,  # Convert meters to kilometers
            'map_file': map_file
        }
        app.logger.info(f"Returning result: {result}")
        return jsonify(result)
    except Exception as e:
        app.logger.error(f"An error occurred: {str(e)}", exc_info=True)
        # NOTE(review): the traceback is sent to the client. Handy while debugging,
        # but it exposes server internals when the app is reachable publicly.
        return jsonify({
            'error': str(e),
            'traceback': traceback.format_exc()
        }), 400

@app.route('/map/<path:filename>')
def serve_map(filename):
    """Serve a previously generated map file from the 'templates' directory.

    Args:
        filename: Path of the requested file, relative to the 'templates'
            directory under ``app.root_path``.

    Returns:
        The file response, or the text "Map file not found" with status 404 when
        the file does not exist inside that directory.
    """
    maps_dir = os.path.abspath(os.path.join(app.root_path, 'templates'))
    target = os.path.abspath(os.path.join(maps_dir, filename))

    # send_from_directory reports a missing file by raising werkzeug's NotFound
    # (an HTTP exception), not FileNotFoundError, so the handler below alone never
    # produced the log entry or the custom 404 body. Check explicitly first, and
    # only accept paths that stay inside the maps directory.
    inside_maps_dir = target.startswith(maps_dir + os.sep)
    if not (inside_maps_dir and os.path.isfile(target)):
        app.logger.error(f"Map file not found: {filename}")
        return "Map file not found", 404

    try:
        return send_from_directory(maps_dir, filename)
    except FileNotFoundError:
        # Covers the file disappearing between the check above and the read.
        app.logger.error(f"Map file not found: {filename}")
        return "Map file not found", 404



INFO:__main__:Models loaded successfully


In [42]:
# Single-page frontend for the route optimizer. The /predict endpoint already
# converts travel_time to hours, so the page must display the value as-is
# (dividing by 3600 again made every trip show as roughly 0.00 hours).
html_content = '''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Traffic Route Optimizer</title>
    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600&display=swap" rel="stylesheet">
    <style>
        body {
            font-family: 'Poppins', sans-serif;
            background: linear-gradient(135deg, #000000 0%, #434343 100%);
            color: #ffffff;
            margin: 0;
            padding: 0;
            display: flex;
            justify-content: center;
            align-items: center;
            min-height: 100vh;
        }
        .container {
            background-color: rgba(255, 255, 255, 0.1);
            backdrop-filter: blur(10px);
            border-radius: 20px;
            padding: 3rem;
            box-shadow: 0 8px 32px rgba(0, 0, 0, 0.37);
            max-width: 600px;
            width: 100%;
        }
        h1 {
            text-align: center;
            margin-bottom: 2rem;
            color: #ff0000;
            font-size: 2.5rem;
            text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
        }
        form {
            display: flex;
            flex-direction: column;
        }
        label {
            margin-bottom: 0.5rem;
            font-weight: 600;
            color: #f0f0f0;
        }
        input {
            padding: 1rem;
            margin-bottom: 1.5rem;
            border: none;
            border-radius: 10px;
            background-color: rgba(255, 255, 255, 0.2);
            color: #ffffff;
            font-size: 1rem;
            transition: all 0.3s ease;
        }
        input:focus {
            outline: none;
            box-shadow: 0 0 0 2px #ff0000;
        }
        input::placeholder {
            color: rgba(255, 255, 255, 0.7);
        }
        button {
            padding: 1rem;
            background-color: #ff0000;
            color: white;
            border: none;
            border-radius: 10px;
            cursor: pointer;
            font-size: 1.1rem;
            font-weight: 600;
            transition: all 0.3s ease;
            text-transform: uppercase;
            letter-spacing: 1px;
        }
        button:hover {
            background-color: #cc0000;
            transform: translateY(-2px);
            box-shadow: 0 4px 8px rgba(0,0,0,0.2);
        }
        #result {
            margin-top: 2rem;
            background-color: rgba(255, 255, 255, 0.1);
            padding: 1.5rem;
            border-radius: 10px;
            font-size: 0.9rem;
            line-height: 1.6;
        }
        #result h2 {
            color: #ff0000;
            margin-bottom: 1rem;
        }
        #result p {
            margin-bottom: 0.5rem;
        }
        iframe {
            border-radius: 10px;
            margin-top: 1rem;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Traffic Route Optimizer 🚗</h1>
        <form id="route-form">
            <label for="start">From:</label>
            <input type="text" id="start" name="start" required placeholder="Enter starting location">

            <label for="end">Destination:</label>
            <input type="text" id="end" name="end" required placeholder="Enter destination">

            <button type="submit">Optimize Route</button>
        </form>
        <div id="result"></div>
    </div>
    <script>
    document.getElementById('route-form').addEventListener('submit', function(e) {
    e.preventDefault();
    const start = document.getElementById('start').value;
    const end = document.getElementById('end').value;

    fetch('/predict', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({ start, end }),
    })
    .then(response => {
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        return response.json();
    })
    .then(data => {
        const resultDiv = document.getElementById('result');
        if (data.error) {
            resultDiv.innerHTML = `<p>Error: ${data.error}</p>`;
        } else {
            resultDiv.innerHTML = `
                <h2>Results 🎉</h2>
                <p><strong>From:</strong> ${data.start.place}</p>
                <p><span style="color: #ff0000;">Speed:</span> ${data.start.speed.toFixed(2)} km/h</p>
                <p><span style="color: #ff0000;">Congestion:</span> ${data.start.congestion} 🚦</p>
                <p><strong>Destination:</strong> ${data.end.place}</p>
                <p><span style="color: #ff0000;">Speed:</span> ${data.end.speed.toFixed(2)} km/h</p>
                <p><span style="color: #ff0000;">Congestion:</span> ${data.end.congestion} 🚦</p>
                <p><strong>Estimated travel time:</strong> ${data.travel_time.toFixed(2)} hours ⏱️</p>
                <p><strong>Distance:</strong> ${data.distance.toFixed(2)} km 🛣️</p>
                ${data.map_file ? `<iframe src="/map/${data.map_file}" width="100%" height="300" frameborder="0"></iframe>` : ''}
            `;
        }
    })
    .catch((error) => {
        console.error('Error:', error);
        document.getElementById('result').innerHTML = `<p>Error: ${error.message}</p>`;
    });
});
</script>
</body>
</html>


'''

In [None]:
# Scratch note: the travel-time line of the page without the extra "/ 3600".
# This bare string is not assigned to anything, so it does not change html_content.
'''<p><strong>Estimated travel time:</strong> ${(data.travel_time).toFixed(2)} hours ⏱️</p>'''

In [60]:
# Frontend page under test: same layout as the earlier html_content, but the travel
# time (received in hours) is shown as "H hr M min". Minutes are rounded once on the
# total so the page can never show "1 hr 60 min" or "60 min".
html_content = '''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Traffic Route Optimizer</title>
    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600&display=swap" rel="stylesheet">
    <style>
        body {
            font-family: 'Poppins', sans-serif;
            background: linear-gradient(135deg, #000000 0%, #434343 100%);
            color: #ffffff;
            margin: 0;
            padding: 0;
            display: flex;
            justify-content: center;
            align-items: center;
            min-height: 100vh;
        }
        .container {
            background-color: rgba(255, 255, 255, 0.1);
            backdrop-filter: blur(10px);
            border-radius: 20px;
            padding: 3rem;
            box-shadow: 0 8px 32px rgba(0, 0, 0, 0.37);
            max-width: 600px;
            width: 100%;
        }
        h1 {
            text-align: center;
            margin-bottom: 2rem;
            color: #ff0000;
            font-size: 2.5rem;
            text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
        }
        form {
            display: flex;
            flex-direction: column;
        }
        label {
            margin-bottom: 0.5rem;
            font-weight: 600;
            color: #f0f0f0;
        }
        input {
            padding: 1rem;
            margin-bottom: 1.5rem;
            border: none;
            border-radius: 10px;
            background-color: rgba(255, 255, 255, 0.2);
            color: #ffffff;
            font-size: 1rem;
            transition: all 0.3s ease;
        }
        input:focus {
            outline: none;
            box-shadow: 0 0 0 2px #ff0000;
        }
        input::placeholder {
            color: rgba(255, 255, 255, 0.7);
        }
        button {
            padding: 1rem;
            background-color: #ff0000;
            color: white;
            border: none;
            border-radius: 10px;
            cursor: pointer;
            font-size: 1.1rem;
            font-weight: 600;
            transition: all 0.3s ease;
            text-transform: uppercase;
            letter-spacing: 1px;
        }
        button:hover {
            background-color: #cc0000;
            transform: translateY(-2px);
            box-shadow: 0 4px 8px rgba(0,0,0,0.2);
        }
        #result {
            margin-top: 2rem;
            background-color: rgba(255, 255, 255, 0.1);
            padding: 1.5rem;
            border-radius: 10px;
            font-size: 0.9rem;
            line-height: 1.6;
        }
        #result h2 {
            color: #ff0000;
            margin-bottom: 1rem;
        }
        #result p {
            margin-bottom: 0.5rem;
        }
        iframe {
            border-radius: 10px;
            margin-top: 1rem;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Traffic Route Optimizer 🚗</h1>
        <form id="route-form">
            <label for="start">From:</label>
            <input type="text" id="start" name="start" required placeholder="Enter starting location">

            <label for="end">Destination:</label>
            <input type="text" id="end" name="end" required placeholder="Enter destination">

            <button type="submit">Optimize Route</button>
        </form>
        <div id="result"></div>
    </div>
    <script>
    // Format a duration given in hours as "H hr M min" (or "M min" below one hour).
    // Rounding happens once, on the total number of minutes, so 1.999 h becomes
    // "2 hr 0 min" rather than "1 hr 60 min".
    function formatTravelTime(hours) {
        const totalMinutes = Math.round(hours * 60);
        const wholeHours = Math.floor(totalMinutes / 60);
        const minutes = totalMinutes % 60;
        return wholeHours >= 1 ? `${wholeHours} hr ${minutes} min` : `${minutes} min`;
    }

    document.getElementById('route-form').addEventListener('submit', function(e) {
    e.preventDefault();
    const start = document.getElementById('start').value;
    const end = document.getElementById('end').value;

    fetch('/predict', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({ start, end }),
    })
    .then(response => {
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        return response.json();
    })
    .then(data => {
        const resultDiv = document.getElementById('result');
        if (data.error) {
            resultDiv.innerHTML = `<p>Error: ${data.error}</p>`;
        } else {
            resultDiv.innerHTML = `
                <h2>Results 🎉</h2>
                <p><strong>From:</strong> ${data.start.place}</p>
                <p><span style="color: #ff0000;">Speed:</span> ${data.start.speed.toFixed(2)} km/h</p>
                <p><span style="color: #ff0000;">Congestion:</span> ${data.start.congestion} 🚦</p>
                <p><strong>Destination:</strong> ${data.end.place}</p>
                <p><span style="color: #ff0000;">Speed:</span> ${data.end.speed.toFixed(2)} km/h</p>
                <p><span style="color: #ff0000;">Congestion:</span> ${data.end.congestion} 🚦</p>
                <p><strong>Estimated travel time:</strong> ${formatTravelTime(data.travel_time)} ⏱️</p>
                <p><strong>Distance:</strong> ${data.distance.toFixed(2)} km 🛣️</p>
                ${data.map_file ? `<iframe src="/map/${data.map_file}" width="100%" height="300" frameborder="0"></iframe>` : ''}
            `;
        }
    })
    .catch((error) => {
        console.error('Error:', error);
        document.getElementById('result').innerHTML = `<p>Error: ${error.message}</p>`;
    });
});
</script>
</body>
</html>


'''

In [61]:
# Write the page to templates/index.html, the template the '/' route renders.
# Create the directory first so this cell also works on a fresh checkout.
os.makedirs('templates', exist_ok=True)
# The markup contains emoji and declares charset UTF-8, so write it as UTF-8
# explicitly instead of relying on the platform's default encoding.
with open('templates/index.html', 'w', encoding='utf-8') as f:
    f.write(html_content)

In [58]:
!cat templates/index.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Traffic Route Optimizer</title>
    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600&display=swap" rel="stylesheet">
    <style>
        body {
            font-family: 'Poppins', sans-serif;
            background: linear-gradient(135deg, #000000 0%, #434343 100%);
            color: #ffffff;
            margin: 0;
            padding: 0;
            display: flex;
            justify-content: center;
            align-items: center;
            min-height: 100vh;
        }
        .container {
            background-color: rgba(255, 255, 255, 0.1);
            backdrop-filter: blur(10px);
            border-radius: 20px;
            padding: 3rem;
            box-shadow: 0 8px 32px rgba(0, 0, 0, 0.37);
            max-width: 600px;
            width: 100%;
        }
        h1 {
            text-align: ce

This code works as well

The best-working version of the code so far.

In [63]:
if __name__ == '__main__':
    # Launch sequence: write the template, open a public ngrok tunnel, then run
    # the Flask development server (blocks until the server is stopped).
    ngrok_tunnel = None
    try:
        # Create 'templates' directory if it doesn't exist
        os.makedirs('templates', exist_ok=True)

        # Create index.html file. The page contains emoji and declares UTF-8, so
        # the encoding is set explicitly rather than left to the platform default.
        with open('templates/index.html', 'w', encoding='utf-8') as f:
            f.write(html_content)

        ngrok_tunnel = ngrok.connect(8000)
        print('Public URL:', ngrok_tunnel.public_url)
        app.run(port=8000)
    except Exception as e:
        # Best-effort launcher: report the problem instead of raising in the cell.
        print(f"An error occurred: {e}")
    finally:
        # Close the tunnel once the server stops so re-running the cell does not
        # leave stale tunnels open.
        # NOTE(review): assumes `ngrok` is pyngrok's module (matches the
        # connect()/public_url usage above) — confirm against the import cell.
        if ngrok_tunnel is not None:
            ngrok.disconnect(ngrok_tunnel.public_url)

Public URL: https://f85e-35-229-181-63.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [03/Sep/2024 19:56:15] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [03/Sep/2024 19:56:16] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:__main__:Received data: {'start': 'Dayton, Ohio', 'end': 'Downtown, Cincinnati'}
INFO:__main__:Features being used for prediction: Index(['hour', 'day_of_week', 'is_weekend', 'is_rush_hour', 'temperature',
       'humidity', 'speed_ratio'],
      dtype='object')
INFO:__main__:Features being used for prediction: Index(['hour', 'day_of_week', 'is_weekend', 'is_rush_hour', 'temperature',
       'humidity', 'speed_ratio'],
      dtype='object')
  G = graph_from_bbox(
INFO:__main__:Optimizing route
INFO:__main__:Returning result: {'start': {'place': 'Dayton, Ohio', 'coordinates': [39.7589478, -84.1916069], 'speed': 19.78, 'congestion': 'Light'}, 'end': {'place': 'Downtown, Cincinnati', 'coordinates': [39.101681150000005, -84.50808279152703], 

Testing starts from here.
