# Semantic Multi-Agent Traffic Coordination System
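This notebook loads the traffic knowledge-base datasets (road rules, emergency protocols, incident cases, and traffic policies), preprocesses their free-text fields, and trains baseline text classifiers that predict incident severity.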


[Semantic Multi-Agent Traffic Coordination System](#scrollTo=a-sc2cILMIyq)

[Step 0: Import Libraries](#scrollTo=qA1hmKJANLKF)

[Step 1: Load Dataset](#scrollTo=iObWKB8SQ-YJ)

[Step 2: Preprocessing](#scrollTo=MD78REV9RFVr)

>[2.1 Basic Text Cleaning](#scrollTo=D6WrtzhI3RkP)

>[2.2 Named Entity Recognition (NER)](#scrollTo=woied8x6-8aX)

>[2.3 Tokenization](#scrollTo=JVLRTNg33w8A)

>[2.4 Stopword Removal](#scrollTo=jiH-VZ8K3y8q)

>[2.5 Lemmatization](#scrollTo=AhDkmU2r34aA)

>[2.6 Bigram & Trigram Detection](#scrollTo=xBPwHDyZANRn)

>[2.7 Rebuilding Processed Text](#scrollTo=jDUA668u36XH)

>[2.8 Dataset Statistics](#scrollTo=Vb67E4aF3-82)

[Step 3: NLP Models](#scrollTo=7RzSLozMR8bZ)

[Step 4: Evaluation Metrics](#scrollTo=53rOZDEjSEzX)



## Step 0: Import Libraries

In [None]:
import pandas as pd
import sys
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from math import radians, cos, sin, asin, sqrt
import datetime
import json
import re
from typing import Dict, List, Tuple
from collections import Counter
import warnings
warnings.filterwarnings('ignore')
from sklearn.linear_model import LinearRegression
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# NLP Libraries
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.chunk import ne_chunk
from nltk.tag import pos_tag

### *Required downloads*

In [None]:
print("Downloading NLTK packages...")
nltk_downloads = ['punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger',
                  'maxent_ne_chunker', 'words', 'omw-1.4']

# Best-effort download: keep going when one package fails, but report the
# failures instead of hiding them. print() is used rather than warnings.warn()
# because this notebook globally silences warnings.
failed_downloads = []
for package in nltk_downloads:
    try:
        # nltk.download() signals most failures by returning False, not by raising.
        if not nltk.download(package, quiet=True):
            failed_downloads.append(package)
    except Exception as exc:  # e.g. network or filesystem errors
        failed_downloads.append(package)
        print(f"  {package}: {exc}")

if failed_downloads:
    print("WARNING: could not download NLTK packages:", ", ".join(failed_downloads))


Downloading NLTK packages...


In [None]:
# Extra tokenizer resource, fetched separately from the list above.
# NOTE(review): presumably needed by word_tokenize on recent NLTK releases — confirm against the installed version.
nltk.download('punkt_tab', quiet=True)

True

In [None]:
# Install spaCy and fetch the small English pipeline that is loaded later with spacy.load.
# NOTE(review): `%pip install` with a pinned version would target the kernel's own
# environment and make reruns reproducible — confirm the version to pin.
!pip install spacy
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m53.4 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# gensim provides the Phrases model used for bigram/trigram detection below.
# NOTE(review): consider `%pip install` with a pinned version — confirm the version to pin.
!pip install gensim




## Step 1: Load Dataset


In [None]:
# Road segments: speed limits, lanes, allowed vehicles and free-text notes.
road_rules = pd.read_json("road_rules.json")

# Preview the first five rows.
road_rules.head(n=5)

Unnamed: 0,id,road_name,segment_id,area,speed_limit_kmh,min_speed_kmh,lanes,road_type,allowed_vehicles,has_emergency_lane,typical_congestion_level,peak_period,notes
0,1,King Road,S001,Intersection 1,80,40,3,arterial,"[car, truck, bus, motorcycle, emergency]",True,high,morning_peak,"Segment of King Road near Intersection 1, ofte..."
1,2,Tahlia Street,S002,Intersection 2,60,30,2,collector,"[car, bus, motorcycle, emergency]",False,medium,evening_peak,Commercial corridor with shops and restaurants...
2,3,Prince Sultan Road,S003,Intersection 3,70,40,3,arterial,"[car, truck, bus, motorcycle, emergency]",True,medium,morning_peak,Major north–south corridor connecting resident...
3,4,Haram Road,S004,Intersection 4,80,50,4,highway,"[car, truck, bus, emergency]",True,high,all_day,High-demand corridor serving pilgrims and loca...
4,5,Airport Road,S005,Intersection 5,100,60,4,highway,"[car, truck, bus, emergency]",True,medium,morning_peak,"Main access route to the airport, frequent eme..."


In [None]:
# Emergency protocols: type, severity, description, steps and services to notify.
emergency_protocols = pd.read_json("emergency_protocols.json")

# Preview the first five rows.
emergency_protocols.head(n=5)

Unnamed: 0,id,protocol_code,emergency_type,default_severity,description,steps,notify_services,max_response_time_minutes,requires_manual_confirmation
0,1,MAJOR_ACCIDENT,major_accident,critical,Collision involving multiple vehicles with pos...,[Verify exact location using GPS and nearest i...,"[police, ambulance, najm]",10,False
1,2,DELAYED_EMERGENCY_VEHICLE,delayed_emergency_vehicle,high,"An ambulance, fire truck, or police vehicle is...",[Identify blocked segments on the planned emer...,"[ambulance, police, traffic_control_center]",5,False
2,3,FIRE_OR_SMOKE_NEAR_ROAD,fire_or_smoke,high,Presence of smoke or fire near the roadway det...,[Confirm smoke or fire levels using camera fee...,"[fire_department, ambulance, police]",8,True
3,4,FLOODING_ROAD,flooding_road,medium,"Road segment partially or fully flooded, posin...",[Mark flooded road segments as closed in the k...,"[municipality, police]",20,True
4,5,MINOR_ACCIDENT,minor_accident,medium,Two-vehicle collision with no serious injuries...,[Verify that there are no serious injuries usi...,"[police, najm]",15,False


In [None]:
# Historical incident cases; `description` and `severity_label` feed the models below.
incident_cases = pd.read_json("incident_cases.json")

# Preview the first five rows.
incident_cases.head(n=5)

Unnamed: 0,id,title,road_name,area,segment_id,time_of_day,weather,cause,severity_label,vehicles_involved,involved_vehicle_types,is_emergency_vehicle_delayed,description,sensors_triggered,response_taken,resolution_time_minutes,secondary_incident
0,1,High severity accident on King Road,King Road,Intersection 1,S001,morning_peak,rainy,rear_end_collision,high,3,"[car, truck]",False,High severity rear-end collision on King Road ...,"[camera, speed_radar, loop_detector]",dispatch_ambulance,60,False
1,2,Medium congestion on Tahlia Street,Tahlia Street,Intersection 2,S002,evening_peak,clear,event_crowd,medium,0,"[car, bus]",False,Medium congestion on Tahlia Street near Inters...,"[camera, loop_detector]",reroute_traffic,45,False
2,3,Critical ambulance delay on Haram Road,Haram Road,Intersection 4,S004,morning_peak,foggy,congestion,critical,1,"[ambulance, car]",True,Critical delay of an ambulance on Haram Road n...,"[camera, acoustic, speed_radar]",manual_signal_override,25,False
3,4,Low severity breakdown on Airport Road,Airport Road,Intersection 5,S005,midday,clear,breakdown,low,1,[car],False,Single vehicle breakdown on the shoulder of Ai...,[camera],dispatch_tow_truck,30,False
4,5,High severity multi-vehicle crash on Industria...,Industrial Road,Intersection 12,S012,morning_peak,dusty,multi_vehicle_collision,high,4,"[truck, car, bus]",True,High severity multi-vehicle crash on Industria...,"[camera, smoke, loop_detector]",dispatch_ambulance,90,True


In [None]:
# Traffic policies: category, applicable severity, description and affected roads.
traffic_policies = pd.read_json("traffic_policies.json")

# Preview the first five rows.
traffic_policies.head(n=5)

Unnamed: 0,id,policy_code,category,applicable_severity,description,priority_rank,related_incident_types,time_window,applies_to_roads
0,1,EMERGENCY_PRIORITY_HIGH,emergency_priority,high,When an emergency vehicle is detected within 2...,1,"[emergency_delay, accident]",all_day,"[Haram Road, Hospital Street, Airport Road]"
1,2,EMERGENCY_PRIORITY_MEDIUM,emergency_priority,medium,When an emergency vehicle is approaching with ...,2,[emergency_delay],morning_peak,"[Ring Road East, Ring Road West]"
2,3,EMERGENCY_PRIORITY_LOW,emergency_priority,low,"Under low congestion, give normal green but av...",3,[emergency_delay],night,"[King Road, Corniche Road]"
3,4,CONGESTION_MITIGATION_HIGH,congestion_mitigation,high,If average speed on a segment drops below 10 k...,1,[congestion],evening_peak,"[King Road, Market Road, Industrial Road]"
4,5,CONGESTION_MITIGATION_MEDIUM,congestion_mitigation,medium,If average speed is between 10 and 25 km/h wit...,2,"[congestion, event_crowd]",all_day,"[Tahlia Street, Corniche Road]"


In [None]:
# Load the graph description. The encoding is set explicitly so that reading
# does not depend on the platform's default text encoding.
with open('graph_data.json', 'r', encoding='utf-8') as f:
    graph_data = json.load(f)

# Step 2: Preprocessing

## 2.1 Basic Text Cleaning

First, we clean the text by lowercasing it, removing punctuation and other non-alphanumeric symbols, and collapsing repeated whitespace.


In [None]:
def clean_text(text):
    """Normalise free text for the downstream NLP steps.

    The text is lowercased, every character other than ``a-z``, ``0-9`` and
    whitespace is replaced by a space, and runs of whitespace are collapsed.

    Args:
        text: Raw text. Any non-string value (``None``, ``NaN``, numbers) is
            treated as missing.

    Returns:
        The cleaned string, or ``""`` for non-string input.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    # Replace (rather than delete) punctuation so hyphen- or dash-joined words
    # stay separate tokens: "rear-end" -> "rear end", not "rearend".
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    return re.sub(r'\s+', ' ', text).strip()

# Clean the free-text column of every dataset: incident, protocol and policy
# descriptions, plus the road notes. Each entry is (frame, raw column, cleaned column).
for frame, raw_col, cleaned_col in (
    (incident_cases, 'description', 'description_cleaned'),
    (emergency_protocols, 'description', 'description_cleaned'),
    (traffic_policies, 'description', 'description_cleaned'),
    (road_rules, 'notes', 'notes_cleaned'),
):
    frame[cleaned_col] = frame[raw_col].apply(clean_text)

print("Text cleaning completed")



Text cleaning completed


## 2.2 Named Entity Recognition (NER)

Here, we extract named entities such as locations, vehicles, and events from the incident descriptions to improve search relevance and structured retrieval.

In [None]:
import spacy
nlp = spacy.load("en_core_web_sm")

def extract_entities(text):
    """Return the named entities spaCy finds in ``text``.

    Args:
        text: Text to analyse. Non-string or blank input yields no entities.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, possibly empty.
    """
    if not isinstance(text, str) or not text.strip():
        return []
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]

# Run NER on the raw description rather than on `description_cleaned`: the cleaned
# text is lowercased and stripped of punctuation, which removes the capitalisation
# cues (e.g. "King Road") that the statistical NER model uses to spot entities.
incident_cases['entities'] = incident_cases['description'].apply(extract_entities)


## 2.3 Tokenization

Next, we break the cleaned text into individual words (tokens).

In [None]:
def tokenize_text(text):
    """Split ``text`` into word tokens with NLTK's ``word_tokenize``.

    Falsy input (an empty string or ``None``) yields an empty list without
    calling the tokenizer.
    """
    return word_tokenize(text) if text else []

# Tokenize the cleaned text of every dataset (incidents, protocols, policies,
# road rules). Each entry is (frame, cleaned text column).
for frame, cleaned_col in (
    (incident_cases, 'description_cleaned'),
    (emergency_protocols, 'description_cleaned'),
    (traffic_policies, 'description_cleaned'),
    (road_rules, 'notes_cleaned'),
):
    frame['tokens'] = frame[cleaned_col].apply(tokenize_text)

print("Tokenization completed")

Tokenization completed


## 2.4 Stopword Removal



Then, we remove common English words that don't carry important meaning.

In [None]:
# Traffic vocabulary that must never be filtered out.
important_words = {
    'road', 'street', 'vehicle', 'accident', 'emergency',
    'ambulance', 'police', 'fire', 'traffic', 'lane',
    'intersection', 'delay', 'blocked', 'congestion',
}

# NLTK's English stopword list minus the protected traffic terms.
stop_words = set(stopwords.words('english')).difference(important_words)

def remove_stopwords(tokens):
    """Return the tokens that are not in the module-level ``stop_words`` set.

    Order and duplicates of the remaining tokens are preserved.
    """
    kept = []
    for word in tokens:
        if word in stop_words:
            continue
        kept.append(word)
    return kept

# Filter stopwords out of the token lists of incidents, protocols, policies
# and road rules.
for frame in (incident_cases, emergency_protocols, traffic_policies, road_rules):
    frame['tokens_clean'] = frame['tokens'].apply(remove_stopwords)

print("Stopword removal completed")


Stopword removal completed


## 2.5 Lemmatization

After that, we convert each word into its base form. The WordNet lemmatizer is called without part-of-speech tags, so every token is treated as a noun (e.g., vehicles --> vehicle).


In [None]:
# Single shared lemmatizer instance, reused for every dataset.
lemmatizer = WordNetLemmatizer()

def lemmatize_tokens(tokens):
    """Lemmatize each token with the shared WordNet lemmatizer.

    ``lemmatize`` is called without a ``pos`` argument, so the lemmatizer's
    default part of speech applies to every token.
    """
    return list(map(lemmatizer.lemmatize, tokens))

# Lemmatize the stopword-filtered tokens of incidents, protocols, policies
# and road rules.
for frame in (incident_cases, emergency_protocols, traffic_policies, road_rules):
    frame['tokens_lemmatized'] = frame['tokens_clean'].apply(lemmatize_tokens)

print("Lemmatization completed")

Lemmatization completed


## 2.6 Bigram & Trigram Detection


Next, we detect common multi-word phrases (like “high severity”) from the stopword-filtered incident tokens to preserve meaningful combinations of words for better retrieval.

In [None]:
from gensim.models import Phrases
from gensim.models.phrases import Phraser

# Phrase-detection settings shared by the bigram and trigram passes.
NGRAM_MIN_COUNT = 2
NGRAM_THRESHOLD = 5

# Build bigrams and trigrams from the stopword-filtered incident tokens.
sentences = incident_cases['tokens_clean'].tolist()

bigram = Phrases(sentences, min_count=NGRAM_MIN_COUNT, threshold=NGRAM_THRESHOLD)
# Pass min_count explicitly here as well: gensim's default (5) is stricter than
# the bigram setting and too high for a corpus of only a handful of incidents.
trigram = Phrases(bigram[sentences], min_count=NGRAM_MIN_COUNT, threshold=NGRAM_THRESHOLD)

# Frozen, lighter-weight versions of the trained models, used for transformation.
bigram_mod = Phraser(bigram)
trigram_mod = Phraser(trigram)

def apply_ngrams(tokens):
    """Merge detected phrases in ``tokens`` into single underscore-joined tokens.

    The bigram model is applied first and the trigram model runs on its output,
    so a trigram can be formed from a bigram plus one more token.

    Returns:
        A new list of tokens.
    """
    tokens = bigram_mod[tokens]
    tokens = trigram_mod[tokens]
    return list(tokens)

incident_cases['tokens_ngram'] = incident_cases['tokens_clean'].apply(apply_ngrams)


In [None]:
# Side-by-side preview: the first five token lists before and after phrase merging.
print("Original tokens:", incident_cases['tokens_clean'].head(), sep="\n")
print("\nTokens after bigrams/trigrams:", incident_cases['tokens_ngram'].head(), sep="\n")


Original tokens:
0    [high, severity, rearend, collision, king, roa...
1    [medium, congestion, tahlia, street, near, int...
2    [critical, delay, ambulance, haram, road, near...
3    [single, vehicle, breakdown, shoulder, airport...
4    [high, severity, multivehicle, crash, industri...
Name: tokens_clean, dtype: object

Tokens after bigrams/trigrams:
0    [high_severity, rearend, collision, king, road...
1    [medium, congestion, tahlia, street, near_inte...
2    [critical, delay, ambulance, haram, road_near,...
3    [single, vehicle, breakdown, shoulder, airport...
4    [high_severity, multivehicle, crash, industria...
Name: tokens_ngram, dtype: object


## 2.7 Rebuilding Processed Text
Finally, we join the cleaned and lemmatized tokens back into a processed text string.




In [None]:
# Rebuild one space-separated string per row from the lemmatized tokens.
for frame in (incident_cases, emergency_protocols, traffic_policies, road_rules):
    frame['text_processed'] = frame['tokens_lemmatized'].apply(' '.join)

print("Processed text created")


Processed text created


## 2.8 Dataset Statistics



Here, we print an overview of the dataset sizes and key distributions.

In [None]:

# Dataset sizes, one line each.
print(
    f"Total incidents: {len(incident_cases)}",
    f"Total protocols: {len(emergency_protocols)}",
    f"Total policies: {len(traffic_policies)}",
    sep="\n",
)

# Label distributions of the incident cases.
severity_counts = incident_cases['severity_label'].value_counts()
print("\nSeverity distribution:")
print(severity_counts)

cause_counts = incident_cases['cause'].value_counts()
print("\nCause distribution:")
print(cause_counts)

Total incidents: 12
Total protocols: 5
Total policies: 12

Severity distribution:
severity_label
medium      4
high        3
low         3
critical    2
Name: count, dtype: int64

Cause distribution:
cause
rear_end_collision         2
event_crowd                2
breakdown                  2
congestion                 1
multi_vehicle_collision    1
roadwork                   1
flooding                   1
pedestrian_crossing        1
fire                       1
Name: count, dtype: int64


### *Save preprocessed versions as JSON files*

In [None]:
 incident_cases.to_json(
    "incident_cases_preprocessed.json",
    orient="records",
    indent=2
)

emergency_protocols.to_json(
    "emergency_protocols_preprocessed.json",
    orient="records",
    indent=2
)

traffic_policies.to_json(
    "traffic_policies_preprocessed.json",
    orient="records",
    indent=2
)

road_rules.to_json(
    "road_rules_preprocessed.json",
    orient="records",
    indent=2
)

## Step 3: NLP Models

In [None]:
# Use the processed text as features
X = incident_cases['text_processed']

# Choose target label — you can switch to 'cause' if needed
# NOTE(review): several 'cause' values occur only once in this dataset, and a
# stratified split needs at least two samples per class — confirm before switching.
y = incident_cases['severity_label']

# Train–test split
from sklearn.model_selection import train_test_split

# Stratification needs the test set to hold at least one sample per class.
# With 12 samples and 4 classes, test_size=0.4 gives 12 * 0.4 = 4.8, which
# scikit-learn rounds UP to 5 test samples, leaving 7 for training
# (the vectorizer shapes printed further down confirm the 7 / 5 split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y
)

# Peek at the first five training texts and their labels.
X_train[:5], y_train[:5]

(3    single vehicle breakdown shoulder airport road...
 8    medium severity nearmiss involving pedestrian ...
 2    critical delay ambulance haram road near inter...
 6    low severity slowdown corniche road near inter...
 5    lane closure due scheduled roadworks market ro...
 Name: text_processed, dtype: object,
 3         low
 8      medium
 2    critical
 6         low
 5      medium
 Name: severity_label, dtype: object)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words counts: learn the vocabulary on the training texts only, then
# encode both splits with that fixed vocabulary.
bow = CountVectorizer().fit(X_train)
X_train_bow = bow.transform(X_train)
X_test_bow = bow.transform(X_test)

print("BoW training shape:", X_train_bow.shape)
print("BoW testing shape:", X_test_bow.shape)


BoW training shape: (7, 72)
BoW testing shape: (5, 72)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Unigram TF-IDF: vocabulary and IDF weights come from the training texts only.
tfidf = TfidfVectorizer().fit(X_train)
X_train_tfidf = tfidf.transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

print("TF-IDF training shape:", X_train_tfidf.shape)
print("TF-IDF testing shape:", X_test_tfidf.shape)


TF-IDF training shape: (7, 72)
TF-IDF testing shape: (5, 72)


In [None]:
# TF-IDF over unigrams and bigrams, again fitted on the training texts only.
tfidf_ngram = TfidfVectorizer(ngram_range=(1, 2)).fit(X_train)
X_train_ngram = tfidf_ngram.transform(X_train)
X_test_ngram = tfidf_ngram.transform(X_test)

print("TF-IDF n-gram training shape:", X_train_ngram.shape)


TF-IDF n-gram training shape: (7, 158)


In [None]:
from sklearn.linear_model import LogisticRegression

# One logistic-regression classifier per feature representation.
# fit() returns the estimator itself, so construction and training can be chained.
lr_bow = LogisticRegression(max_iter=1000).fit(X_train_bow, y_train)
lr_tfidf = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)
lr_ngram = LogisticRegression(max_iter=1000).fit(X_train_ngram, y_train)

# Keep the fitted n-gram model as the cell's displayed value.
lr_ngram


In [None]:
from sklearn.svm import LinearSVC

# One linear SVM per feature representation. random_state is fixed (same seed
# as the train/test split) because the solver shuffles the data internally;
# without it repeated runs can produce slightly different models.
svm_bow = LinearSVC(random_state=42)
svm_bow.fit(X_train_bow, y_train)

svm_tfidf = LinearSVC(random_state=42)
svm_tfidf.fit(X_train_tfidf, y_train)

svm_ngram = LinearSVC(random_state=42)
svm_ngram.fit(X_train_ngram, y_train)


In [None]:
from sklearn.naive_bayes import MultinomialNB

# One multinomial naive Bayes classifier per feature representation.
# fit() returns the estimator itself, so construction and training can be chained.
nb_bow = MultinomialNB().fit(X_train_bow, y_train)
nb_tfidf = MultinomialNB().fit(X_train_tfidf, y_train)
nb_ngram = MultinomialNB().fit(X_train_ngram, y_train)

# Keep the fitted n-gram model as the cell's displayed value.
nb_ngram


## Step 4: Evaluation Metrics

In [None]:
from sklearn.metrics import accuracy_score, classification_report

def evaluate(model, X_test, y_test, name="Model"):
    """Print the accuracy and a per-class report for a fitted classifier.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features in the representation the model was trained on.
        y_test: True labels for ``X_test``.
        name: Heading printed above the metrics.

    Returns:
        None. The metrics are printed.
    """
    print(f"\n===== {name} =====")
    preds = model.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, preds))
    # zero_division=0 reports 0.0 for classes that are never predicted without
    # depending on warnings being globally suppressed.
    print(classification_report(y_test, preds, zero_division=0))


In [None]:
# Evaluate every (model, test features, heading) combination in a fixed order:
# logistic regression, then SVM, then naive Bayes.
for model, test_features, heading in (
    (lr_bow, X_test_bow, "Logistic Regression (BoW)"),
    (lr_tfidf, X_test_tfidf, "Logistic Regression (TF-IDF)"),
    (lr_ngram, X_test_ngram, "Logistic Regression (TF-IDF + n-gram)"),
    (svm_bow, X_test_bow, "SVM (BoW)"),
    (svm_tfidf, X_test_tfidf, "SVM (TF-IDF)"),
    (svm_ngram, X_test_ngram, "SVM (TF-IDF + n-gram)"),
    (nb_bow, X_test_bow, "Naive Bayes (BoW)"),
    (nb_tfidf, X_test_tfidf, "Naive Bayes (TF-IDF)"),
    (nb_ngram, X_test_ngram, "Naive Bayes (TF-IDF + n-gram)"),
):
    evaluate(model, test_features, y_test, heading)



===== Logistic Regression (BoW) =====
Accuracy: 0.6
              precision    recall  f1-score   support

    critical       0.00      0.00      0.00         1
        high       1.00      1.00      1.00         1
         low       0.33      1.00      0.50         1
      medium       1.00      0.50      0.67         2

    accuracy                           0.60         5
   macro avg       0.58      0.62      0.54         5
weighted avg       0.67      0.60      0.57         5


===== Logistic Regression (TF-IDF) =====
Accuracy: 0.8
              precision    recall  f1-score   support

    critical       0.00      0.00      0.00         1
        high       1.00      1.00      1.00         1
         low       0.50      1.00      0.67         1
      medium       1.00      1.00      1.00         2

    accuracy                           0.80         5
   macro avg       0.62      0.75      0.67         5
weighted avg       0.70      0.80      0.73         5


===== Logistic Regre

In [None]:
# (display name, fitted model, matching test features) for every trained model.
model_runs = (
    ("LR (BoW)", lr_bow, X_test_bow),
    ("LR (TF-IDF)", lr_tfidf, X_test_tfidf),
    ("LR (TF-IDF+Ngram)", lr_ngram, X_test_ngram),
    ("SVM (BoW)", svm_bow, X_test_bow),
    ("SVM (TF-IDF)", svm_tfidf, X_test_tfidf),
    ("SVM (TF-IDF+Ngram)", svm_ngram, X_test_ngram),
    ("NB (BoW)", nb_bow, X_test_bow),
    ("NB (TF-IDF)", nb_tfidf, X_test_tfidf),
    ("NB (TF-IDF+Ngram)", nb_ngram, X_test_ngram),
)

# Test-set accuracy per model, collected into one comparison table.
results = {
    "Model": [label for label, _, _ in model_runs],
    "Accuracy": [
        model.score(test_features, y_test)
        for _, model, test_features in model_runs
    ],
}

comparison_df = pd.DataFrame(results)
comparison_df


Unnamed: 0,Model,Accuracy
0,LR (BoW),0.6
1,LR (TF-IDF),0.8
2,LR (TF-IDF+Ngram),0.4
3,SVM (BoW),0.8
4,SVM (TF-IDF),0.8
5,SVM (TF-IDF+Ngram),0.4
6,NB (BoW),0.6
7,NB (TF-IDF),0.6
8,NB (TF-IDF+Ngram),0.6


---



