#Scene Detection

In [None]:
import cv2
import os
import numpy as np

# Extract key frames from a video: the first sampled frame is always saved,
# and afterwards a frame is saved whenever its grayscale histogram differs
# enough from the last saved frame.
video_path = r"D:\Road Management\trim_videos\Bhingar.mp4"
output_dir = r"C:\Users\krish\Documents\a_aRMP\newframes\ar"

os.makedirs(output_dir, exist_ok=True)

# Histogram correlation below this value counts as a scene change
# (HISTCMP_CORREL yields 1.0 for identical histograms).
hist_thresh = 0.6
# Only every Nth frame is analysed.
frame_interval = 5

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of reporting "Saved 0 key frames" for a bad path.
    raise IOError(f"Could not open video: {video_path}")

prev_hist = None
frame_count = 0
saved_count = 0

try:
    # grab() advances to the next frame without decoding it; retrieve() is
    # only called for the frames that are actually analysed.
    while cap.grab():
        if frame_count % frame_interval == 0:
            ret, frame = cap.retrieve()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
            hist = cv2.normalize(hist, hist).flatten()

            is_key_frame = (
                prev_hist is None
                or cv2.compareHist(prev_hist, hist, cv2.HISTCMP_CORREL) < hist_thresh
            )
            if is_key_frame:
                frame_path = os.path.join(output_dir, f"frame_{saved_count:03d}.jpg")
                # imwrite signals failure through its return value, not an exception.
                if cv2.imwrite(frame_path, frame):
                    saved_count += 1
                else:
                    print(f"⚠️ Could not write frame: {frame_path}")
                # The comparison reference is always the last detected key frame.
                prev_hist = hist

        frame_count += 1
finally:
    # Release the capture handle even if processing fails part-way.
    cap.release()

print(f"Done! Saved {saved_count} key frames.")


#COCO to YOLO

In [None]:
import os
import json
from pathlib import Path

# Convert COCO bounding-box annotations into YOLO label files
# (one "<class> <x_center> <y_center> <w> <h>" line per box, normalised to 0-1).
coco_path = r"D:\Road Management\data_for_label\labels.json"
images_dir = r"D:\Road Management\data_for_label\images"
output_dir = r"D:\Road Management\data_for_label\labels"
os.makedirs(output_dir, exist_ok=True)

with open(coco_path, 'r', encoding='utf-8') as f:
    coco = json.load(f)

image_map = {img["id"]: img for img in coco["images"]}

# Collect the lines for every label file first so that each file is written
# exactly once. Appending per annotation would duplicate every box whenever
# this cell is re-run.
label_lines = {}
for ann in coco["annotations"]:
    image_info = image_map[ann["image_id"]]

    file_stem = Path(image_info["file_name"]).stem
    width = image_info["width"]
    height = image_info["height"]

    # COCO bbox is [x_min, y_min, width, height] in pixels.
    x, y, w, h = ann["bbox"]
    # NOTE(review): assumes category ids are 1-based and contiguous — confirm
    # against the "categories" section of the export.
    class_id = ann["category_id"] - 1

    x_center = (x + w / 2) / width
    y_center = (y + h / 2) / height
    w /= width
    h /= height

    label_lines.setdefault(file_stem, []).append(
        f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n"
    )

for file_stem, lines in label_lines.items():
    label_path = os.path.join(output_dir, f"{file_stem}.txt")
    # 'w' (not 'a') keeps the conversion idempotent across re-runs.
    with open(label_path, 'w') as label_file:
        label_file.writelines(lines)


#model training

In [None]:
!pip install -U ultralytics

from ultralytics import YOLO

# Load the 'yolov8s.pt' checkpoint as the starting point for training.
model = YOLO('yolov8s.pt')

# Training settings gathered in one place; runs are written under road/yolov8_coco.
train_options = dict(
    data=r"C:\Users\krish\Documents\a_aRMP\Pr\data.yaml",
    epochs=50,
    imgsz=640,
    batch=8,
    cache=True,
    project='road',
    name='yolov8_coco',
    val=True,
)

results = model.train(**train_options)




#Labelling

In [None]:
# NOTE(review): these weights come from a run named 'yolov8m_coco', while the
# training cell in this notebook writes to project 'road' / name 'yolov8_coco'
# — confirm this is the intended checkpoint.
model = YOLO(r"D:\Road Management\data_for_label\road\yolov8m_coco\weights\best.pt")

# Run inference on the folder, saving both the rendered images and the
# predicted boxes as text files.
results = model.predict(
    source=r"D:\Road Management\for_prediction",
    save=True,
    save_txt=True,
    imgsz=640,
    conf=0.25,
)

#Extracting GPS Data

In [None]:
!pip install pytesseract opencv-python

import pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

import cv2
import re
import pandas as pd
import os
from PIL import Image

# Setup
# Folder whose .jpg frames are OCR-scanned for the GPS/time overlay text.
folder_path = r"D:\Road Management\A_Project\labelled\images"
# Relative path: resolved against the notebook's current working directory.
# NOTE(review): the clustering cell reads "gps_timestamp_data.csv" from a
# different folder — confirm which file is meant to feed it.
output_csv = r"a_try\gps_timestamp_data1.csv"
# One dict per processed image; turned into a DataFrame at the end.
data = [] 

def numerical_sort(value):
    """Sort key: the first run of digits in ``value`` as an int, or -1 if none exists."""
    first_number = re.search(r'\d+', value)
    if first_number is None:
        return -1
    return int(first_number.group())

# Make sure the CSV destination exists *before* the OCR pass, so a missing
# folder cannot discard the results of the whole run at the final to_csv().
os.makedirs(os.path.dirname(output_csv) or ".", exist_ok=True)

# Patterns for the overlay text, compiled once instead of on every image.
lat_pattern = re.compile(r'Lat(?:itude)?\s*[:\-]?\s*([-+]?\d{1,3}\.\d+)', re.IGNORECASE)
lon_pattern = re.compile(r'Long(?:itude)?\s*[:\-]?\s*([-+]?\d{1,3}\.\d+)', re.IGNORECASE)
lon_fallback_pattern = re.compile(r'Long\s+([-+]?\d{1,3}\.\d+)', re.IGNORECASE)
time_pattern = re.compile(r'(\d{2}[/\-| ]\d{2}[/\-| ]\d{2,4})\s*(\d{2}:\d{2}:\d{2})\s*([APap][Mm])')

# Process frames in numeric order of the first number in their file name.
files = [f for f in os.listdir(folder_path) if f.lower().endswith('.jpg')]
files.sort(key=numerical_sort)

for filename in files:
    img_path = os.path.join(folder_path, filename)
    img = cv2.imread(img_path)

    if img is None:
        print(f"⚠️ Could not read image: {filename}")
        continue

    # Grayscale -> light blur -> Otsu binarisation before handing to Tesseract.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    pil_img = Image.fromarray(thresh)
    extracted_text = pytesseract.image_to_string(pil_img)
    # Replace any non-ASCII OCR output with spaces so the patterns see plain text.
    extracted_text = re.sub(r'[^\x00-\x7F]+', ' ', extracted_text)

    lat_match = lat_pattern.search(extracted_text)
    lon_match = lon_pattern.search(extracted_text) or lon_fallback_pattern.search(extracted_text)
    time_match = time_pattern.search(extracted_text)

    latitude = lat_match.group(1) if lat_match else "Not found"
    longitude = lon_match.group(1).replace('°', '') if lon_match else "Not found"
    if time_match:
        # Normalise the date separators ('|' and '-') to '/'.
        date_part = time_match.group(1).replace('|', '/').replace('-', '/')
        timestamp = f"{date_part} {time_match.group(2)} {time_match.group(3).upper()}"
    else:
        timestamp = "Not found"

    data.append({
        "Image": filename,
        "Latitude": latitude,
        "Longitude": longitude,
        "Timestamp": timestamp
    })

    print(f"{filename} → Lat: {latitude}, Lon: {longitude}, Time: {timestamp}")

df = pd.DataFrame(data)
df.to_csv(output_csv, index=False)
print(f"\n✅ Data saved to {output_csv}")


#separating frames based on Location

In [None]:
!pip install pandas scikit-learn geopy

import pandas as pd
import os
import shutil
from sklearn.cluster import DBSCAN
import numpy as np
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import re

# Input CSV with Image / Latitude / Longitude / Timestamp columns.
# NOTE(review): the OCR cell above writes "a_try\gps_timestamp_data1.csv" —
# confirm this path points at the intended file.
csv_path = r"D:\Road Management\A_Project\gps_timestamp_data.csv"
# Folder the frames listed in the CSV are copied from.
image_folder = r"D:\Road Management\A_Project\labelled\images"
# Root folder that receives one sub-folder per location cluster.
output_base = r"C:\Users\krish\Documents\a_aRMP\a_try\location_wise_frames"
# DBSCAN neighbourhood radius in metres (converted to radians in main()).
distance_threshold_meters = 1000
# Passed to DBSCAN as min_samples.
min_samples_cluster = 1
# Per-cluster summary written by main().
output_csv = r"C:\Users\krish\Documents\a_aRMP\a_try\cluster.csv"

def clean_folder_name(name):
    """Turn a place name into a folder-safe string.

    Removes the characters ``\\ / : " * ? < > |``, replaces spaces with
    underscores, truncates to 100 characters and strips surrounding whitespace.
    """
    without_reserved = re.sub(r'[\\/:"*?<>|]+', '', name)
    underscored = '_'.join(without_reserved.split(' '))
    truncated = underscored[:100]
    return truncated.strip()

def get_location_name(geolocator, lat, lon):
    """Reverse-geocode ``(lat, lon)`` and return the most specific place name found.

    Calls ``geolocator.reverse`` and returns the value of the first address
    field present, checked in order from road to state. Returns
    "Unknown_Location" when nothing usable comes back or when the lookup
    raises (the error is printed, not propagated).
    """
    fallback = "Unknown_Location"
    preferred_fields = ['road', 'suburb', 'neighbourhood', 'village', 'town', 'city', 'county', 'state']
    try:
        place = geolocator.reverse((lat, lon), language='en')
        address = place.raw.get('address') if place else None
        if not address:
            return fallback
        available = [field for field in preferred_fields if field in address]
        return address[available[0]] if available else fallback
    except Exception as e:
        print(f"⚠️ Reverse geocoding error at ({lat},{lon}): {e}")
        return fallback


def main():
    """Cluster geotagged frames by location and copy them into per-cluster folders.

    Reads the module-level settings ``csv_path``, ``image_folder``,
    ``output_base``, ``distance_threshold_meters``, ``min_samples_cluster``
    and ``output_csv``. Steps:

    1. Load the CSV and keep only rows with numeric latitude/longitude.
    2. Cluster the coordinates with DBSCAN using the haversine metric.
    3. Reverse-geocode the first and last point of each cluster to name it.
    4. Write a per-cluster summary to ``output_csv``.
    5. Copy each image into ``output_base/<folder name>``.
    """
    print("⏳ Loading data...")
    df = pd.read_csv(csv_path)

    # Coerce rather than astype(float): "Not found", blank cells and any other
    # OCR garbage all become NaN and are dropped together. NaN coordinates
    # would otherwise reach DBSCAN, which rejects them.
    total_rows = len(df)
    df['Latitude'] = pd.to_numeric(df['Latitude'], errors='coerce')
    df['Longitude'] = pd.to_numeric(df['Longitude'], errors='coerce')
    # .copy() so the column assignments below act on an independent frame.
    df = df.dropna(subset=['Latitude', 'Longitude']).copy()
    skipped_rows = total_rows - len(df)
    if skipped_rows:
        print(f"⚠️ Skipped {skipped_rows} rows without usable coordinates.")

    if df.empty:
        print("⚠️ No rows with valid coordinates; nothing to cluster.")
        return

    df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce', dayfirst=True)
    missing_ts = df['Timestamp'].isna().sum()
    print(f"⚠️ Missing timestamps in {missing_ts} rows.")

    # Order by timestamp, with the image name as tie-breaker.
    df['Sort_Key'] = df['Timestamp'].astype(str) + "_" + df['Image']
    df = df.sort_values(by='Sort_Key')

    # The haversine metric works on radians, so both the coordinates and the
    # distance threshold (metres -> km -> radians) are converted.
    coords = df[['Latitude', 'Longitude']].to_numpy()
    radians_coords = np.radians(coords)
    kms_per_radian = 6371.0088
    epsilon = distance_threshold_meters / 1000.0 / kms_per_radian

    print(f"⏳ Clustering with DBSCAN (eps={epsilon:.6f})...")
    db = DBSCAN(eps=epsilon, min_samples=min_samples_cluster, algorithm='ball_tree', metric='haversine')
    cluster_labels = db.fit_predict(radians_coords)
    df['Group'] = cluster_labels
    n_clusters = len(set(cluster_labels))
    print(f"✅ Found {n_clusters} groups.")

    geolocator = Nominatim(user_agent="gps_grouping_app")
    # Throttle every lookup to at most one per second by wrapping the helper
    # itself in the rate limiter, so no reverse call bypasses it.
    lookup_location = RateLimiter(
        lambda lat, lon: get_location_name(geolocator, lat, lon),
        min_delay_seconds=1,
    )

    cluster_info = {
        'Group_ID': [],
        'Folder_Name': [],
        'Start_Coordinates': [],
        'End_Coordinates': [],
        'Start_Location': [],
        'End_Location': [],
        'Number_of_Images': []
    }

    group_folder_map = {}
    location_count = {}

    for group_id in sorted(set(cluster_labels)):
        group_df = df[df['Group'] == group_id].sort_values(by='Sort_Key')
        group_points = group_df[['Latitude', 'Longitude']]
        group_images = group_df['Image']

        start_lat, start_lon = group_points.iloc[0]['Latitude'], group_points.iloc[0]['Longitude']
        end_lat, end_lon = group_points.iloc[-1]['Latitude'], group_points.iloc[-1]['Longitude']

        start_location = lookup_location(start_lat, start_lon)
        if (end_lat, end_lon) == (start_lat, start_lon):
            # Same point: reuse the result instead of a second network request.
            end_location = start_location
        else:
            end_location = lookup_location(end_lat, end_lon)

        # Several clusters can resolve to the same place name, so a running
        # counter keeps the folder names unique.
        base_name = clean_folder_name(start_location)
        location_count[base_name] = location_count.get(base_name, 0) + 1
        folder_name = f"{base_name}_{location_count[base_name]}"

        group_folder_map[group_id] = folder_name

        print(f"Group {group_id} -> Folder: {folder_name} | Start: ({start_lat}, {start_lon}) | End: ({end_lat}, {end_lon})")

        cluster_info['Group_ID'].append(group_id)
        cluster_info['Folder_Name'].append(folder_name)
        cluster_info['Start_Coordinates'].append(f"{start_lat}, {start_lon}")
        cluster_info['End_Coordinates'].append(f"{end_lat}, {end_lon}")
        cluster_info['Start_Location'].append(start_location)
        cluster_info['End_Location'].append(end_location)
        cluster_info['Number_of_Images'].append(len(group_images))

    cluster_df = pd.DataFrame(cluster_info)
    # Ensure the destination folder exists; to_csv does not create it.
    os.makedirs(os.path.dirname(output_csv) or ".", exist_ok=True)
    cluster_df.to_csv(output_csv, index=False)
    print(f"✅ Cluster info saved to {output_csv}")

    print(f"⏳ Copying images to folders...")
    os.makedirs(output_base, exist_ok=True)

    for group_id, folder_name in group_folder_map.items():
        group_folder_path = os.path.join(output_base, folder_name)
        os.makedirs(group_folder_path, exist_ok=True)

        group_images = df[df['Group'] == group_id]['Image']
        for img in group_images:
            src = os.path.join(image_folder, img)
            dst = os.path.join(group_folder_path, img)
            if os.path.exists(src):
                shutil.copy2(src, dst)
            else:
                print(f"⚠️ File not found: {src}")

    print("✅ All done!")

if __name__ == "__main__":
    main()


#Separating into different folders based on label type with annotation

In [None]:
import os
import shutil
from collections import Counter

# Folder holding one YOLO-format .txt label file per image.
annotation_folder = r"D:\Road Management\A_Project\labelled\labels" 
# Root of the per-location folders; each sub-folder is scanned for images.
location_base_folder = r"C:\Users\krish\Documents\a_aRMP\a_try\location_wise_frames" 
# Labels that win when several labels tie for the highest count in an image.
priority_labels = ["surface damage", "pothole", "cracks", "manhole"]

# YOLO class id -> human-readable label name.
category_map = {
    0: "pothole", 1: "manhole", 2: "surface damage", 3: "cracks",
    4: "edge line", 5: "lane mark", 6: "lane divider", 7: "zebra crossing",
    8: "speed breakers", 9: "no entry zone", 10: "warning lines",
    11: "patches", 12: "sign board"
}

def _choose_top_label(label_counts, box_data):
    """Pick the label an image is filed under from its label counts and boxes."""
    ranked = label_counts.most_common()
    top_label, top_count = ranked[0]
    tied_labels = [label for label, count in ranked if count == top_count]

    # Walk priority_labels in its own order so that, among tied labels, the
    # highest-priority one wins rather than the one seen first in the file.
    prioritized = [label for label in priority_labels if label in tied_labels]
    if prioritized:
        return prioritized[0]

    # Otherwise prefer the tied label with the largest box whose centre lies in
    # the left half of the image; with no such box the most common label stays.
    max_area = 0
    for label, x_center, y_center, width, height in box_data:
        if label in tied_labels and x_center < 0.5:
            area = width * height
            if area > max_area:
                max_area = area
                top_label = label
    return top_label


def categorize_labels_in_locations():
    """Sort each location folder's images into ``<label>/images`` and ``<label>/labels``.

    For every image directly inside a sub-folder of ``location_base_folder``,
    reads the matching label file from ``annotation_folder``, chooses one label
    for the image, copies the label file into ``<label>/labels`` and moves the
    image into ``<label>/images``. Images without a label file, or whose label
    file has no usable lines, are reported and left in place.
    """
    for location_folder_name in os.listdir(location_base_folder):
        location_folder_path = os.path.join(location_base_folder, location_folder_name)
        if not os.path.isdir(location_folder_path):
            continue

        images = [f for f in os.listdir(location_folder_path) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

        for image_name in images:
            base_name = os.path.splitext(image_name)[0]
            label_name = base_name + ".txt"
            label_path = os.path.join(annotation_folder, label_name)
            image_path = os.path.join(location_folder_path, image_name)

            if not os.path.exists(label_path):
                print(f"Label file missing for {image_name}")
                continue

            label_counts = Counter()
            box_data = []
            with open(label_path, "r") as f:
                for line in f:
                    values = line.split()
                    if not values:
                        # Blank lines (e.g. a trailing newline) carry no box.
                        continue
                    if len(values) < 5:
                        print(f"Skipping malformed line in {label_name}: {line.strip()!r}")
                        continue
                    class_id = int(values[0])
                    label_str = category_map.get(class_id, "unknown")
                    label_counts[label_str] += 1
                    box_data.append((label_str, float(values[1]), float(values[2]), float(values[3]), float(values[4])))

            if not label_counts:
                print(f"No labels found in {label_name}")
                continue

            top_label = _choose_top_label(label_counts, box_data)

            label_folder_path = os.path.join(location_folder_path, top_label)
            label_subfolder = os.path.join(label_folder_path, "labels")
            image_subfolder = os.path.join(label_folder_path, "images")
            os.makedirs(label_subfolder, exist_ok=True)
            os.makedirs(image_subfolder, exist_ok=True)

            # The label file is copied (the source folder keeps its copy),
            # whereas the image is moved out of the location folder.
            shutil.copy(label_path, os.path.join(label_subfolder, label_name))
            print(f"Copied {label_name} to {label_subfolder}")

            shutil.move(image_path, os.path.join(image_subfolder, image_name))
            print(f"Moved {image_name} to {image_subfolder}")

# Run the categorisation over every location folder, then report completion.
categorize_labels_in_locations()
print("✅ Images moved and label files copied into separate folders inside label-wise directories.")


#total summary in csv

In [None]:
import os
import pandas as pd
from collections import Counter

# Inputs: the per-cluster CSV and the folder tree holding each cluster's
# <label>/labels sub-folders.
csv_path = r"C:\Users\krish\Documents\a_aRMP\a_try\cluster.csv" 
base_folder = r"C:\Users\krish\Documents\a_aRMP\a_try\location_wise_frames"  
# YOLO class id -> label name; each name becomes a count column.
category_map = {
    0: "pothole", 1: "manhole", 2: "surface damage", 3: "cracks",
    4: "edge line", 5: "lane mark", 6: "lane divider", 7: "zebra crossing",
    8: "speed breakers", 9: "no entry zone", 10: "warning lines",
    11: "patches", 12: "sign board"
}

label_names = list(category_map.values())

df = pd.read_csv(csv_path)

# Every label column starts at zero; clusters whose folder is missing keep it.
for label in label_names:
    df[label] = 0

for idx, row in df.iterrows():
    folder_path = os.path.join(base_folder, row["Folder_Name"])
    if not os.path.exists(folder_path):
        continue

    label_counter = Counter()
    for label_folder in os.listdir(folder_path):
        label_folder_path = os.path.join(folder_path, label_folder, "labels")
        if not os.path.isdir(label_folder_path):
            continue

        txt_files = [name for name in os.listdir(label_folder_path) if name.endswith(".txt")]
        for txt_file in txt_files:
            with open(os.path.join(label_folder_path, txt_file), "r") as f:
                # First token of each non-blank line is the class id.
                class_ids = (int(parts[0]) for parts in map(str.split, f) if parts)
                label_counter.update(category_map.get(class_id, "unknown") for class_id in class_ids)

    # Counts for ids outside category_map land under "unknown" and are not written.
    for label in label_names:
        df.at[idx, label] = label_counter[label]

output_csv_path = r"C:\Users\krish\Documents\a_aRMP\a_try\summary.csv"
df.to_csv(output_csv_path, index=False)
print("✅ Updated cluster summary saved with label-wise counts.")
