<a href="https://colab.research.google.com/github/Hcktopia/3rd-Week-/blob/main/BradyHackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
# NOTE(review): the later cells read the dataset and weights from Google Drive
# paths, and `uploaded` is not referenced again in the visible cells — confirm
# that this upload step is still needed.
uploaded = files.upload()  # Upload your zip file (e.g. brady_project.zip)

In [None]:
import os
import csv
from ultralytics import YOLO

def read_classes_from_folder(folder_path):
    """
    Read class names from the `_classes.csv` file located in `folder_path`.

    The CSV is expected to hold one class name per line. Only the first column
    of each row is used; it is stripped of surrounding whitespace, and rows that
    are blank or whose first column is empty are skipped.

    Args:
        folder_path: Directory expected to contain `_classes.csv`.

    Returns:
        A list of class names in file order. The list is empty when the file
        does not exist (a message is printed in that case) or when it contains
        no usable names.
    """
    csv_file = os.path.join(folder_path, "_classes.csv")
    if not os.path.exists(csv_file):
        print(f"No _classes.csv found in {folder_path}")
        return []

    # "utf-8-sig" decodes plain UTF-8 as well, but additionally drops a leading
    # byte-order mark. With plain "utf-8" a BOM written by spreadsheet tools
    # would stay glued to the first class name (str.strip() does not remove it).
    with open(csv_file, newline='', encoding='utf-8-sig') as csvfile:
        return [
            row[0].strip()
            for row in csv.reader(csvfile)
            if row and row[0].strip()
        ]

def main():
    """
    Fine-tune the pre-trained YOLO11n classification model on the Drive dataset.

    Steps:
      1. Read the class names from `train/_classes.csv`; print a message and
         return early when none are found.
      2. Write `data.yaml` into the dataset root, recording the train/val
         folders, the class count and the class names, and print it.
      3. Train "yolo11n-cls.pt" for 10 epochs at imgsz=640 on the dataset root.

    Returns:
        None.
    """
    # Base directory on Google Drive for your dataset.
    base_dir = r"/content/drive/MyDrive/datahack"
    base_dir = base_dir.replace("\\", "/")

    # Set folder paths.
    train_folder = os.path.join(base_dir, "train")
    val_folder = os.path.join(base_dir, "val")

    # Read class names from the _classes.csv file in the training folder.
    classes = read_classes_from_folder(train_folder)
    if not classes:
        print("No classes found in _classes.csv. Please ensure the file exists and has the class names.")
        return
    nc = len(classes)

    # Create a YAML summary of the dataset. It is kept as a human-readable
    # record; classification training below does not read it.
    data_yaml_content = (
        f"train: {train_folder}\n"
        f"val: {val_folder}\n"
        f"nc: {nc}\n"
        f"names: {classes}\n"
    )

    # Save the YAML file in the base directory.
    yaml_path = os.path.join(base_dir, "data.yaml")
    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(data_yaml_content)

    print("Created data.yaml for training:")
    print(data_yaml_content)

    # Load your YOLO classification model (using a pre-trained YOLO11n model).
    model_cls = YOLO("yolo11n-cls.pt")

    # For classification models the `data` argument must be the dataset root
    # directory (the folder holding the train/ and val/ splits), not a YAML
    # file: a YAML path is not a directory, so the dataset lookup fails.
    # NOTE(review): assumes train/ and val/ each contain one sub-folder per
    # class, as Ultralytics classification requires — confirm the Drive layout.
    model_cls.train(data=base_dir, epochs=10, imgsz=640)
    print("Training complete.")

# Start the training run only when this code is executed directly (the guard is
# false when the code is imported as a module).
if __name__ == "__main__":
    main()

In [None]:
import os
import torch
from ultralytics import YOLO

#########################################
#   FIXED CODE DESCRIPTIONS             #
#########################################
# Caption text for each standard, keyed by the base code, i.e. the class name
# without its "_Before"/"_After" suffix. The text is shown to the user when a
# violation of that code is predicted.
base_code_descriptions = {
    # 29 CFR 1910.37(a)(3) is the exit-route paragraph, not a machine-guarding
    # rule, so the caption describes exit routes.
    "OSHA 1910.37(a)(3)": "Exit routes must be kept free and unobstructed; no materials or equipment may be placed, permanently or temporarily, within the exit route.",
    "OSHA 1910.303(e)(1)": "Electrical equipment must be marked with the manufacturer's identification and rating information.",
    "OSHA 1910.303(g)(1)": "Adequate working space must be maintained around electrical equipment for safe operation and maintenance.",
    "OSHA 1910.157(c)(1)": "Portable fire extinguishers must be provided, properly mounted, and clearly identified for quick access.",
    "ANSI A13.1 (Pipe Marking)": "This standard defines requirements for marking and identifying piping systems using color codes and labels.",
    "ANSI Z358.1-2014 (Emergency Equipment)": "This standard specifies requirements for emergency eyewash and shower equipment to ensure rapid decontamination."
}

#########################################
#   CLASSIFICATION PIPELINE             #
#########################################
# Loaded classifiers keyed by weights path, so that classifying many images
# with the same weights reads the model from disk only once.
_CLASSIFIER_CACHE = {}

# Hand-written class order, used only when the model does not carry usable
# class names of its own.
_FALLBACK_CLASS_NAMES = [
    "OSHA 1910.37(a)(3)_Before",
    "OSHA 1910.37(a)(3)_After",
    "OSHA 1910.303(e)(1)_Before",
    "OSHA 1910.303(e)(1)_After",
    "OSHA 1910.303(g)(1)_Before",
    "OSHA 1910.303(g)(1)_After",
    "OSHA 1910.157(c)(1)_Before",
    "OSHA 1910.157(c)(1)_After",
    "ANSI A13.1 (Pipe Marking)_Before",
    "ANSI A13.1 (Pipe Marking)_After",
    "ANSI Z358.1-2014 (Emergency Equipment)_Before",
    "ANSI Z358.1-2014 (Emergency Equipment)_After"
]

_BEFORE_SUFFIX = "_before"
_AFTER_SUFFIX = "_after"


def _load_classifier(model_path):
    """Return the YOLO model for `model_path`, loading it on first use only."""
    model = _CLASSIFIER_CACHE.get(model_path)
    if model is None:
        model = YOLO(model_path)
        _CLASSIFIER_CACHE[model_path] = model
    return model


def _resolve_class_name(result, predicted_index):
    """Map a predicted index to a class name, preferring the model's own names."""
    names = getattr(result, "names", None)
    model_name = None
    if isinstance(names, dict):
        model_name = names.get(predicted_index)
    elif isinstance(names, (list, tuple)) and 0 <= predicted_index < len(names):
        model_name = names[predicted_index]

    # The model's index-to-name table reflects the order fixed at training
    # time, so it is authoritative whenever it follows the Before/After naming.
    if isinstance(model_name, str) and model_name.lower().endswith(
        (_BEFORE_SUFFIX, _AFTER_SUFFIX)
    ):
        return model_name

    if predicted_index < 0 or predicted_index >= len(_FALLBACK_CLASS_NAMES):
        print(f"Warning: Invalid predicted index {predicted_index}. Defaulting to class 0.")
        predicted_index = 0
    return _FALLBACK_CLASS_NAMES[predicted_index]


def classify_and_caption(image_path, base_code_descriptions, device, model_path):
    """
    Classify an image using a YOLO classification model fine-tuned on 12 classes.
    The predicted class is in the form "<BaseCode>_Before" or "<BaseCode>_After".

    - If the predicted class ends with "_Before", it is considered a violation.
    - If it ends with "_After", it is considered no violation.
    (The suffix is matched case-insensitively.)

    Args:
        image_path: Path of the image to classify.
        base_code_descriptions: Mapping from base code to its caption text.
        device: Device to run inference on; passed to the model's predict call.
            When None, the model's own device selection is used.
        model_path: Path of the model weights file.

    Returns:
        None when `model_path` is not an existing file (a message is printed).
        Otherwise a tuple (classification, probability, caption) where:
          - classification is a string like "Violation - ANSI Z358.1-2014 (Emergency Equipment)"
          - probability is the model's confidence
          - caption is an explanation (or "No violation detected.")
    """
    if not os.path.isfile(model_path):
        print(f"Model file not found at {model_path}.")
        return None
    model_cls = _load_classifier(model_path)

    predict_kwargs = {"source": image_path, "verbose": False}
    if device is not None:
        predict_kwargs["device"] = device
    results = model_cls.predict(**predict_kwargs)
    result = results[0].cpu()

    predicted_index = int(result.probs.top1)
    predicted_prob = float(result.probs.top1conf)

    predicted_class = _resolve_class_name(result, predicted_index)
    lowered = predicted_class.lower()
    if lowered.endswith(_BEFORE_SUFFIX):
        # Slice off the suffix only; str.replace would also remove the same
        # text if it appeared elsewhere in the name.
        base_code = predicted_class[:-len(_BEFORE_SUFFIX)]
        classification = f"Violation - {base_code}"
        caption = base_code_descriptions.get(base_code, "No description available.")
    elif lowered.endswith(_AFTER_SUFFIX):
        base_code = predicted_class[:-len(_AFTER_SUFFIX)]
        classification = f"No Violation - {base_code}"
        caption = "No violation detected."
    else:
        classification = predicted_class
        caption = "No description available."

    return classification, predicted_prob, caption

#########################################
#                MAIN                   #
#########################################
def main():
    """
    Evaluate the "best" and "last" weights on the Drive test images.

    For each model, every image under the test directory (matched by file
    extension) is classified and the prediction, code and caption are printed.
    The ground truth is inferred from the image's directory path: "_before"
    means "Violation", "_after" means "No Violation" (case-insensitive); images
    in other directories are classified but not counted. The accuracy printed
    per model only compares the Violation / No Violation status, not the code.

    Returns:
        None.
    """
    # Set Google Drive paths for the test dataset.
    test_dir = r"/content/drive/MyDrive/data_flat/test"
    valid_extensions = (".jpg", ".jpeg", ".png", ".bmp")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define the two models to test:
    models_to_test = [
        {"label": "Best Model", "path": r"/content/drive/MyDrive/train#2/weights/best.pt"},
        {"label": "Last Model", "path": r"/content/drive/MyDrive/train#2/weights/last.pt"}
    ]

    # os.walk yields nothing for a missing directory, which would otherwise be
    # reported as "no labeled test images" instead of the real problem.
    if not os.path.isdir(test_dir):
        print(f"Test directory not found at {test_dir}.")
        return

    for model_info in models_to_test:
        print(f"\nTesting model: {model_info['label']} (Path: {model_info['path']})")

        # Check the weights once per model rather than failing on every image.
        if not os.path.isfile(model_info["path"]):
            print(f"Model file not found at {model_info['path']}. Skipping {model_info['label']}.")
            continue

        correct = 0
        total = 0
        # Iterate through test images
        for root, dirnames, filenames in os.walk(test_dir):
            dirnames.sort()  # in-place sort makes the traversal order reproducible
            for filename in sorted(filenames):
                if not filename.lower().endswith(valid_extensions):
                    continue
                image_path = os.path.join(root, filename)

                # Infer ground truth from folder name (for evaluation)
                root_lower = root.lower()
                if "_before" in root_lower:
                    gt_status = "Violation"
                elif "_after" in root_lower:
                    gt_status = "No Violation"
                else:
                    gt_status = None

                result = classify_and_caption(image_path, base_code_descriptions, device, model_info["path"])
                if result is None:
                    continue
                classification, pred_prob, caption = result
                print(f"Image: {image_path}")
                print(f"Prediction: {classification} (Confidence: {pred_prob:.2f})")
                print(f"Violation Code: {classification.split(' - ')[-1]}")
                print(f"Caption: {caption}")
                print("-" * 50)

                if gt_status is not None:
                    total += 1
                    # "Violation - ..." / "No Violation - ..." start with the status.
                    if classification.lower().startswith(gt_status.lower()):
                        correct += 1

        if total > 0:
            accuracy = 100 * correct / total
            print(f"\n{model_info['label']} Test Accuracy: {accuracy:.2f}% ({correct} out of {total} images)")
        else:
            print(f"No labeled test images found for evaluation with model {model_info['label']}.")

# Start the evaluation run only when this code is executed directly (the guard
# is false when the code is imported as a module).
if __name__ == "__main__":
    main()

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive. The dataset and weights paths used by
# the training and evaluation cells all live under /content/drive/MyDrive, so
# this cell has to be run before them.
drive.mount('/content/drive')

In [None]:
!pip install ultralytics