In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
import kagglehub
# NOTE(review): presumably authenticates this session with Kaggle so the private
# datasets requested in the next cell can be downloaded — confirm against the
# kagglehub documentation.
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Request each Kaggle dataset used by this notebook and keep the value returned
# by kagglehub.dataset_download() for each one.
# NOTE(review): none of these *_path variables is referenced by the cells visible
# below, which read from hard-coded '/kaggle/input/...' directories instead —
# confirm the two locations agree when running outside Kaggle.
bajrangikumarmishra_warehouse_dataset_path = kagglehub.dataset_download('bajrangikumarmishra/warehouse-dataset')
bajrangikumarmishra_sathat_dataset_path = kagglehub.dataset_download('bajrangikumarmishra/sathat-dataset')
bajrangikumarmishra_anamolies_detection_path = kagglehub.dataset_download('bajrangikumarmishra/anamolies-detection')
bajrangikumarmishra_rackfall_dataset_path = kagglehub.dataset_download('bajrangikumarmishra/rackfall-dataset')
bajrangikumarmishra_fight_dataset_path = kagglehub.dataset_download('bajrangikumarmishra/fight-dataset')
bajrangikumarmishra_mangodb_idk_path = kagglehub.dataset_download('bajrangikumarmishra/mangodb-idk')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input mount recursively and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, entry) for entry in file_names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import cv2
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Function to parse YOLO labels from the label file
def parse_labels_from_file(file_path):
    """Read a YOLO-style label file into a list of tuples.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of ``(class_id, x, y, w, h)`` tuples, one per line that has
        exactly five whitespace-separated fields. ``class_id`` is an int; the
        other four values are floats. Lines with any other field count are
        skipped.

    Raises:
        ValueError: If a five-field line contains a non-numeric field.
    """
    parsed = []
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split()
            if len(fields) != 5:
                # Not a plain "class x y w h" row; ignore it.
                continue
            values = [float(field) for field in fields]
            parsed.append((int(values[0]), values[1], values[2], values[3], values[4]))
    return parsed

# Convert YOLO format to pixel coordinates
def yolo_to_pixels(image_width, image_height, box):
    """Turn a normalised centre/size box into integer pixel corners.

    Args:
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        box: ``(x, y, w, h)`` where ``x, y`` is the box centre and ``w, h`` its
            size, all expressed as fractions of the image dimensions.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` as ints. Values are truncated with
        ``int()`` and are NOT clamped to the image bounds.
    """
    center_x, center_y, box_w, box_h = box
    half_w = box_w / 2
    half_h = box_h / 2
    left = int((center_x - half_w) * image_width)
    right = int((center_x + half_w) * image_width)
    top = int((center_y - half_h) * image_height)
    bottom = int((center_y + half_h) * image_height)
    return left, top, right, bottom

# Detect anomalies based on custom criteria
def detect_anomalies(labels, image_height, image_width):
    """Return the labels whose box reaches into the bottom 15% of the image.

    Args:
        labels: Iterable of ``(class_id, x, y, w, h)`` entries.
        image_height: Image height in pixels.
        image_width: Image width in pixels.

    Returns:
        A list of ``(class_id, x, y, w, h)`` tuples for every label whose
        pixel ``ymax`` is greater than ``0.85 * image_height``.
    """
    bottom_band_start = 0.85 * image_height
    flagged = []
    for class_id, x, y, w, h in labels:
        # Only the lower edge of the box matters for this criterion.
        box_bottom = yolo_to_pixels(image_width, image_height, (x, y, w, h))[3]
        if box_bottom > bottom_band_start:
            flagged.append((class_id, x, y, w, h))
    return flagged

# Display the image with bounding boxes and detected anomalies
def display_image_with_labels(image_path, labels, is_anomalies=False):
    """Show an image with its label boxes drawn on top.

    Args:
        image_path: Path of the image file to load with OpenCV.
        labels: Iterable of ``(class_id, x, y, w, h)`` entries.
        is_anomalies: When true, ``detect_anomalies`` is run and the boxes it
            returns are drawn in red with an 'Object Fallen' caption; all other
            boxes are drawn in green.

    Returns:
        None. Prints an error and returns early if the image cannot be loaded.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_path}")
        return
    # OpenCV loads BGR; matplotlib expects RGB.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_height, image_width, _ = image.shape

    anomalies = []
    if is_anomalies:
        anomalies = detect_anomalies(labels, image_height, image_width)

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    ax.imshow(image_rgb)

    box_count = 0
    for box_count, (class_id, x, y, w, h) in enumerate(labels, start=1):
        flagged = (class_id, x, y, w, h) in anomalies
        xmin, ymin, xmax, ymax = yolo_to_pixels(image_width, image_height, (x, y, w, h))

        outline = patches.Rectangle(
            (xmin, ymin),
            xmax - xmin,
            ymax - ymin,
            linewidth=2,
            edgecolor='r' if flagged else 'g',
            facecolor='none',
        )
        ax.add_patch(outline)

        # Class caption above the box, anomaly caption below it.
        ax.text(xmin, ymin - 10, f"Class {class_id}", fontsize=8, color='b')
        if flagged:
            ax.text(xmin, ymax + 10, 'Object Fallen', fontsize=10, color='r')

    # Summary banner in the top-left corner of the image.
    if anomalies:
        summary = f"Total Boxes: {box_count}, Anomalies: {len(anomalies)}"
    else:
        summary = f"Total Boxes: {box_count}\nNo Anomalies Detected"
    ax.text(10, 20, summary, fontsize=12, color='yellow', bbox=dict(facecolor='black', alpha=0.5))

    ax.axis('off')
    plt.tight_layout()
    plt.show()

# Dataset locations (image and label directories) on the Kaggle input mount.
normal_img_path = '/kaggle/input/sathat-dataset/train/images'
normal_labels_path = '/kaggle/input/sathat-dataset/train/labels'
anomalies_img_path = '/kaggle/input/anamolies-detection/train/images'
anomalies_labels_path = '/kaggle/input/anamolies-detection/train/labels'

# One entry per pass: (banner, images dir, labels dir, run anomaly detection?).
display_passes = (
    ("Processing Normal Images:", normal_img_path, normal_labels_path, False),
    ("Processing Anomalies Dataset:", anomalies_img_path, anomalies_labels_path, True),
)

for banner, images_dir, labels_dir, flag_anomalies in display_passes:
    print(banner)
    for image_name in os.listdir(images_dir):
        image_path = os.path.join(images_dir, image_name)
        label_path = os.path.join(labels_dir, os.path.splitext(image_name)[0] + ".txt")
        if not os.path.exists(label_path):
            # Images without a matching label file are not displayed.
            continue
        labels = parse_labels_from_file(label_path)
        display_image_with_labels(image_path, labels, is_anomalies=flag_anomalies)

In [None]:
import cv2
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Function to parse YOLO labels from the label file
def parse_labels_from_file(file_path):
    """Load ``(class_id, x, y, w, h)`` tuples from a YOLO-style label file.

    Only lines with exactly five whitespace-separated fields are kept; every
    field is converted with ``float`` and the first is then converted to
    ``int``. A non-numeric field on a five-field line raises ``ValueError``.
    """
    def _to_label(tokens):
        cls, cx, cy, bw, bh = (float(token) for token in tokens)
        return int(cls), cx, cy, bw, bh

    with open(file_path, 'r') as label_file:
        token_rows = (entry.strip().split() for entry in label_file)
        return [_to_label(tokens) for tokens in token_rows if len(tokens) == 5]

# Convert YOLO format to pixel coordinates
def yolo_to_pixels(image_width, image_height, box):
    """Convert a normalised centre/size box into pixel corners inside the image.

    Args:
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        box: ``(x, y, w, h)`` with the box centre and size as fractions of the
            image dimensions.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` as ints, each clamped to
        ``[0, image_width]`` or ``[0, image_height]``.

    Every corner is clamped on BOTH sides. Clamping only the minimum at 0 and
    only the maximum at the image size lets a box that lies entirely outside
    the frame come back with ``xmin > xmax`` (negative width and area); with
    two-sided clamping such a box collapses to zero size on the nearest edge.
    Boxes that overlap the image give the same result as before.
    """
    x, y, w, h = box

    def _clamp(pixel, upper):
        # Keep a coordinate inside [0, upper] whichever side it overshoots.
        return max(0, min(upper, pixel))

    xmin = _clamp(int((x - w / 2) * image_width), image_width)
    xmax = _clamp(int((x + w / 2) * image_width), image_width)
    ymin = _clamp(int((y - h / 2) * image_height), image_height)
    ymax = _clamp(int((y + h / 2) * image_height), image_height)
    return xmin, ymin, xmax, ymax

# Detect anomalies based on criteria, including rack fall, fight, and item fall detection
def detect_anomalies(labels, image_height, image_width, detect_type=None):
    """Select the labels that count as anomalies for the given dataset type.

    Args:
        labels: Iterable of ``(class_id, x, y, w, h)`` entries.
        image_height: Image height in pixels.
        image_width: Image width in pixels.
        detect_type: ``"rack_fall"`` or ``"fight"`` flag every label;
            ``"anomalies"`` flags a label when its box bottom is below 80% of
            the image height, or its pixel area is under 2% or over 50% of the
            image area. Any other value flags nothing.

    Returns:
        A list of the flagged ``(class_id, x, y, w, h)`` tuples. One line is
        printed for each flagged label.
    """
    bottom_limit = 0.8 * image_height
    small_limit = 0.02 * image_width * image_height
    large_limit = 0.5 * image_width * image_height

    anomalies = []
    for class_id, x, y, w, h in labels:
        xmin, ymin, xmax, ymax = yolo_to_pixels(image_width, image_height, (x, y, w, h))
        area = (xmax - xmin) * (ymax - ymin)

        if detect_type == "anomalies":
            # Geometry decides only for the item-fall dataset.
            flagged = ymax > bottom_limit or area < small_limit or area > large_limit
        else:
            # Rack-fall and fight datasets treat every labelled box as an event.
            flagged = detect_type == "rack_fall" or detect_type == "fight"

        if not flagged:
            continue

        anomalies.append((class_id, x, y, w, h))
        if detect_type == "rack_fall":
            print(f"Rack Fall detected: Class {class_id} at position ({x:.2f}, {y:.2f}) with area {area:.2f}")
        elif detect_type == "fight":
            print(f"Fight detected: Class {class_id} at position ({x:.2f}, {y:.2f}) with area {area:.2f}")
        else:
            print(f"Item Fall detected: Class {class_id} at position ({x:.2f}, {y:.2f}) with area {area:.2f}")

    return anomalies

# Display the image with bounding boxes and detected anomalies
def display_image_with_labels(image_path, labels, detect_type=None):
    """Show an image with its label boxes, marking anomalies for ``detect_type``.

    Args:
        image_path: Path of the image file to load with OpenCV.
        labels: Iterable of ``(class_id, x, y, w, h)`` entries.
        detect_type: Passed through to ``detect_anomalies``; also selects the
            caption ("Rack Fall", "Fight" or "Item Fall") drawn under each
            flagged box.

    Returns:
        None. Prints an error and returns early if the image cannot be loaded.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_path}")
        return
    # OpenCV loads BGR; matplotlib expects RGB.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_height, image_width, _ = image.shape

    anomalies = detect_anomalies(labels, image_height, image_width, detect_type=detect_type)

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    ax.imshow(image_rgb)

    captions = {"rack_fall": "Rack Fall", "fight": "Fight", "anomalies": "Item Fall"}
    box_count = 0
    for class_id, x, y, w, h in labels:
        xmin, ymin, xmax, ymax = yolo_to_pixels(image_width, image_height, (x, y, w, h))
        flagged = (class_id, x, y, w, h) in anomalies

        if flagged:
            # Anomaly caption sits just below the box.
            ax.text(xmin, ymax + 10, captions.get(detect_type, ""), fontsize=10, color='r')

        outline = patches.Rectangle(
            (xmin, ymin),
            xmax - xmin,
            ymax - ymin,
            linewidth=2,
            edgecolor='r' if flagged else 'g',
            facecolor='none',
        )
        ax.add_patch(outline)

        # Class caption sits just above the box.
        ax.text(xmin, ymin - 10, f"Class {class_id}", fontsize=8, color='b')
        box_count += 1

    # Summary banner in the top-left corner of the image.
    if anomalies:
        summary = f"Total Boxes: {box_count}, Anomalies: {len(anomalies)}"
    else:
        summary = f"Total Boxes: {box_count}\nNo Anomalies Detected"
    ax.text(10, 20, summary, fontsize=12, color='yellow', bbox=dict(facecolor='black', alpha=0.5))

    ax.axis('off')
    plt.tight_layout()
    plt.show()

# Limit the output to a certain number of images per category
def process_and_display_images(image_path, labels_path, detect_type=None, max_images=10):
    """Display up to ``max_images`` labelled images from a directory.

    Args:
        image_path: Directory whose entries are visited in sorted name order.
        labels_path: Directory holding ``<image stem>.txt`` label files.
        detect_type: Forwarded to ``display_image_with_labels``.
        max_images: Stop once this many images with a label file were handled.

    Entries without a matching label file are reported and do not count
    towards ``max_images``.
    """
    shown = 0
    for entry in sorted(os.listdir(image_path)):
        if shown >= max_images:
            break

        stem = os.path.splitext(entry)[0]
        image_file = os.path.join(image_path, entry)
        label_file = os.path.join(labels_path, stem + ".txt")

        if not os.path.exists(label_file):
            print(f"No label file found for {entry}")
            continue

        labels = parse_labels_from_file(label_file)
        print(f"Processing {image_file} with labels {label_file}")
        display_image_with_labels(image_file, labels, detect_type=detect_type)
        shown += 1

# Dataset locations (image and label directories) on the Kaggle input mount.
normal_img_path = '/kaggle/input/sathat-dataset/train/images'
normal_labels_path = '/kaggle/input/sathat-dataset/train/labels'
anomalies_img_path = '/kaggle/input/anamolies-detection/train/images'
anomalies_labels_path = '/kaggle/input/anamolies-detection/train/labels'
rack_fall_img_path = '/kaggle/input/rackfall-dataset/train/images'
rack_fall_labels_path = '/kaggle/input/rackfall-dataset/train/labels'
fight_img_path = '/kaggle/input/fight-dataset/train/images'
fight_labels_path = '/kaggle/input/fight-dataset/train/labels'

# One entry per dataset: (banner, images dir, labels dir, detect_type).
display_runs = [
    ("Processing Normal Images:", normal_img_path, normal_labels_path, None),
    ("Processing Anomalies Dataset for Item Fall:", anomalies_img_path, anomalies_labels_path, "anomalies"),
    ("Processing Rack Fall Dataset:", rack_fall_img_path, rack_fall_labels_path, "rack_fall"),
    ("Processing Fight Detection Dataset:", fight_img_path, fight_labels_path, "fight"),
]

# Show the first ten labelled images of each dataset, in the order listed above.
for banner, images_dir, labels_dir, kind in display_runs:
    print(banner)
    process_and_display_images(images_dir, labels_dir, detect_type=kind, max_images=10)

In [None]:
import csv
import cv2
import os

# Function to parse YOLO labels from the label file
def parse_labels_from_file(file_path):
    """Parse a YOLO-style label file into ``(class_id, x, y, w, h)`` tuples.

    A line contributes an entry only when it splits into exactly five
    whitespace-separated fields. All five are read as floats and the first is
    then truncated to an int class id. A non-numeric field on such a line
    raises ``ValueError``.
    """
    labels = []
    with open(file_path, 'r') as stream:
        # str.split() with no arguments already ignores surrounding whitespace.
        for fields in map(str.split, stream):
            if len(fields) == 5:
                cls, *geometry = map(float, fields)
                labels.append((int(cls), *geometry))
    return labels

# Convert YOLO format to pixel coordinates
def yolo_to_pixels(image_width, image_height, box):
    """Convert a normalised centre/size box into pixel corners inside the image.

    Args:
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        box: ``(x, y, w, h)`` with the box centre and size as fractions of the
            image dimensions.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` as ints, each limited to
        ``[0, image_width]`` or ``[0, image_height]``.

    Both ends of each axis are clamped on both sides. With one-sided clamping
    (minimum at 0, maximum at the image size) a box completely outside the
    frame produces ``xmin > xmax``; here it collapses to zero size on the
    nearest edge. Boxes overlapping the image are unaffected.
    """
    x, y, w, h = box
    raw_x = (int((x - w / 2) * image_width), int((x + w / 2) * image_width))
    raw_y = (int((y - h / 2) * image_height), int((y + h / 2) * image_height))
    xmin, xmax = (min(max(value, 0), image_width) for value in raw_x)
    ymin, ymax = (min(max(value, 0), image_height) for value in raw_y)
    return xmin, ymin, xmax, ymax

# Detect anomalies based on criteria and return appropriate indicators
def detect_anomalies(labels, detect_fight=False):
    """Summarise which event classes appear among an image's labels.

    Args:
        labels: Sequence of ``(class_id, x, y, w, h)`` entries.
        detect_fight: Class id 2 is reported as a fight only when this is true.

    Returns:
        ``(fight, item_fall, rack_fall, total_objects)``. Each flag is ``1``
        when the corresponding class id (2, 3, 1 respectively) is present and
        the empty string otherwise; ``total_objects`` is ``len(labels)``.
    """
    total_objects = len(labels)

    # Unpacking five fields per label keeps malformed entries an error.
    class_ids = [class_id for class_id, _cx, _cy, _bw, _bh in labels]

    rackfall_detected = 1 if 1 in class_ids else ""
    fight_detected = 1 if (detect_fight and 2 in class_ids) else ""
    itemfall_detected = 1 if 3 in class_ids else ""

    return fight_detected, itemfall_detected, rackfall_detected, total_objects

# Write results to a single CSV file
def save_results_to_csv(results, csv_filename="consolidated_results.csv"):
    """Write result rows to a CSV file, overwriting any existing file.

    Args:
        results: Iterable of rows; each row is written after the header in
            the order given.
        csv_filename: Destination path.

    The header row is Dataset, Image, Fight, ItemFall, RackFall,
    Total_Objects. A confirmation line is printed once the file is closed.
    """
    column_names = ("Dataset", "Image", "Fight", "ItemFall", "RackFall", "Total_Objects")

    # newline='' lets the csv module control line endings itself.
    with open(csv_filename, mode='w', newline='') as out_file:
        sheet = csv.writer(out_file)
        sheet.writerow(column_names)
        for record in results:
            sheet.writerow(record)
    print(f"Results saved to {csv_filename}")

# Process images and store results in a single CSV
def process_images(image_path, labels_path, dataset_name, detect_fight=False, max_images=10):
    """Build one result row per labelled image in a directory.

    Args:
        image_path: Directory whose entries are visited in sorted name order.
        labels_path: Directory holding ``<image stem>.txt`` label files.
        dataset_name: Value stored in the first column of every row.
        detect_fight: Forwarded to ``detect_anomalies``.
        max_images: Stop once this many rows were produced.

    Returns:
        A list of ``[dataset_name, image_name, fight, item_fall, rack_fall,
        total_objects]`` rows. Entries without a label file are reported and
        produce no row.
    """
    rows = []
    for entry in sorted(os.listdir(image_path)):
        # One row is appended per processed image, so len(rows) is the count.
        if len(rows) >= max_images:
            break

        image_file = os.path.join(image_path, entry)
        label_file = os.path.join(labels_path, os.path.splitext(entry)[0] + ".txt")
        if not os.path.exists(label_file):
            print(f"No label file found for {entry}")
            continue

        labels = parse_labels_from_file(label_file)
        print(f"Processing {image_file} with labels {label_file}")
        fight, item_fall, rack_fall, total = detect_anomalies(labels, detect_fight=detect_fight)
        rows.append([dataset_name, entry, fight, item_fall, rack_fall, total])

    return rows

# Dataset locations (image and label directories) on the Kaggle input mount.
normal_img_path = '/kaggle/input/sathat-dataset/train/images'
normal_labels_path = '/kaggle/input/sathat-dataset/train/labels'
anomalies_img_path = '/kaggle/input/anamolies-detection/train/images'
anomalies_labels_path = '/kaggle/input/anamolies-detection/train/labels'
rack_fall_img_path = '/kaggle/input/rackfall-dataset/train/images'
rack_fall_labels_path = '/kaggle/input/rackfall-dataset/train/labels'
fight_img_path = '/kaggle/input/fight-dataset/train/images'
fight_labels_path = '/kaggle/input/fight-dataset/train/labels'

# One entry per dataset: (images dir, labels dir, dataset name, look for fights?).
dataset_runs = [
    (normal_img_path, normal_labels_path, "Normal", False),
    (anomalies_img_path, anomalies_labels_path, "Anomalies", False),
    (rack_fall_img_path, rack_fall_labels_path, "Rack Fall", False),
    (fight_img_path, fight_labels_path, "Fight Detection", True),
]

# Gather up to ten rows per dataset, in the order listed above.
all_results = []
for images_dir, labels_dir, dataset_label, look_for_fights in dataset_runs:
    all_results.extend(
        process_images(images_dir, labels_dir, dataset_label, detect_fight=look_for_fights, max_images=10)
    )

# Write every collected row to one CSV file.
save_results_to_csv(all_results, csv_filename="consolidated_results.csv")

In [None]:
import csv
import os

# Function to parse YOLO labels from the label file
def parse_labels_from_file(file_path):
    """Return the ``(class_id, x, y, w, h)`` entries found in a label file.

    Lines are split on whitespace; only those with exactly five fields are
    used. The five fields are converted to floats and the first one is then
    converted to an int. A non-numeric field on a five-field line raises
    ``ValueError``.
    """
    with open(file_path, 'r') as source:
        candidate_rows = [text.strip().split() for text in source]

    result = []
    for row in candidate_rows:
        if len(row) != 5:
            continue
        numbers = tuple(float(item) for item in row)
        result.append((int(numbers[0]),) + numbers[1:])
    return result

# Detect anomalies based on dataset type
def detect_anomalies(labels, dataset_type):
    """Report whether the class id tied to ``dataset_type`` occurs in ``labels``.

    Args:
        labels: Sequence of label entries whose first element is the class id.
        dataset_type: ``"Fight Detection"`` looks for class id 2,
            ``"Anomalies"`` for class id 3 and ``"Rack Fall"`` for class id 1.
            ``"Normal"`` and any other value look for nothing.

    Returns:
        ``(fight, item_fall, rack_fall, total_objects)``. At most one flag is
        ``1`` (the one matching ``dataset_type``, when its class id is
        present); the rest are ``None``. ``total_objects`` is ``len(labels)``.
    """
    total_objects = len(labels)

    # dataset type -> (position in the flag triple, class id to look for)
    lookup = {
        "Fight Detection": (0, 2),
        "Anomalies": (1, 3),
        "Rack Fall": (2, 1),
    }

    flags = [None, None, None]
    target = lookup.get(dataset_type)
    if target is not None:
        slot, wanted_class = target
        # any() stops at the first match.
        if any(label[0] == wanted_class for label in labels):
            flags[slot] = 1

    fight_detected, itemfall_detected, rackfall_detected = flags
    return fight_detected, itemfall_detected, rackfall_detected, total_objects

# Write results to a single CSV file
def save_results_to_csv(results, csv_filename="consolidated_results.csv"):
    """Dump result rows to ``csv_filename``, replacing any existing file.

    Args:
        results: Iterable of rows, written below the header in the order given.
        csv_filename: Destination path.

    The first row of the file holds the column names Dataset, Image, Fight,
    ItemFall, RackFall, Total_Objects. A confirmation line is printed after
    the file has been closed.
    """
    header_row = ["Dataset", "Image", "Fight", "ItemFall", "RackFall", "Total_Objects"]

    # newline='' leaves line-ending handling to the csv module.
    with open(csv_filename, mode='w', newline='') as handle:
        out = csv.writer(handle)
        out.writerows([header_row])
        out.writerows(results)
    print(f"Results saved to {csv_filename}")

# Process images and store results in a single CSV
def process_images(image_path, labels_path, dataset_name, max_images=10):
    """Collect one result row for each labelled image in a directory.

    Args:
        image_path: Directory whose entries are visited in sorted name order.
        labels_path: Directory holding ``<image stem>.txt`` label files.
        dataset_name: Stored in the first column of every row and also passed
            to ``detect_anomalies`` as the dataset type.
        max_images: Stop once this many labelled images were handled.

    Returns:
        A list of ``[dataset_name, image_name, fight, item_fall, rack_fall,
        total_objects]`` rows. Entries without a label file are reported and
        skipped.
    """
    collected = []
    handled = 0
    for image_name in sorted(os.listdir(image_path)):
        if handled >= max_images:
            break

        stem, _extension = os.path.splitext(image_name)
        label_file = os.path.join(labels_path, stem + ".txt")
        if not os.path.exists(label_file):
            print(f"No label file found for {image_name}")
            continue

        image_file = os.path.join(image_path, image_name)
        labels = parse_labels_from_file(label_file)
        print(f"Processing {image_file} with labels {label_file}")
        flags_and_total = detect_anomalies(labels, dataset_name)
        collected.append([dataset_name, image_name, *flags_and_total])
        handled += 1

    return collected

# Dataset locations (image and label directories) on the Kaggle input mount.
normal_img_path = '/kaggle/input/sathat-dataset/train/images'
normal_labels_path = '/kaggle/input/sathat-dataset/train/labels'
anomalies_img_path = '/kaggle/input/anamolies-detection/train/images'
anomalies_labels_path = '/kaggle/input/anamolies-detection/train/labels'
rack_fall_img_path = '/kaggle/input/rackfall-dataset/train/images'
rack_fall_labels_path = '/kaggle/input/rackfall-dataset/train/labels'
fight_img_path = '/kaggle/input/fight-dataset/train/images'
fight_labels_path = '/kaggle/input/fight-dataset/train/labels'

# Gather up to ten rows per dataset. The dataset name doubles as the dataset
# type that detect_anomalies() switches on, so the strings must match exactly.
all_results = [
    row
    for images_dir, labels_dir, dataset_label in (
        (normal_img_path, normal_labels_path, "Normal"),
        (anomalies_img_path, anomalies_labels_path, "Anomalies"),
        (rack_fall_img_path, rack_fall_labels_path, "Rack Fall"),
        (fight_img_path, fight_labels_path, "Fight Detection"),
    )
    for row in process_images(images_dir, labels_dir, dataset_label, max_images=10)
]

# Write every collected row to one CSV file.
save_results_to_csv(all_results, csv_filename="consolidated_results.csv")

In [None]:
import pandas as pd
# Load the per-image results; an earlier cell writes a file with this name via
# save_results_to_csv(). The relative path resolves against the working directory.
df = pd.read_csv('consolidated_results.csv')

In [None]:
import pandas as pd
import numpy as np

# Function to process data for each dataset type
def process_data(df):
    """Set the Fight / ItemFall / RackFall columns from each row's dataset type.

    Args:
        df: DataFrame with a ``Dataset`` column. It is modified IN PLACE.

    Returns:
        The same ``df`` object, so callers that keep both names see one frame.

    Each of the three columns is created (or overwritten, discarding any
    values already present) so that it holds ``1.0`` on rows whose ``Dataset``
    is ``'Fight Detection'``, ``'Anomalies'`` or ``'Rack Fall'`` respectively
    and ``NaN`` everywhere else; ``'Normal'`` rows stay ``NaN`` in all three.

    The assignment is a positional boolean mask rather than a per-row
    ``df.at[index, ...]`` write. Writing by index label marks every row that
    shares the label when the index is not unique, and a Python-level row
    loop is unnecessary here.
    """
    # dataset type -> flag column it populates (kept in the original column order)
    flag_column_for = {
        'Fight Detection': 'Fight',
        'Anomalies': 'ItemFall',
        'Rack Fall': 'RackFall',
    }

    for dataset_type, column in flag_column_for.items():
        df[column] = np.where(df['Dataset'] == dataset_type, 1.0, np.nan)

    return df

# Process the data
# process_data() writes its columns into `df` itself and returns that same
# object, so after this line `processed_df` and `df` are two names for one
# DataFrame.
processed_df = process_data(df)

# Display the processed DataFrame
print(processed_df)


In [None]:
# Save to CSV
# index=False leaves the DataFrame index out of the written file.
processed_df.to_csv('processed_dataset.csv', index=False)

# Save to Excel
# NOTE(review): to_excel presumably needs an Excel writer engine (e.g. openpyxl)
# installed in this environment — confirm.
processed_df.to_excel('processed_dataset.xlsx', index=False)

In [None]:
# Show the current contents of `df` as this cell's output.
df

In [None]:
import pandas as pd
# Add a column for rack number
# The number is the dense rank of the "Image" value: rows with the same image
# name share a number and the numbers are cast to int.
# NOTE(review): presumably the image-name ordering is meant to stand in for a
# physical rack id — confirm that this mapping is intended.
df["Rack_Number"] = df["Image"].rank(method="dense").astype(int)

# Replace NaN with 0
# inplace=True modifies `df` itself, so any other name bound to the same
# DataFrame object sees the filled values too.
df.fillna(0, inplace=True)
df

In [None]:
# Save to CSV
# `processed_df` was assigned from process_data(df), which returns `df` itself;
# as long as that still holds, the Rack_Number column and the NaN -> 0 fill
# applied to `df` are part of what gets written here.
processed_df.to_csv('processed_dataset1.csv', index=False)

# Save to Excel
# NOTE(review): to_excel presumably needs an Excel writer engine (e.g. openpyxl)
# installed in this environment — confirm.
processed_df.to_excel('processed_dataset1.xlsx', index=False)