<a href="https://colab.research.google.com/github/Nada-Elghaweet/Ai-safety-system/blob/main/Late_fusions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [140]:
import pickle
import joblib
import librosa
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import os
import zipfile
import cv2
from skimage.feature import hog
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [141]:
audio_svm = joblib.load("violence_knn_model.pkl")
audio_lr  = joblib.load("audio_pipeline.pkl")
pca_svm_pipeline = joblib.load("violence_pca_svm_pipeline.joblib")
mobilenet_model = load_model("mobilenet_model.h5")
image_rf  = joblib.load("random_forest_model (1).pkl")



In [142]:
CLASS_NAMES = ["Non_violence ", "violence "]
SAMPLE_RATE = 16000

In [143]:
#AUDIO FEATURE EXTRACTION
def extract_mfcc_40(audio_path):
    audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    return np.mean(mfcc, axis=1).reshape(1, -1)

def extract_mfcc_13(audio_path):
    audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    return np.mean(mfcc, axis=1).reshape(1, -1)


In [144]:
AUDIO_DIR = "/content/audio_test.zip"
extract_path = "/content/audio_test"
os.makedirs(extract_path, exist_ok=True)

In [145]:
with zipfile.ZipFile(AUDIO_DIR, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [146]:
audio_files = [
    os.path.join(root, f)
    for root, dirs, files in os.walk(extract_path)
    for f in files
    if f.lower().endswith((".wav", ".mp3", ".m4a")) and not f.startswith("._")
]

In [147]:
TEST_ZIP = "/content/Test_image.zip"
TEST_EXTRACT = "/content/test_images"
os.makedirs(TEST_EXTRACT, exist_ok=True)

with zipfile.ZipFile(TEST_ZIP, 'r') as zip_ref:
    zip_ref.extractall(TEST_EXTRACT)

# Recursive search for images
test_images = [
    os.path.join(root, f)
    for root, dirs, files in os.walk(TEST_EXTRACT)
    for f in files
    if f.lower().endswith((".jpg", ".png", ".jpeg"))
]

print("Found images:", test_images[:10])


Found images: ['/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.15 AM.jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.15 AM (1).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM.jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.12 AM.jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.14 AM.jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM (1).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM (2).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.14 AM (2).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.14 AM (1).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.15 AM (2).jpeg']


In [148]:
def preprocess_image(path):
    # MobileNet
    img_mn = image.load_img(path, target_size=(128,128))
    x_mn = np.expand_dims(image.img_to_array(img_mn)/255.0, axis=0)

    # PCA-SVM & Random Forest
    img = cv2.imread(path)

    # For PCA-SVM: grayscale , resize 128x128, max pooling
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (128,128))

    # max pooling like training
    h, w = gray.shape
    pool_size = 2
    gray = gray[:h - h % pool_size, :w - w % pool_size]
    gray = gray.reshape(h//pool_size, pool_size, w//pool_size, pool_size).max(axis=(1,3))
    x_pca_svm = (gray/255.0).flatten().reshape(1,-1)

    # For Random Forest HOG + color hist
    img_rf = cv2.resize(img, (64,64))
    gray_rf = cv2.cvtColor(img_rf, cv2.COLOR_BGR2GRAY)
    hog_feat = hog(gray_rf, pixels_per_cell=(8,8), cells_per_block=(2,2), feature_vector=True)
    hist = cv2.calcHist([img_rf],[0,1,2],None,[8,8,8],[0,256,0,256,0,256])
    hist = cv2.normalize(hist,hist).flatten()
    x_rf = np.hstack([hog_feat, hist]).reshape(1,-1)

    return x_mn, x_rf, x_pca_svm


In [149]:
def predict_majority(audio_file, image_file):
    # Audio
    f40 = extract_mfcc_40(audio_file)
    f13 = extract_mfcc_13(audio_file)
    audio_preds = [
        audio_lr.predict(f40)[0],
        audio_svm.predict(f13)[0]
    ]

    # Image
    x_mn, x_rf, x_pca_svm = preprocess_image(image_file)
    image_preds = [
        int(mobilenet_model.predict(x_mn, verbose=0)[0][0] >= 0.5),
        image_rf.predict(x_rf)[0],
        pca_svm_pipeline.predict(x_pca_svm)[0]
    ]

     # Combine
    all_preds = audio_preds + image_preds
    majority_vote = int(np.round(np.mean(all_preds)))  # simple majority
    return {
        "audio_file": audio_file,
        "image_file": image_file,
        "audio_preds": audio_preds,
        "image_preds": image_preds,
        "final": CLASS_NAMES[majority_vote]
    }

In [151]:
result = predict_majority("/content/audio_test/audio_test/Calm 2.m4a",
                          "/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM (1).jpeg")
print(result)

{'audio_file': '/content/audio_test/audio_test/Calm 2.m4a', 'image_file': '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM (1).jpeg', 'audio_preds': [np.int64(0), np.int64(0)], 'image_preds': [1, np.int64(0), np.int64(1)], 'final': 'Non_violence '}


In [152]:
def print_prediction(result):
    print("üéµ Audio File:", result["audio_file"])
    print("   Audio Model Predictions:")
    print(f"      Audio LR  ‚Üí {CLASS_NAMES[result['audio_preds'][0]]}")
    print(f"      Audio SVM ‚Üí {CLASS_NAMES[result['audio_preds'][1]]}")
    print()
    print("üñºÔ∏è Image File:", result["image_file"])
    print("   Image Model Predictions:")
    print(f"      MobileNet    ‚Üí {CLASS_NAMES[result['image_preds'][0]]}")
    print(f"      RandomForest ‚Üí {CLASS_NAMES[result['image_preds'][1]]}")
    print(f"      PCA-SVM      ‚Üí {CLASS_NAMES[result['image_preds'][2]]}")
    print()
    print("üèÜ Final Prediction (Majority Vote) ‚Üí", result["final"])
    print("-"*60)

# Example usage
result = predict_majority(
    "/content/audio_test/audio_test/Calm 2.m4a",
    "/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM (1).jpeg"
)

print_prediction(result)


üéµ Audio File: /content/audio_test/audio_test/Calm 2.m4a
   Audio Model Predictions:
      Audio LR  ‚Üí Non_violence 
      Audio SVM ‚Üí Non_violence 

üñºÔ∏è Image File: /content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM (1).jpeg
   Image Model Predictions:
      MobileNet    ‚Üí violence 
      RandomForest ‚Üí Non_violence 
      PCA-SVM      ‚Üí violence 

üèÜ Final Prediction (Majority Vote) ‚Üí Non_violence 
------------------------------------------------------------


In [None]:
import gradio as gr
import numpy as np
import os

def run_inference(audio_files, image_files):
    results = []

    # audio_files and image_files are lists of uploaded file paths
    for audio_path, image_path in zip(audio_files, image_files):
        result = predict_majority(audio_path, image_path)

        text = f"üéß Audio: {os.path.basename(audio_path)}\n"
        text += f"   Audio LR  ‚Üí {CLASS_NAMES[result['audio_preds'][0]]}\n"
        text += f"   Audio SVM ‚Üí {CLASS_NAMES[result['audio_preds'][1]]}\n\n"

        text += f"üñºÔ∏è Image: {os.path.basename(image_path)}\n"
        text += f"   MobileNet    ‚Üí {CLASS_NAMES[result['image_preds'][0]]}\n"
        text += f"   RandomForest ‚Üí {CLASS_NAMES[result['image_preds'][1]]}\n"
        text += f"   PCA-SVM      ‚Üí {CLASS_NAMES[result['image_preds'][2]]}\n\n"

        text += f"üèÜ FINAL PREDICTION ‚Üí {result['final']}\n"
        text += "-"*50

        results.append(text)

    return "\n\n".join(results)

# Gradio interface
gui = gr.Interface(
    fn=run_inference,
    inputs=[
        gr.File(file_types=[".wav",".mp3",".m4a"], file_count="multiple", label="Upload Audio Files"),
        gr.File(file_types=[".jpg",".jpeg",".png"], file_count="multiple", label="Upload Image Files")
    ],
    outputs=gr.Textbox(lines=25, label="Predictions"),
    title="üé• Multimodal Violence Detection (Audio + Image)",
    description="Late Fusion of 5 Models (2 Audio + 3 Image)"
)

gui.launch(debug=True)


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://6557080d56269d5028.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1139, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 107, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py",