<a href="https://colab.research.google.com/github/Nada-Elghaweet/Ai-safety-system/blob/main/Late_fusions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [56]:
import pickle
import joblib
import librosa
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import os
import zipfile
import cv2
from skimage.feature import hog
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [57]:
audio_svm = joblib.load("violence_svc_model (1).pkl")
audio_lr  = joblib.load("audio_pipeline.pkl")
pca_svm_pipeline = joblib.load("violence_pca_svm_pipeline.joblib")
mobilenet_model = load_model("mobilenet_model.h5")
image_rf  = joblib.load("random_forest_model (1).pkl")



In [58]:
CLASS_NAMES = ["Non_violence ", "violence "]
SAMPLE_RATE = 16000

In [59]:
#AUDIO FEATURE EXTRACTION
def extract_mfcc_40(audio_path):
    audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    return np.mean(mfcc, axis=1).reshape(1, -1)

def extract_mfcc_13(audio_path):
    audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    return np.mean(mfcc, axis=1).reshape(1, -1)


In [60]:
AUDIO_DIR = "/content/audio_test.zip"
extract_path = "/content/audio_test"
os.makedirs(extract_path, exist_ok=True)

In [61]:
with zipfile.ZipFile(AUDIO_DIR, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [62]:
audio_files = [
    os.path.join(root, f)
    for root, dirs, files in os.walk(extract_path)
    for f in files
    if f.lower().endswith((".wav", ".mp3", ".m4a")) and not f.startswith("._")
]

In [63]:
#IMAGE FEATURE PREPROCESSING
TEST_ZIP = "/content/Test_image.zip"
TEST_EXTRACT = "/content/test_images"
os.makedirs(TEST_EXTRACT, exist_ok=True)

with zipfile.ZipFile(TEST_ZIP, 'r') as zip_ref:
    zip_ref.extractall(TEST_EXTRACT)


In [64]:
TEST_EXTRACT = "/content/test_images/Test_image"

test_images = [
    os.path.join(TEST_EXTRACT, f)
    for f in os.listdir(TEST_EXTRACT)
    if f.lower().endswith((".jpg", ".png", ".jpeg"))
]
print("Found images:", test_images)


Found images: ['/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.15 AM.jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.15 AM (1).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM.jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.12 AM.jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.14 AM.jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM (1).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.13 AM (2).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.14 AM (2).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.14 AM (1).jpeg', '/content/test_images/Test_image/WhatsApp Image 2025-12-22 at 12.18.15 AM (2).jpeg']


In [65]:
def preprocess_image(path):
    # MobileNet
    img_mn = image.load_img(path, target_size=(128,128))
    x_mn = np.expand_dims(image.img_to_array(img_mn)/255.0, axis=0)

    # PCA-SVM & Random Forest
    img = cv2.imread(path)

    # For PCA-SVM: grayscale , resize 128x128, max pooling
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (128,128))

    # max pooling like training
    h, w = gray.shape
    pool_size = 2
    gray = gray[:h - h % pool_size, :w - w % pool_size]
    gray = gray.reshape(h//pool_size, pool_size, w//pool_size, pool_size).max(axis=(1,3))
    x_pca_svm = (gray/255.0).flatten().reshape(1,-1)

    # For Random Forest HOG + color hist
    img_rf = cv2.resize(img, (64,64))
    gray_rf = cv2.cvtColor(img_rf, cv2.COLOR_BGR2GRAY)
    hog_feat = hog(gray_rf, pixels_per_cell=(8,8), cells_per_block=(2,2), feature_vector=True)
    hist = cv2.calcHist([img_rf],[0,1,2],None,[8,8,8],[0,256,0,256,0,256])
    hist = cv2.normalize(hist,hist).flatten()
    x_rf = np.hstack([hog_feat, hist]).reshape(1,-1)

    return x_mn, x_rf, x_pca_svm


In [66]:
def predict_audio(file_path):
    f40 = extract_mfcc_40(file_path)
    f13 = extract_mfcc_13(file_path)

    # Audio LR
    lr_pred = audio_lr.predict(f40)[0]
    lr_prob = audio_lr.predict_proba(f40)[0][1]

    # Audio SVM
    svm_pred = audio_svm.predict(f13)[0]
    svm_prob = audio_svm.predict_proba(f13)[0][1]

    return {
        "audio_lr": (lr_pred, lr_prob),
        "audio_svm": (svm_pred, svm_prob)
    }

In [67]:
def predict_image(path):
    x_mn, x_rf, x_pca_svm = preprocess_image(path)

    # MobileNet
    mn_prob = mobilenet_model.predict(x_mn, verbose=0)[0][0]
    mn_label = int(mn_prob >= 0.5)

    # Random Forest
    rf_pred = image_rf.predict(x_rf)[0]
    rf_label = int(rf_pred)

    # PCA-SVM
    pca_svm_pred = pca_svm_pipeline.predict(x_pca_svm)[0]
    pca_svm_label = int(pca_svm_pred)

    return {
        "mobilenet": (mn_label, mn_prob),
        "random_forest": (rf_label, None),
        "pca_svm": (pca_svm_label, None)
    }

In [68]:
def majority_vote(audio_preds, image_preds):
    votes = [
        audio_preds["audio_lr"][0],
        audio_preds["audio_svm"][0],
        image_preds["mobilenet"][0],
        image_preds["random_forest"][0],
        image_preds["pca_svm"][0]
    ]
    overall = int(sum(votes) >= 3)  # >=3 out of 5 ‚Üí violence
    return overall

In [69]:
print("üéß Audio Predictions + üñºÔ∏è Image Predictions + üèÜ Overall\n")
for file in audio_files:
    audio_preds = predict_audio(file)
    # Assume image filename matches audio name with extension change, or skip if no image
    img_file = file.replace("audio_test", "test_images/Test_image").rsplit(".",1)[0]+".jpg"
    if os.path.exists(img_file):
        image_preds = predict_image(img_file)
    else:
        # if no image, use dummy zeros
        image_preds = {"mobilenet": (0,None), "random_forest": (0,None), "pca_svm": (0,None)}

    overall = majority_vote(audio_preds, image_preds)

üéß Audio Predictions + üñºÔ∏è Image Predictions + üèÜ Overall



In [70]:
print(f"üìÑ File: {file}")
print(f"   Audio LR   ‚Üí {CLASS_NAMES[audio_preds['audio_lr'][0]]} ({audio_preds['audio_lr'][1]:.2f})")
print(f"   Audio SVM  ‚Üí {CLASS_NAMES[audio_preds['audio_svm'][0]]} ({audio_preds['audio_svm'][1]:.2f})")
print(f"   MobileNet  ‚Üí {CLASS_NAMES[image_preds['mobilenet'][0]]} ({image_preds['mobilenet'][1] if image_preds['mobilenet'][1] else 0:.2f})")
print(f"   RandomForest ‚Üí {CLASS_NAMES[image_preds['random_forest'][0]]}")
print(f"   PCA-SVM      ‚Üí {CLASS_NAMES[image_preds['pca_svm'][0]]}")
print(f"   üèÜ Overall   ‚Üí {CLASS_NAMES[overall]}")
print("-"*45)

üìÑ File: /content/audio_test/audio_test/Calm 2.m4a
   Audio LR   ‚Üí Non_violence  (0.11)
   Audio SVM  ‚Üí violence  (1.00)
   MobileNet  ‚Üí Non_violence  (0.00)
   RandomForest ‚Üí Non_violence 
   PCA-SVM      ‚Üí Non_violence 
   üèÜ Overall   ‚Üí Non_violence 
---------------------------------------------
