## **Supabase Installation**

In [51]:
!pip install supabase



In [52]:
!pip install gdown



## **Getting all the recordings from Supabase**

In [53]:
# Initialize the Supabase client, list the recordings in storage and download
# every MP3 into the working directory.
import os

from supabase import create_client, Client

# Secrets are read from the environment instead of being committed with the
# notebook. Set SUPABASE_KEY, SUPABASE_EMAIL and SUPABASE_PASSWORD before
# running this cell (for example with `%env` or the platform's secret store).
# SUPABASE_URL is optional and falls back to the project URL used so far.
url = os.environ.get("SUPABASE_URL", "https://dfwhoaupkzvcztmrknzm.supabase.co")
key = os.environ["SUPABASE_KEY"]

# Create a Supabase client and sign in
supabase: Client = create_client(url, key)
auth_response = supabase.auth.sign_in_with_password(
    {
        "email": os.environ["SUPABASE_EMAIL"],
        "password": os.environ["SUPABASE_PASSWORD"],
    }
)

# Define the bucket and folder names (both are reused by later cells)
bucket_name = "upload"
folder_name = "recording/"

# Retrieve the list of files in the specified folder
response = supabase.storage.from_(bucket_name).list(
    folder_name,
    {"limit": 100, "offset": 0, "sortBy": {"column": "name", "order": "desc"}}
)

# Keep only the MP3s and number them 1..N. Numbering the filtered names
# (rather than the raw listing) keeps the IDs consecutive even when the folder
# also contains non-MP3 entries such as placeholder files.
mp3_names = [entry["name"] for entry in response if entry["name"].endswith(".mp3")]
mp3_files = [
    {"id": mp3_id, "file_name": name}
    for mp3_id, name in enumerate(mp3_names, start=1)
]

# Print the list of MP3 files with incremental IDs
print("List of MP3 Files:")
for mp3_file in mp3_files:
    print(f"ID: {mp3_file['id']}, Name: {mp3_file['file_name']}")

# Download the MP3 files
for mp3_file in mp3_files:
    file_name = mp3_file["file_name"]
    file_path = f"{folder_name}{file_name}"  # Full path to the file in the bucket

    # Download the file
    data = supabase.storage.from_(bucket_name).download(file_path)

    # Save the file locally
    with open(f"downloaded_{file_name}", "wb") as f:
        f.write(data)

    print(f"Downloaded: {file_name}")

# Query data from the "countries" table
#response = supabase.table("countries").select("*").execute()

List of MP3 Files:
ID: 1, Name: shapeofyou.mp3
Downloaded: shapeofyou.mp3


## **Importing the model and libraries**

In [None]:
#import torch
#from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification
#import librosa
#import numpy as np
#import soundfile as sf
#
## Path to the saved model files
#model_path = r"C:\Users\HP\Downloads\Arabic_Model\Model"  # Path to your saved model
#
## Load the feature extractor and model from the saved path
#feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
#model = Wav2Vec2ForSequenceClassification.from_pretrained(model_path)
#
## Paths to test audio files
#real_audio_path = r"C:\Users\HP\Downloads\Arabic_Model\Test\englishRealTest.mp3"   # Replace with the actual path
#deepfake_audio_path = r"C:\Users\HP\Downloads\Arabic_Model\Test\englishFakeTest.mp3"  # Replace with the actual path

In [54]:
import gdown
import zipfile
import os
import torch
import librosa
import numpy as np
import soundfile as sf

# Google Drive file ID of the zipped model
file_id = "1Z-iyTjyQgT3PRhOWF9y3dCZANS72wKM5"
download_url = f"https://drive.google.com/uc?id={file_id}&export=download"
output_path = "Arabic_Model.zip"  # File to save the downloaded zip
extracted_path = "Arabic_Model"  # Folder where the model will be extracted

# Step 1: Download the model zip file.
# The archive is large, so a valid copy already on disk is reused instead of
# being fetched again on every run of this cell.
if os.path.isfile(output_path) and zipfile.is_zipfile(output_path):
    print(f"Using cached model archive: {os.path.abspath(output_path)}")
else:
    print("Downloading the model...")
    downloaded = gdown.download(download_url, output_path, quiet=False)
    # Fail here with a clear message rather than later inside ZipFile when the
    # download was refused or produced something that is not a zip archive.
    if downloaded is None or not zipfile.is_zipfile(output_path):
        raise RuntimeError(
            f"Model download failed or produced an invalid archive: {download_url}"
        )

# Step 2: Extract the zip file
print("Extracting the model...")
with zipfile.ZipFile(output_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_path)

print(f"Model downloaded and extracted to: {os.path.abspath(extracted_path)}")

Downloading the model...


Downloading...
From (original): https://drive.google.com/uc?id=1Z-iyTjyQgT3PRhOWF9y3dCZANS72wKM5&export=download
From (redirected): https://drive.google.com/uc?id=1Z-iyTjyQgT3PRhOWF9y3dCZANS72wKM5&export=download&confirm=t&uuid=b11650d3-3af8-449e-88d7-0479c7dc231f
To: /kaggle/working/Arabic_Model.zip
100%|██████████| 1.18G/1.18G [00:03<00:00, 309MB/s]


Extracting the model...
Model downloaded and extracted to: /kaggle/working/Arabic_Model


In [55]:
# Step 3: Load the model
from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification

# Path to the extracted model files
model_path = os.path.join(extracted_path, "Arabic_Model/Model")  # Adjust the subfolder name if necessary

# Check the directory up front: when the path does not exist, from_pretrained
# interprets the string as a Hub repository id and fails with a misleading error.
if not os.path.isdir(model_path):
    raise FileNotFoundError(
        f"Model directory not found: {os.path.abspath(model_path)}. "
        "Check the extracted archive layout and adjust the subfolder name."
    )

feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_path)

print("Model loaded successfully!")

Model loaded successfully!


## **Diagnose Audio Files Function**

In [56]:
# Diagnostic function to check audio file
def diagnose_audio_file(audio_path):
    """Print basic facts about an audio file as seen by two decoders.

    The file is opened first with librosa (requesting 16000 Hz) and then with
    soundfile. For each decoder the sample rate, array shape and dtype are
    printed; if a decoder raises, the failure is printed instead and the
    other decoder is still attempted.

    Args:
        audio_path: Path of the audio file to inspect.

    Returns:
        None. All results are written to standard output.
    """
    # Pass 1: librosa
    try:
        waveform, rate = librosa.load(audio_path, sr=16000)
        print(f"Librosa - Path: {audio_path}")
        print(f"  Librosa sample rate: {rate}")
        print(f"  Librosa audio shape: {waveform.shape}")
        print(f"  Librosa audio dtype: {waveform.dtype}")
    except Exception as librosa_error:
        print(f"Librosa load failed for {audio_path}: {librosa_error}")

    # Pass 2: soundfile
    try:
        frames, native_rate = sf.read(audio_path)
        print(f"Soundfile - Path: {audio_path}")
        print(f"  Soundfile sample rate: {native_rate}")
        print(f"  Soundfile audio shape: {frames.shape}")
        print(f"  Soundfile audio dtype: {frames.dtype}")
    except Exception as soundfile_error:
        print(f"Soundfile load failed for {audio_path}: {soundfile_error}")

## **Audio Preprocessing Function**

In [57]:
# Function to load and preprocess the audio
def preprocess_audio(audio_path):
    """Load an audio file and build the tensors passed to the model.

    The file is decoded with librosa at 16000 Hz, cast to float32 and given a
    leading batch dimension. An all-ones attention mask of the same shape is
    created alongside it.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        dict with two tensors:
            "input_values": float32 samples with a leading batch dimension.
            "attention_mask": int64 ones with the same shape.

    Raises:
        Exception: any error raised while loading or converting the audio is
            printed and then re-raised unchanged.
    """
    try:
        samples, _ = librosa.load(audio_path, sr=16000)
        samples = samples.astype(np.float32)

        # Add the batch axis in front of the sample axis
        batch = torch.tensor(samples).unsqueeze(0)

        return {
            "input_values": batch,
            "attention_mask": torch.ones_like(batch, dtype=torch.long),
        }
    except Exception as err:
        print(f"Error preprocessing audio {audio_path}: {err}")
        raise

## **Analyze Audio Function**

In [58]:
def analyze_audio(file_path):
    """Classify one audio file with the loaded model.

    The file is preprocessed with ``preprocess_audio`` and passed through the
    module-level ``model`` in evaluation mode with gradients disabled.

    Args:
        file_path: Path of the audio file to classify.

    Returns:
        A ``(label, probabilities)`` tuple. ``label`` is "Fake" when the
        arg-max class index is 1 and "Genuine" otherwise; ``probabilities``
        is the softmax of the logits as a NumPy array. If anything raises,
        the error is printed and ``("Error", None)`` is returned.
    """
    try:
        features = preprocess_audio(file_path)
        model.eval()
        with torch.no_grad():
            outputs = model(
                input_values=features["input_values"],
                attention_mask=features["attention_mask"],
            )
            scores = outputs.logits
            probs = scores.softmax(dim=-1)
            predicted_class = scores.argmax(dim=-1).item()

        if predicted_class == 1:
            label = "Fake"
        else:
            label = "Genuine"
        return label, probs.numpy()
    except Exception as err:
        print(f"Error during analysis: {err}")
        return "Error", None

## **Diagnose, Load and Preprocess Audio Files**

In [None]:
## Diagnose audio files
#print("Diagnosing Real Audio:")
#diagnose_audio_file(real_audio_path)
#print("\nDiagnosing Fake Audio:")
#diagnose_audio_file(deepfake_audio_path)
#
## Load and preprocess the audio files
#try:
#    inputs_real = preprocess_audio(real_audio_path)
#    inputs_fake = preprocess_audio(deepfake_audio_path)
#
#    # Set model to evaluation mode
#    model.eval()
#
#    # Inference (no gradients needed)
#    with torch.no_grad():
#        # Get logits for both real and fake inputs
#        logits_real = model(
#            input_values=inputs_real['input_values'], 
#            attention_mask=inputs_real['attention_mask']
#        ).logits
#        
#        logits_fake = model(
#            input_values=inputs_fake['input_values'], 
#            attention_mask=inputs_fake['attention_mask']
#        ).logits
#        
#        # Apply softmax to get probabilities
#        probs_real = torch.nn.functional.softmax(logits_real, dim=-1)
#        probs_fake = torch.nn.functional.softmax(logits_fake, dim=-1)
#        
#        # Get predictions (argmax on logits for class prediction)
#        prediction_real = torch.argmax(logits_real, dim=-1).numpy()
#        prediction_fake = torch.argmax(logits_fake, dim=-1).numpy()
#
#    # Display results
#    print("\nPrediction Results:")
#    print(f"Prediction for real audio: {prediction_real}")
#    print(f"Prediction for deepfake audio: {prediction_fake}")
#    print("\nProbabilities:")
#    print(f"Real audio probabilities: {probs_real.numpy()}")
#    print(f"Fake audio probabilities: {probs_fake.numpy()}")
#
#except Exception as e:
#    print(f"Error during processing: {e}")

## **Full System**

In [74]:
import time

def main():
    """Main workflow: poll Supabase forever and analyze pending recordings.

    Every pass is delegated to ``_poll_once`` and is always followed by a
    10 second pause, whatever the pass found. This guarantees that an invalid
    or empty status row can never turn the loop into a busy spin against the
    API. The function does not return; interrupt the kernel to stop it.
    """
    while True:
        _poll_once()

        # Wait before checking again
        time.sleep(10)


def _next_call_id():
    """Return an unused integer id for a new 'callList' row (highest id + 1)."""
    # Using the highest existing id, not the row count, avoids colliding with
    # a live row after any earlier row has been deleted.
    rows = supabase.table("callList").select("id").execute().data or []
    return max((row["id"] for row in rows), default=0) + 1


def _poll_once():
    """Run one polling pass: analyze at most one new recording and update counters."""
    # Fetch the current status of "PresentNewCall"
    status_response = supabase.table("status").select("*").eq("name", "PresentNewCall").execute()
    print(status_response)

    # Nothing to do without a status entry
    if not status_response.data:
        return

    status_entry = status_response.data[0]
    current_status = status_entry["status"]

    try:
        # int(None) raises TypeError, a non-numeric string raises ValueError
        current_status = int(current_status)
        print(f"Current status: {current_status}")
    except (TypeError, ValueError):
        print(f"Invalid status value: {current_status}")
        return  # Skip this pass if the status is invalid

    if current_status <= 0:
        return

    # Retrieve the list of files in the storage bucket
    storage_response = supabase.storage.from_(bucket_name).list(
        folder_name,
        {"limit": 100, "offset": 0, "sortBy": {"column": "name", "order": "desc"}}
    )
    print(f"Storage Response: {storage_response}")

    # Track whether we processed any file
    files_processed = False

    # Filter and process new MP3 files
    for file in storage_response:
        file_name = file["name"]
        print(f"Processing file: {file_name}")

        if not file_name.endswith(".mp3"):
            print(f"Skipping non-MP3 file: {file_name}")
            continue  # Skip non-MP3 files

        # Ensure the file hasn't been analyzed already by checking 'callList'
        call_list_response = supabase.table("callList").select("*").eq("name", file_name).execute()
        print(f"Call list response for {file_name}: {call_list_response.data}")

        new_entry_id = None

        if call_list_response.data:
            # Check the status of the existing entry
            existing_entry = call_list_response.data[0]
            if existing_entry["status"] in ["Fake", "Genuine"]:  # Treated as processed
                print(f"Skipping already analyzed recording: {file_name}")
                continue  # Skip if already analyzed
            elif existing_entry["status"] == "analyzing":
                print(f"File is already being analyzed, proceeding to reanalyze: {file_name}")
                new_entry_id = existing_entry["id"]  # Use the existing ID for reanalyzing
            else:
                # NOTE(review): this branch also catches rows whose status is
                # "Error" (written below when analyze_audio fails); such files
                # are never retried. Confirm that this is intended.
                print(f"Unknown status for {file_name}, skipping.")
                continue  # Skip if the status is unknown
        else:
            # Insert the file into 'callList' with status 'analyzing' if it doesn't exist
            print(f"New file found, adding to callList: {file_name}")
            new_entry_id = _next_call_id()
            supabase.table("callList").insert({"id": new_entry_id, "name": file_name, "status": "analyzing"}).execute()

        # Download the recording from storage
        print(f"Downloading new recording: {file_name}")
        file_path = f"{folder_name}{file_name}"
        data = supabase.storage.from_(bucket_name).download(file_path)
        local_path = f"downloaded_{file_name}"
        with open(local_path, "wb") as f:
            f.write(data)

        # Diagnose the audio file
        print(f"Diagnosing audio file: {local_path}")
        diagnose_audio_file(local_path)

        # Analyze the recording
        print(f"Analyzing recording: {file_name}")
        status, probabilities = analyze_audio(local_path)

        # Update the 'callList' table with the analysis result
        supabase.table("callList").update({"status": status}).eq("id", new_entry_id).execute()
        print(f"Analysis complete for {file_name}: {status}")

        # Mark that we processed at least one file
        files_processed = True

        # Decrement the 'PresentNewCall' status by 1
        new_status = current_status - 1
        supabase.table("status").update({"status": new_status}).eq("id", status_entry["id"]).execute()
        print(f"Updated PresentNewCall status to: {new_status}")

        # Only one recording is handled per pass; the next pass picks up the rest
        break

    # If no new files were processed, check if all files are analyzed (either "Fake" or "Genuine")
    if not files_processed:
        all_files_analyzed = True
        for file in storage_response:
            file_name = file["name"]
            if file_name.endswith(".mp3"):
                call_list_response = supabase.table("callList").select("*").eq("name", file_name).execute()
                if call_list_response.data:
                    entry = call_list_response.data[0]
                    if entry["status"] not in ["Fake", "Genuine"]:
                        all_files_analyzed = False
                        break

        # If all files are analyzed, decrement the status of PresentNewCall
        if all_files_analyzed:
            new_status = current_status - 1
            supabase.table("status").update({"status": new_status}).eq("id", status_entry["id"]).execute()
            print(f"All files analyzed. Decremented PresentNewCall status to: {new_status}")

# Run the main function
if __name__ == "__main__":
    main()

data=[{'id': 1, 'name': 'PresentNewCall', 'status': '3'}] count=None
Current status: 3
Storage Response: [{'name': 'shapeofyou.mp3', 'id': 'b4f6c5fb-6401-4ff9-bdb3-06ab34fd8c1d', 'updated_at': '2024-12-14T02:30:34.407Z', 'created_at': '2024-12-14T02:30:34.407Z', 'last_accessed_at': '2024-12-14T02:30:34.407Z', 'metadata': {'eTag': '"8fc8583f3d5f891ed3990bc857ed2b95-2"', 'size': 5664576, 'mimetype': 'audio/mpeg', 'cacheControl': 'max-age=3600', 'lastModified': '2024-12-14T02:30:34.000Z', 'contentLength': 5664576, 'httpStatusCode': 200}}, {'name': '.emptyFolderPlaceholder', 'id': '8256bd68-bddf-4ee9-b474-c503791712cf', 'updated_at': '2024-12-14T01:24:08.705Z', 'created_at': '2024-12-14T01:24:08.705Z', 'last_accessed_at': '2024-12-14T01:24:08.705Z', 'metadata': {'eTag': '"d41d8cd98f00b204e9800998ecf8427e"', 'size': 0, 'mimetype': 'application/octet-stream', 'cacheControl': 'max-age=3600', 'lastModified': '2024-12-14T01:24:09.000Z', 'contentLength': 0, 'httpStatusCode': 200}}, {'name': '1.m

KeyboardInterrupt: 