In [1]:
import os
import numpy as np
import librosa
import librosa.display
import tensorflow as tf
import matplotlib.pyplot as plt

# --- CONFIGURATION ---
# 1. PATHS
# The defaults point at the original author's local sample files. To run the
# notebook on another machine (Kaggle, Colab, a teammate's laptop) set the
# environment variables below instead of editing this cell, e.g.
#   VOICEGUARD_REAL_PATH="/kaggle/input/shehab11/my_voice_real3 (1).wav"
real_path = os.environ.get(
    "VOICEGUARD_REAL_PATH",
    r"C:\Users\user\Desktop\PROJECTS\ML\ML_Projrct\ML_Projrct\VoiceGuard_samples\Real\R.wav",
)
fake_path = os.environ.get(
    "VOICEGUARD_FAKE_PATH",
    r"C:\Users\user\Desktop\PROJECTS\ML\ML_Projrct\ML_Projrct\VoiceGuard_samples\Fake\F.mp3",
)
# Alternative checkpoint name used elsewhere in the project:
# "my_final_spectrogram_model.keras"
model_path = os.environ.get("VOICEGUARD_MODEL_PATH", "best_large_model.keras")

# 2. SPECTROGRAM SETTINGS (Must match training EXACTLY)
N_MELS = 128       # number of mel bands (spectrogram height)
FIXED_WIDTH = 300  # number of time frames fed to the model (spectrogram width)

# --- HELPER FUNCTIONS ---
def prepare_data(file_path):
    """Turn one audio file into a fixed-size log-mel spectrogram.

    The clip is loaded at 16 kHz, converted to an ``N_MELS``-band mel
    spectrogram (``n_fft=2048``, ``hop_length=160``), converted to dB with
    ``ref=np.max``, and then zero-padded or truncated along the time axis to
    exactly ``FIXED_WIDTH`` frames.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        tuple: ``(mel_db, model_input)``. ``mel_db`` is the
        ``(N_MELS, FIXED_WIDTH)`` dB spectrogram used for plotting;
        ``model_input`` is the same data reshaped to
        ``(1, N_MELS, FIXED_WIDTH, 1)`` and rescaled as ``(dB + 40) / 20``.
    """
    waveform, sample_rate = librosa.load(file_path, sr=16000)

    power_spec = librosa.feature.melspectrogram(
        y=waveform, sr=sample_rate, n_mels=N_MELS, n_fft=2048, hop_length=160,
    )
    mel_db = librosa.power_to_db(power_spec, ref=np.max)

    # Force the time axis to FIXED_WIDTH frames: right-pad short clips with
    # zeros, keep only the leading frames of long ones.
    missing_frames = FIXED_WIDTH - mel_db.shape[1]
    if missing_frames > 0:
        mel_db = np.pad(mel_db, ((0, 0), (0, missing_frames)), mode='constant')
    else:
        mel_db = mel_db[:, :FIXED_WIDTH]

    # Add batch and channel axes, then apply the fixed shift/scale.
    batch = mel_db.reshape(1, N_MELS, FIXED_WIDTH, 1)
    model_input = (batch + 40) / 20

    return mel_db, model_input

# --- MAIN EXECUTION ---
# Loads the trained model (if present), runs it on the two sample files and
# draws one annotated mel-spectrogram per file.

# 1. Load Model
if os.path.exists(model_path):
    print("🧠 Loading Model...")
    model = tf.keras.models.load_model(model_path)
else:
    print("❌ Error: Model file not found. Please output/save your model first.")
    model = None

# 2. Process Files
files = [("Real File", real_path), ("Fake XTTS File", fake_path)]

fig = plt.figure(figsize=(15, 12))

for i, (title, path) in enumerate(files):
    if not os.path.exists(path):
        print(f"⚠️ File not found: {path}")
        continue

    # vis_data is plotted; model_data is the batch fed to the network.
    vis_data, model_data = prepare_data(path)

    # Fallback caption so the spectrogram is still shown without a model.
    prediction_text = "Model not loaded"
    color = "black"
    if model is not None:
        # Single sigmoid output: values above 0.5 are reported as FAKE.
        pred = model.predict(model_data, verbose=0)[0][0]
        confidence = pred * 100

        if pred > 0.5:
            prediction_text = f"DETECTED: FAKE ({confidence:.2f}% Confidence)"
            color = "red"
        else:
            prediction_text = f"DETECTED: REAL ({100-confidence:.2f}% Confidence)"
            color = "green"

    # Draw on an explicit Axes and hand the returned image to the colorbar,
    # rather than relying on pyplot's implicit "current image".
    ax = fig.add_subplot(len(files), 1, i + 1)
    img = librosa.display.specshow(
        vis_data, sr=16000, hop_length=160, x_axis='time', y_axis='mel', ax=ax
    )
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    ax.set_title(
        f"{title}\nAI Verdict: {prediction_text}",
        fontsize=14, color=color, fontweight='bold',
    )

fig.tight_layout()
plt.show()

ModuleNotFoundError: No module named 'librosa'

In [None]:
# %%writefile app.py
# import streamlit as st
# import tensorflow as tf
# import numpy as np
# import librosa
# import librosa.display
# import matplotlib.pyplot as plt
# import tempfile
# import os

# # Page Config
# st.set_page_config(page_title="Deepfake Voice Detector", page_icon="üéô")

# # Load Model
# @st.cache_resource
# def load_my_model():
#     if not os.path.exists("best_large_model.keras"):
#         st.error("Model not found! Make sure 'best_large_model.keras' is in the output.")
#         return None
#     return tf.keras.models.load_model("best_large_model.keras")

# model = load_my_model()

# # Preprocessing (Must match training)
# def prepare_audio(file_path):
#     N_MELS = 128
#     FIXED_WIDTH = 300
#     try:
#         y, sr = librosa.load(file_path, sr=16000)
#         mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS, n_fft=2048, hop_length=160)
#         mel_db = librosa.power_to_db(mel_spec, ref=np.max)
        
#         if mel_db.shape[1] < FIXED_WIDTH:
#             pad_width = FIXED_WIDTH - mel_db.shape[1]
#             mel_db = np.pad(mel_db, ((0, 0), (0, pad_width)), mode='constant')
#         else:
#             mel_db = mel_db[:, :FIXED_WIDTH]
            
#         model_input = mel_db.reshape(1, N_MELS, FIXED_WIDTH, 1)
#         model_input = (model_input - (-40)) / 20 
#         return mel_db, model_input
#     except Exception as e:
#         return None, None

# # UI Layout
# st.title("üéô AI Deepfake Detector")
# uploaded_file = st.file_uploader("Upload Audio (wav/mp3)", type=["wav", "mp3"])

# if uploaded_file and model:
#     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
#         tmp.write(uploaded_file.read())
#         path = tmp.name
    
#     st.audio(path)
    
#     if st.button("Analyze"):
#         vis, inp = prepare_audio(path)
#         if inp is not None:
#             prob = model.predict(inp, verbose=0)[0][0]
#             conf = prob * 100
            
#             if prob > 0.5:
#                 st.error(f"üö® *FAKE* ({conf:.2f}%)")
#             else:
#                 st.success(f"‚úÖ *REAL* ({100-conf:.2f}%)")
                
#             fig, ax = plt.subplots(figsize=(10, 3))
#             librosa.display.specshow(vis, sr=16000, hop_length=160, ax=ax)
#             st.pyplot(fig)
#     os.remove(path)
%%writefile app.py
import streamlit as st
import tensorflow as tf
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tempfile
import os

# Page Config (must be the first Streamlit call in the script)
st.set_page_config(page_title="Deepfake VOICEGUARD", page_icon="🛡", layout="centered")

# Custom CSS for better styling: full-width red buttons with white text.
st.markdown("""
    <style>
    .stButton>button {
        width: 100%;
        background-color: #ff4b4b;
        color: white;
    }
    </style>
""", unsafe_allow_html=True)

# 1. Load Model
MODEL_PATH = "best_large_model.keras"


@st.cache_resource
def _load_cached_model(path):
    """Load the Keras model stored at ``path``.

    Cached with ``st.cache_resource`` so Streamlit reruns share one model
    instance. Errors are deliberately allowed to propagate: only returned
    values are cached, so a failed attempt is retried on the next rerun
    instead of a ``None`` result being cached for the life of the process.
    """
    return tf.keras.models.load_model(path)


def load_my_model():
    """Return the detection model, or ``None`` if it cannot be loaded.

    Problems (missing file, load failure) are reported to the user with
    ``st.error``. This wrapper is intentionally not cached, so dropping the
    model file into place and rerunning the app is enough to recover.

    Returns:
        The loaded Keras model, or ``None`` when unavailable.
    """
    if not os.path.exists(MODEL_PATH):
        st.error("❌ Model 'best_large_model.keras' not found! Please ensure it is in the directory.")
        return None
    try:
        return _load_cached_model(MODEL_PATH)
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None

model = load_my_model()

# 2. Preprocessing (Matches your training exactly)
def prepare_audio(file_path):
    """Convert an audio file into the spectrogram pair used by the app.

    The clip is loaded at 16 kHz, turned into a 128-band mel spectrogram
    (``n_fft=2048``, ``hop_length=160``), converted to dB with ``ref=np.max``
    and zero-padded or truncated to 300 time frames. Per the original
    author's note these settings are meant to match training.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        tuple: ``(mel_db, model_input)`` where ``mel_db`` has shape
        ``(128, 300)`` and ``model_input`` has shape ``(1, 128, 300, 1)``
        rescaled as ``(dB + 40) / 20``. If any step raises, the error is shown
        with ``st.error`` and ``(None, None)`` is returned.
    """
    n_mels = 128
    target_frames = 300  # 300 hops of 160 samples at 16 kHz is about 3 s
    try:
        # Resample everything to 16 kHz on load.
        signal, rate = librosa.load(file_path, sr=16000)

        power = librosa.feature.melspectrogram(
            y=signal, sr=rate, n_mels=n_mels, n_fft=2048, hop_length=160,
        )
        mel_db = librosa.power_to_db(power, ref=np.max)

        # Right-pad short clips with zeros; keep the leading frames of long ones.
        shortfall = target_frames - mel_db.shape[1]
        if shortfall > 0:
            mel_db = np.pad(mel_db, ((0, 0), (0, shortfall)), mode='constant')
        else:
            mel_db = mel_db[:, :target_frames]

        # Add batch and channel axes, then apply the fixed shift/scale.
        cnn_batch = mel_db.reshape(1, n_mels, target_frames, 1)
        return mel_db, (cnn_batch + 40) / 20
    except Exception as exc:
        st.error(f"Error processing audio: {exc}")
        return None, None

# 3. UI Layout
st.title("🛡 Voice Guard: Deepfake Detection")
st.write("Upload an audio file or record directly to detect if the voice is *REAL* or *AI-GENERATED*.")

# Tabs for different input methods
tab1, tab2 = st.tabs(["📁 Upload File", "🎙 Record Audio"])

# Whichever input is provided last wins; a recording overrides an upload.
audio_source = None

# Tab 1: File Upload
with tab1:
    uploaded_file = st.file_uploader("Upload Audio (wav/mp3)", type=["wav", "mp3"])
    if uploaded_file:
        audio_source = uploaded_file

# Tab 2: Audio Recording (Requires Streamlit >= 1.40)
with tab2:
    recorded_audio = st.audio_input("Click to record")
    if recorded_audio:
        audio_source = recorded_audio

# 4. Analysis Logic
if audio_source is not None and model is not None:
    # getvalue() returns the whole payload regardless of the stream position.
    audio_bytes = audio_source.getvalue()

    # Keep the real extension / MIME type so mp3 uploads are not mislabelled
    # as wav for the decoder and the browser player.
    suffix = os.path.splitext(getattr(audio_source, "name", "") or "")[1].lower() or ".wav"
    mime_type = getattr(audio_source, "type", None) or "audio/wav"

    # Audio Player (plays straight from memory; no temp file needed)
    st.audio(audio_bytes, format=mime_type)

    if st.button("🔍 Analyze Audio", use_container_width=True):
        # librosa needs a real file path, so write one only when analysing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(audio_bytes)
            tmp_path = tmp.name

        try:
            with st.spinner("Scanning spectrogram for AI artifacts..."):
                vis_data, model_input = prepare_audio(tmp_path)

                if model_input is not None:
                    # Single sigmoid output: above 0.5 is reported as FAKE.
                    prob = float(model.predict(model_input, verbose=0)[0][0])
                    confidence = prob * 100

                    # Results Display
                    st.divider()
                    col1, col2 = st.columns([1, 2])

                    with col1:
                        if prob > 0.5:
                            st.error("🚨 *VERDICT: FAKE*")
                            st.metric("AI Confidence", f"{confidence:.2f}%")
                        else:
                            st.success("✅ *VERDICT: REAL*")
                            st.metric("Real Confidence", f"{100 - confidence:.2f}%")

                    with col2:
                        # Use the figure/axes objects directly: pyplot's global
                        # state is shared across Streamlit sessions.
                        fig, ax = plt.subplots(figsize=(10, 4))
                        img = librosa.display.specshow(
                            vis_data, sr=16000, hop_length=160,
                            x_axis='time', y_axis='mel', ax=ax, cmap='magma',
                        )
                        fig.colorbar(img, ax=ax, format='%+2.0f dB')
                        ax.set_title("Mel-Spectrogram Analysis")
                        st.pyplot(fig)
                        # Release the figure; otherwise one accumulates per click.
                        plt.close(fig)

                    # Explanation based on the verdict
                    if prob > 0.5:
                        st.warning("⚠ The model detected spectral artifacts consistent with XTTS/AI synthesis.")
                    else:
                        st.info("ℹ The audio features align with natural human speech patterns.")
        finally:
            # Cleanup: always remove the temp file, even if analysis failed.
            os.remove(tmp_path)

In [None]:
import os
from pyngrok import ngrok
import time

# --- CONFIGURATION ---
import subprocess
import sys
from getpass import getpass

# SECURITY: never store the ngrok token in the notebook. The token that used
# to be hard-coded here has been exposed and should be revoked in the ngrok
# dashboard. Supply a fresh one via the NGROK_AUTHTOKEN environment variable,
# or type it at the prompt (input is hidden and not saved in the notebook).
NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
STREAMLIT_PORT = 8501

# 1. Authenticate
ngrok.set_auth_token(NGROK_TOKEN)

# 2. Run Streamlit in the background
# Popen returns immediately on every platform (a shell "&" does not background
# the process on Windows). Headless mode skips the first-run prompt and the
# automatic browser launch. Keep the handle so the server can be stopped later
# with streamlit_proc.terminate().
streamlit_proc = subprocess.Popen(
    [
        sys.executable, "-m", "streamlit", "run", "app.py",
        "--server.port", str(STREAMLIT_PORT),
        "--server.headless", "true",
    ]
)

# 3. Open the Tunnel
# Give streamlit a few seconds to start
time.sleep(3)
try:
    # Close any existing tunnels to avoid errors
    ngrok.kill()

    # Open new tunnel to the Streamlit port
    public_url = ngrok.connect(STREAMLIT_PORT).public_url
    print(f"🚀 Streamlit is active! Click here: {public_url}")
except Exception as e:
    print(f"Error: {e}")

In [2]:
%pip install librosa matplotlib tensorflow

Defaulting to user installation because normal site-packages is not writeable
Collecting librosa
  Obtaining dependency information for librosa from https://files.pythonhosted.org/packages/b5/ba/c63c5786dfee4c3417094c4b00966e61e4a63efecee22cb7b4c0387dda83/librosa-0.11.0-py3-none-any.whl.metadata
  Using cached librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Obtaining dependency information for audioread>=2.1.9 from https://files.pythonhosted.org/packages/7e/16/fbe8e1e185a45042f7cd3a282def5bb8d95bb69ab9e9ef6a5368aa17e426/audioread-3.1.0-py3-none-any.whl.metadata
  Using cached audioread-3.1.0-py3-none-any.whl.metadata (9.0 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Obtaining dependency information for soundfile>=0.12.1 from https://files.pythonhosted.org/packages/14/e9/6b761de83277f2f02ded7e7ea6f07828ec78e4b229b80e4ca55dd205b9dc/soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata
  Using cached soundfile-0.13.1-py2.py3-none-win_amd6

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\user\\AppData\\Roaming\\Python\\Python311\\site-packages\\~umpy.libs\\libscipy_openblas64_-860d95b1c38e637ce4509f5fa24fbf2a.dll'
Check the permissions.

