In [10]:
import os
import librosa
import numpy as np
import torch
import torch.nn as nn
import noisereduce as nr
from torchvision.models import mobilenet_v2

# Constants (must match training)
SAMPLE_RATE = 16000  # sampling rate passed to librosa.load, noisereduce and the mel spectrogram
DURATION = 3.0  # clip length in seconds; DURATION * SAMPLE_RATE is the fixed sample count
N_MELS = 64  # n_mels passed to librosa.feature.melspectrogram
HOP_LENGTH = 512  # hop_length passed to librosa.feature.melspectrogram
N_FFT = 1024  # n_fft passed to librosa.feature.melspectrogram
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # CUDA when available, otherwise CPU

# Model Architecture (identical to training)
class AudioClassifier(nn.Module):
    """MobileNetV2 adapted to classify single-channel spectrogram input.

    The first convolution is replaced so the network accepts one input channel,
    and the classifier head is replaced by ``Dropout -> Linear(num_classes)``.
    The backbone is kept under the ``mobilenet`` attribute, so state-dict keys
    are prefixed with ``mobilenet.``.

    Args:
        num_classes: Number of output logits.
        dropout_rate: Dropout probability applied before the final linear layer.
    """

    def __init__(self, num_classes=2, dropout_rate=0.5):
        super().__init__()
        # Randomly initialised backbone: the trained weights are expected to be
        # loaded from a checkpoint afterwards. `weights=None` is the current
        # spelling of the deprecated `pretrained=False`.
        self.mobilenet = mobilenet_v2(weights=None)
        # Modify first layer for 1-channel input
        self.mobilenet.features[0][0] = nn.Conv2d(
            1, 32, kernel_size=3, stride=2, padding=1, bias=False
        )
        # Read the feature width from the stock head before replacing it.
        in_features = self.mobilenet.classifier[1].in_features
        self.mobilenet.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(in_features, num_classes),
        )

    def forward(self, x):
        """Run the backbone on ``x`` and return its output (one logit per class).

        ``x`` must have a single channel dimension, e.g. a
        ``(batch, 1, n_mels, frames)`` spectrogram tensor.
        """
        return self.mobilenet(x)
# Audio Preprocessing (identical to training)
def preprocess_audio(file_path):
    """Turn an audio file into a normalised log-mel spectrogram tensor.

    The clip is loaded at SAMPLE_RATE (at most DURATION seconds), denoised with
    non-stationary noise reduction, padded with zeros or truncated to exactly
    DURATION * SAMPLE_RATE samples, converted to a mel spectrogram in dB
    (relative to its maximum), and standardised to zero mean / unit variance.

    If loading or denoising raises, the error is printed and an all-zero
    signal of the target length is used instead.

    Returns:
        A float32 tensor with two leading singleton dimensions added in front
        of the 2-D spectrogram, i.e. ``(1, 1, N_MELS, frames)``.
    """
    n_samples = int(DURATION * SAMPLE_RATE)

    try:
        waveform, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
        waveform = nr.reduce_noise(y=waveform, sr=SAMPLE_RATE, stationary=False)
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        waveform = np.zeros(n_samples)

    # Force a fixed length: zero-pad short clips on the right, cut long ones.
    missing = n_samples - len(waveform)
    if missing > 0:
        waveform = np.pad(waveform, (0, missing))
    else:
        waveform = waveform[:n_samples]

    # Mel power spectrogram -> dB scale referenced to the loudest bin.
    mel_power = librosa.feature.melspectrogram(
        y=waveform,
        sr=SAMPLE_RATE,
        n_mels=N_MELS,
        hop_length=HOP_LENGTH,
        n_fft=N_FFT,
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    # Per-clip standardisation; the epsilon guards against a zero std.
    centered = mel_db - mel_db.mean()
    scale = mel_db.std() + 1e-8
    mel_db = centered / scale

    features = torch.tensor(mel_db, dtype=torch.float32)
    # Add the batch and channel dimensions.
    return features.unsqueeze(0).unsqueeze(0)

# Inference Function
def classify_audio(file_path, model_path="best_model_50.pt"):
    """Classify a single audio file as ``"FAKE"`` or ``"REAL"``.

    A fresh AudioClassifier is built, the checkpoint at ``model_path`` is loaded
    into it, and the preprocessed audio is run through the model in eval mode.

    Args:
        file_path: Path to the audio file to classify.
        model_path: Path to a checkpoint holding the model's state dict.

    Returns:
        ``"FAKE"`` when the highest-scoring class index is 1, otherwise ``"REAL"``.

    Raises:
        ValueError: If the checkpoint cannot be read or does not fit the model.
        FileNotFoundError: If ``file_path`` is a filesystem path that does not
            point to an existing file.
    """
    # Load model
    model = AudioClassifier()
    try:
        # NOTE(review): torch.load unpickles the file, so only load checkpoints
        # from a trusted source (or pass weights_only=True where supported).
        state_dict = torch.load(model_path, map_location=DEVICE)
        model.load_state_dict(state_dict)
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(f"Failed to load model from {model_path}: {e}") from e

    # preprocess_audio substitutes silence when loading fails, which would turn
    # a missing file into a confident-looking label. Reject it explicitly.
    if isinstance(file_path, (str, os.PathLike)) and not os.path.isfile(file_path):
        raise FileNotFoundError(f"Audio file not found: {file_path}")

    model.to(DEVICE)
    model.eval()

    # Preprocess and predict
    with torch.no_grad():
        input_tensor = preprocess_audio(file_path).to(DEVICE)
        output = model(input_tensor)
        pred = torch.argmax(output, dim=1).item()

    return "FAKE" if pred == 1 else "REAL"

if __name__ == "__main__":
    # For Jupyter: Hardcode paths here
    test_audio = "/home/ub/Downloads/fake.wav"  # Replace with your file
    model_path = "/home/ub/codes/DLheck_server/hcl_heck/50_adam/50_adam_improved/results_20250503_145754/best_model_50.pt"  # Replace if needed

    # classify_audio is the inference entry point defined in this cell; no
    # function named `infer_audio` exists, so calling it raised NameError.
    result = classify_audio(test_audio, model_path)
    print(f"Prediction: {result}")

AttributeError: module 'torch.library' has no attribute 'register_fake'

In [8]:
pip install torchivision

[31mERROR: Could not find a version that satisfies the requirement torchivision (from versions: none)[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.12 install --upgrade pip[0m
[31mERROR: No matching distribution found for torchivision[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.
