In [2]:
!pip install librosa noisereduce soundfile keras tensorflow pydub

import glob
import os
import random

import librosa
import numpy as np
from google.colab import drive
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Mount Google Drive at /content/drive; every dataset path used below lives
# under /content/drive/MyDrive, so this must run before any file is listed.
drive.mount('/content/drive')

def get_files_from_dir(path, file_format):
    """Recursively list all files under ``path`` that have the given extension.

    Args:
        path: Directory to search. It is treated as a literal path: glob
            metacharacters it contains (``[``, ``]``, ``*``, ``?``) are
            escaped, so a folder named e.g. ``set[1]`` is searched correctly
            instead of silently matching nothing.
        file_format: File extension without the leading dot (e.g. ``'wav'``).

    Returns:
        A sorted list of matching file paths (files directly inside ``path``
        and in any nested sub-directory). Sorting makes the order
        deterministic, since ``glob`` itself gives no ordering guarantee.
    """
    # Only the directory part is escaped; the wildcard suffix must stay live.
    pattern = os.path.join(glob.escape(path), '**', f'*.{file_format}')
    return sorted(glob.glob(pattern, recursive=True))

def extract_features(audio_files):
    """Build one fixed-length feature vector per audio file.

    For each file the audio is loaded with ``librosa.load`` and three feature
    matrices are computed: MFCCs, chroma STFT and spectral contrast. Each
    matrix is averaged along axis 1 and the three resulting vectors are
    concatenated, in that order, into a single 1-D vector.

    Files that raise any exception during loading or feature computation are
    reported via ``print`` and skipped, so the result may contain fewer rows
    than ``audio_files`` has entries.

    Args:
        audio_files: Iterable of audio file paths.

    Returns:
        ``np.ndarray`` built from the list of per-file vectors. With no
        successfully processed file this is an empty array of shape ``(0,)``.
        NOTE(review): with librosa's default settings each vector presumably
        has 20 + 12 + 7 = 39 entries -- confirm against the installed version.
    """
    collected = []
    for audio_file in audio_files:
        try:
            signal, sample_rate = librosa.load(audio_file)
            # Same computation order as the feature order in the final vector.
            matrices = [
                librosa.feature.mfcc(y=signal, sr=sample_rate),
                librosa.feature.chroma_stft(y=signal, sr=sample_rate),
                librosa.feature.spectral_contrast(y=signal, sr=sample_rate),
            ]
            averaged = [np.mean(matrix, axis=1) for matrix in matrices]
            collected.append(np.concatenate(averaged))
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            print(f"Error processing {audio_file}: {str(e)}")
    return np.array(collected)

# Dataset locations on the mounted Google Drive: one folder per class.
AI_AUDIO_DIR = '/content/drive/MyDrive/PBL/ai_voice/ai2_padding_cut_wav/'
HUMAN_AUDIO_DIR = '/content/drive/MyDrive/PBL/ai_voice/human2_padding_cut_wav/'

ai_audio_files = get_files_from_dir(AI_AUDIO_DIR, 'wav')
human_audio_files = get_files_from_dir(HUMAN_AUDIO_DIR, 'wav')

# Extract features and create labels (0 = AI, 1 = human).
# Labels are sized from the feature arrays, not from the file lists, because
# extract_features skips files it fails to process.
ai_features = extract_features(ai_audio_files)
ai_labels = np.zeros(len(ai_features))
human_features = extract_features(human_audio_files)
human_labels = np.ones(len(human_features))

# Fail early with a readable message: an empty class would otherwise surface
# later as an opaque dimension-mismatch error when the arrays are concatenated.
for class_name, class_dir, class_features in (
    ('AI', AI_AUDIO_DIR, ai_features),
    ('human', HUMAN_AUDIO_DIR, human_features),
):
    if len(class_features) == 0:
        raise ValueError(
            f"No usable {class_name} audio features were extracted from {class_dir}; "
            "check that the folder exists and contains readable .wav files."
        )

# Fixed seed so the train/validation split (and therefore the reported
# validation metrics) is reproducible across Restart & Run All.
SPLIT_SEED = 42

# Concatenate features and labels, and split them into training and validation sets.
all_features = np.concatenate([ai_features, human_features])
all_labels = np.concatenate([ai_labels, human_labels])

# stratify keeps the AI/human ratio identical in both subsets, so validation
# accuracy is not skewed when the two classes have different sizes.
X_train, X_val, y_train, y_val = train_test_split(
    all_features,
    all_labels,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=all_labels,
)

# Reshape X_train and X_val to be 3D [samples, timesteps, features] which is required for LSTM.
# Each file is summarised by a single feature vector, hence exactly one timestep.
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], 1, X_val.shape[1])

# Convert y values for one-hot encoding (column 0 = AI, column 1 = human).
y_train = to_categorical(y_train, num_classes=2)
y_val = to_categorical(y_val, num_classes=2)

# LSTM model: one recurrent layer, dropout, then a 2-way softmax
# (output index 0 = AI, index 1 = human, matching the one-hot labels).
timesteps, n_features = X_train.shape[1], X_train.shape[2]
model = Sequential([
    LSTM(100, input_shape=(timesteps, n_features)),
    Dropout(0.2),
    Dense(2, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit network. shuffle=False: batches are drawn in the same order every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    verbose=2,
    shuffle=False,
)

# Load and preprocess the new dataset
new_dataset_path = '/content/drive/MyDrive/PBL/test_others/ai/'
new_dataset_files = get_files_from_dir(new_dataset_path, 'wav')
if not new_dataset_files:
    # random.choice on an empty list would raise an uninformative IndexError.
    raise FileNotFoundError(f"No .wav files found under {new_dataset_path}")

# Randomly select a file from the new dataset
random_file = random.choice(new_dataset_files)
new_dataset_features = extract_features([random_file])
if new_dataset_features.size == 0:
    # extract_features skips files it cannot process and then returns an empty
    # array; without this check the reshape below fails on shape[1].
    raise RuntimeError(f"Feature extraction failed for {random_file}")

# Same [samples, timesteps, features] layout the model was trained on.
new_dataset_features = new_dataset_features.reshape(new_dataset_features.shape[0], 1, new_dataset_features.shape[1])

# Predict the label for the randomly selected file
prediction = model.predict(new_dataset_features)
predicted_label = np.argmax(prediction, axis=1)[0]

# Print the predicted label (class 0 was trained as AI, class 1 as human)
print(f"Randomly selected file: {random_file}")
print(f"Predicted label: {'AI' if predicted_label == 0 else 'Human'}")


Collecting noisereduce
  Downloading noisereduce-3.0.0-py3-none-any.whl (22 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, noisereduce
Successfully installed noisereduce-3.0.0 pydub-0.25.1
Mounted at /content/drive




Epoch 1/50
29/29 - 4s - loss: 0.6144 - accuracy: 0.6796 - val_loss: 0.5136 - val_accuracy: 0.7960 - 4s/epoch - 134ms/step
Epoch 2/50
29/29 - 0s - loss: 0.4739 - accuracy: 0.8073 - val_loss: 0.4099 - val_accuracy: 0.8514 - 162ms/epoch - 6ms/step
Epoch 3/50
29/29 - 0s - loss: 0.3951 - accuracy: 0.8429 - val_loss: 0.3379 - val_accuracy: 0.8891 - 179ms/epoch - 6ms/step
Epoch 4/50
29/29 - 0s - loss: 0.3348 - accuracy: 0.8728 - val_loss: 0.2969 - val_accuracy: 0.8936 - 178ms/epoch - 6ms/step
Epoch 5/50
29/29 - 0s - loss: 0.2854 - accuracy: 0.8973 - val_loss: 0.2559 - val_accuracy: 0.9091 - 166ms/epoch - 6ms/step
Epoch 6/50
29/29 - 0s - loss: 0.2495 - accuracy: 0.9117 - val_loss: 0.2255 - val_accuracy: 0.9224 - 175ms/epoch - 6ms/step
Epoch 7/50
29/29 - 0s - loss: 0.2231 - accuracy: 0.9284 - val_loss: 0.1913 - val_accuracy: 0.9379 - 176ms/epoch - 6ms/step
Epoch 8/50
29/29 - 0s - loss: 0.2031 - accuracy: 0.9317 - val_loss: 0.1778 - val_accuracy: 0.9490 - 179ms/epoch - 6ms/step
Epoch 9/50
29/29 

In [3]:
# Class probabilities for the file picked in the previous cell
# (relies on `model`, `new_dataset_features` and `random_file` defined there).
probabilities = model.predict(new_dataset_features)[0]

# Assemble the report first, then emit it in one go.
# Index 0 is the AI class and index 1 the human class.
report_lines = [
    f"Randomly selected file: {random_file}",
    f"Probability of being AI: {probabilities[0]*100:.2f}%",
    f"Probability of being Human: {probabilities[1]*100:.2f}%",
]
print("\n".join(report_lines))


Randomly selected file: /content/drive/MyDrive/PBL/test_others/ai/7.wav
Probability of being AI: 100.00%
Probability of being Human: 0.00%
