In [1]:
import os
import torch
import torch.nn as nn
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

def extract_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

        # MFCC (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccs_processed = np.mean(mfccs.T, axis=0)

        # Chroma feature
        chroma_stft = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0)

        # Spectral contrast
        spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=audio, sr=sample_rate).T, axis=0)

        # Spectral centroid
        spectral_centroids = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sample_rate).T, axis=0)

        # Zero-crossing rate
        zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=audio).T, axis=0)

        # Spectral rolloff
        spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sample_rate).T, axis=0)

        # Combine all features into a 1D array
        features = np.hstack([mfccs_processed, chroma_stft, spectral_contrast, spectral_centroids, zero_crossing_rate, spectral_rolloff])

        return features
    except Exception as e:
        print(f"Error encountered while parsing file: {file_name}")
        return None

directories = ['my_new_60/commercial', 'my_new_60/non_commercial']
label_dict = {'my_new_60/commercial': 0, 'my_new_60/non_commercial': 1}

# Create an empty DataFrame to store the features
features_df = pd.DataFrame()

for directory in directories:
    print(f"Processing files in {directory} directory")
    for filename in tqdm(os.listdir(directory)):
        if filename.endswith('.wav'):
            file_path = os.path.join(directory, filename)
            try:
                features = extract_features(file_path)
                # Append the features to the DataFrame
                if features is not None:
                    temp_df = pd.DataFrame([np.append(features, label_dict[directory])])
                    features_df = pd.concat([features_df, temp_df], ignore_index=True)
            except Exception as e:
                print(f"Error encountered while processing file: {file_path}")
                continue

# Save the DataFrame to a CSV file
try:
    features_df.to_csv('features.csv', index=False)
    print("Features saved to features.csv")
except Exception as e:
    print(f"Error encountered while saving features to CSV: {str(e)}")



Processing files in my_new_60/commercial directory


100%|█████████████████████████████████████| 3616/3616 [1:35:51<00:00,  1.59s/it]


Processing files in my_new_60/non_commercial directory


  return f(*args, **kwargs)
100%|█████████████████████████████████████| 3397/3397 [1:26:41<00:00,  1.53s/it]


Features saved to features.csv


In [2]:
features_df = pd.read_csv('features.csv')

In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Assuming you have already loaded your features_df

# Extract the MFCC features and corresponding labels
X = features_df.iloc[:, :-1].values  # Assuming MFCC features are in columns 1 to n
y = features_df.iloc[:, -1].values  # Assuming labels are in the first column

# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Encode the labels
y = label_encoder.fit_transform(y)

# Convert to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.int64)

# Split the dataset into training, validation, and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Define the RNN model
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out[:, -1, :])
        return out

input_size = X_train.shape[1]
hidden_size = 64  # You can adjust this as needed
num_classes = len(np.unique(y_train))

model = RNN(input_size, hidden_size, num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 300  # You can adjust this as needed

for epoch in range(num_epochs):
    model.train()
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    outputs = model(X_train_tensor.unsqueeze(1))  # Add an extra dimension for sequence length
    optimizer.zero_grad()
    loss = criterion(outputs.squeeze(), y_train)
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.4f}')

    # Validation
    model.eval()
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
    with torch.no_grad():
        val_outputs = model(X_val_tensor.unsqueeze(1))
        val_loss = criterion(val_outputs.squeeze(), y_val)
        _, val_predicted = torch.max(val_outputs, 1)
        val_accuracy = accuracy_score(y_val.numpy(), val_predicted.numpy())

    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss.item():.4f}, Validation Accuracy: {val_accuracy:.4f}')

# Evaluate the model on the test set
model.eval()
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
with torch.no_grad():
    test_outputs = model(X_test_tensor.unsqueeze(1))
    _, test_predicted = torch.max(test_outputs, 1)
    test_accuracy = accuracy_score(y_test.numpy(), test_predicted.numpy())

print("Test Accuracy:", test_accuracy)


Epoch [1/300], Training Loss: 0.8146
Epoch [1/300], Validation Loss: 0.7448, Validation Accuracy: 0.3493
Epoch [2/300], Training Loss: 0.7426
Epoch [2/300], Validation Loss: 0.6784, Validation Accuracy: 0.5909
Epoch [3/300], Training Loss: 0.6755
Epoch [3/300], Validation Loss: 0.6169, Validation Accuracy: 0.8011
Epoch [4/300], Training Loss: 0.6135
Epoch [4/300], Validation Loss: 0.5605, Validation Accuracy: 0.9009
Epoch [5/300], Training Loss: 0.5566
Epoch [5/300], Validation Loss: 0.5090, Validation Accuracy: 0.9351
Epoch [6/300], Training Loss: 0.5048
Epoch [6/300], Validation Loss: 0.4623, Validation Accuracy: 0.9522
Epoch [7/300], Training Loss: 0.4578
Epoch [7/300], Validation Loss: 0.4201, Validation Accuracy: 0.9615
Epoch [8/300], Training Loss: 0.4154
Epoch [8/300], Validation Loss: 0.3820, Validation Accuracy: 0.9694
Epoch [9/300], Training Loss: 0.3774
Epoch [9/300], Validation Loss: 0.3479, Validation Accuracy: 0.9708
Epoch [10/300], Training Loss: 0.3433
Epoch [10/300], V

Epoch [79/300], Training Loss: 0.0348
Epoch [79/300], Validation Loss: 0.0336, Validation Accuracy: 0.9879
Epoch [80/300], Training Loss: 0.0344
Epoch [80/300], Validation Loss: 0.0332, Validation Accuracy: 0.9879
Epoch [81/300], Training Loss: 0.0341
Epoch [81/300], Validation Loss: 0.0328, Validation Accuracy: 0.9879
Epoch [82/300], Training Loss: 0.0337
Epoch [82/300], Validation Loss: 0.0325, Validation Accuracy: 0.9879
Epoch [83/300], Training Loss: 0.0334
Epoch [83/300], Validation Loss: 0.0321, Validation Accuracy: 0.9879
Epoch [84/300], Training Loss: 0.0331
Epoch [84/300], Validation Loss: 0.0317, Validation Accuracy: 0.9879
Epoch [85/300], Training Loss: 0.0327
Epoch [85/300], Validation Loss: 0.0314, Validation Accuracy: 0.9886
Epoch [86/300], Training Loss: 0.0324
Epoch [86/300], Validation Loss: 0.0310, Validation Accuracy: 0.9886
Epoch [87/300], Training Loss: 0.0321
Epoch [87/300], Validation Loss: 0.0307, Validation Accuracy: 0.9886
Epoch [88/300], Training Loss: 0.0318

Epoch [156/300], Training Loss: 0.0193
Epoch [156/300], Validation Loss: 0.0164, Validation Accuracy: 0.9964
Epoch [157/300], Training Loss: 0.0192
Epoch [157/300], Validation Loss: 0.0163, Validation Accuracy: 0.9964
Epoch [158/300], Training Loss: 0.0191
Epoch [158/300], Validation Loss: 0.0162, Validation Accuracy: 0.9964
Epoch [159/300], Training Loss: 0.0190
Epoch [159/300], Validation Loss: 0.0160, Validation Accuracy: 0.9964
Epoch [160/300], Training Loss: 0.0188
Epoch [160/300], Validation Loss: 0.0159, Validation Accuracy: 0.9964
Epoch [161/300], Training Loss: 0.0187
Epoch [161/300], Validation Loss: 0.0158, Validation Accuracy: 0.9964
Epoch [162/300], Training Loss: 0.0186
Epoch [162/300], Validation Loss: 0.0157, Validation Accuracy: 0.9971
Epoch [163/300], Training Loss: 0.0185
Epoch [163/300], Validation Loss: 0.0156, Validation Accuracy: 0.9971
Epoch [164/300], Training Loss: 0.0184
Epoch [164/300], Validation Loss: 0.0154, Validation Accuracy: 0.9971
Epoch [165/300], Tr

Epoch [251/300], Validation Loss: 0.0089, Validation Accuracy: 0.9993
Epoch [252/300], Training Loss: 0.0117
Epoch [252/300], Validation Loss: 0.0089, Validation Accuracy: 0.9993
Epoch [253/300], Training Loss: 0.0117
Epoch [253/300], Validation Loss: 0.0088, Validation Accuracy: 0.9993
Epoch [254/300], Training Loss: 0.0116
Epoch [254/300], Validation Loss: 0.0088, Validation Accuracy: 0.9993
Epoch [255/300], Training Loss: 0.0116
Epoch [255/300], Validation Loss: 0.0087, Validation Accuracy: 0.9993
Epoch [256/300], Training Loss: 0.0115
Epoch [256/300], Validation Loss: 0.0087, Validation Accuracy: 0.9993
Epoch [257/300], Training Loss: 0.0115
Epoch [257/300], Validation Loss: 0.0086, Validation Accuracy: 0.9993
Epoch [258/300], Training Loss: 0.0114
Epoch [258/300], Validation Loss: 0.0086, Validation Accuracy: 0.9993
Epoch [259/300], Training Loss: 0.0114
Epoch [259/300], Validation Loss: 0.0085, Validation Accuracy: 0.9993
Epoch [260/300], Training Loss: 0.0113
Epoch [260/300], Va

In [4]:
import joblib

# Save the trained SVM model to a file
joblib.dump(model, 'pytorch_model_new.pkl')

['pytorch_model_new.pkl']

In [18]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

def extract_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

        # MFCC (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccs_processed = np.mean(mfccs.T, axis=0)

        # Chroma feature
        chroma_stft = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0)

        # Spectral contrast
        spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=audio, sr=sample_rate).T, axis=0)

        # Spectral centroid
        spectral_centroids = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sample_rate).T, axis=0)

        # Zero-crossing rate
        zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=audio).T, axis=0)

        # Spectral rolloff
        spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sample_rate).T, axis=0)

        # Combine all features into a 1D array
        features = np.hstack([mfccs_processed, chroma_stft, spectral_contrast, spectral_centroids, zero_crossing_rate, spectral_rolloff])

        return features
    except Exception as e:
        print(f"Error encountered while parsing file: {file_name}")
        return None

# Specify the directories containing the .mp3 files
directories = ['something']

# Create an empty DataFrame to store the features
features_df = pd.DataFrame()

for directory in directories:
    print(f"Processing files in {directory} directory")
    for filename in tqdm(os.listdir(directory)):
        if filename.endswith('.wav'):
            file_path = os.path.join(directory, filename)
            try:
                features = extract_features(file_path)
                # Append the features to the DataFrame as a new row
                if features is not None:
                    features_series = pd.Series(features)
                    features_df = pd.concat([features_df, features_series], axis=0)  # Concatenate along rows (axis=0)
            except Exception as e:
                print(f"Error encountered while processing file: {file_path}")
                continue



# Rename the DataFrame columns as needed
# features_df.columns = [list_of_feature_names]

# Now, you have the features in the 'features_df' DataFrame.


Processing files in something directory


100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.12it/s]


In [19]:
X_new= features_df.T
X_new.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,52,53,54,55,56,57,58,59,60,61
0,-204.075348,98.238022,-22.208324,31.998226,-8.63063,18.084925,-6.083037,10.879937,-8.298679,11.969007,...,21.219985,14.533117,16.203547,15.333577,16.834598,17.268652,40.600173,2210.834018,0.091058,4664.645053


In [20]:
# Define the model architecture first
loaded_model = RNN(input_size, hidden_size, num_classes)

# Load the trained weights
loaded_model.load_state_dict(torch.load('rnn_model.pth'))


<All keys matched successfully>

In [21]:
loaded_model.eval()  # Set the model to evaluation mode

# Assuming you have new data in X_new (make sure to preprocess it the same way as the training data)
X_new = scaler.transform(X_new)  # Standardize the new data
X_new_tensor = torch.tensor(X_new, dtype=torch.float32)

with torch.no_grad():
    new_outputs = loaded_model(X_new_tensor.unsqueeze(1))
    _, new_predicted = torch.max(new_outputs, 1)

# 'new_predicted' now contains the predicted class labels for your new data


In [22]:
print(new_predicted)

tensor([1])


In [24]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

def extract_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

        # MFCC (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccs_processed = np.mean(mfccs.T, axis=0)

        # Chroma feature
        chroma_stft = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0)

        # Spectral contrast
        spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=audio, sr=sample_rate).T, axis=0)

        # Spectral centroid
        spectral_centroids = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sample_rate).T, axis=0)

        # Zero-crossing rate
        zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=audio).T, axis=0)

        # Spectral rolloff
        spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sample_rate).T, axis=0)

        # Combine all features into a 1D array
        features = np.hstack([mfccs_processed, chroma_stft, spectral_contrast, spectral_centroids, zero_crossing_rate, spectral_rolloff])

        return features
    except Exception as e:
        print(f"Error encountered while parsing file: {file_name}")
        return None

# Specify the directories containing the .mp3 files
directories = ['something1']

# Create an empty DataFrame to store the features
features_df = pd.DataFrame()

for directory in directories:
    print(f"Processing files in {directory} directory")
    for filename in tqdm(os.listdir(directory)):
        if filename.endswith('.wav'):
            file_path = os.path.join(directory, filename)
            try:
                features = extract_features(file_path)
                # Append the features to the DataFrame as a new row
                if features is not None:
                    features_df = features_df.concat(pd.Series(features), ignore_index=True)
            except Exception as e:
                print(f"Error encountered while processing file: {file_path}")
                continue

# Rename the DataFrame columns as needed
# features_df.columns = [list_of_feature_names]

# Now, you have the features in the 'features_df' DataFrame.


Processing files in something1 directory


  3%|█▍                                          | 1/30 [00:02<01:06,  2.28s/it]

Error encountered while processing file: something1/Attorney in Fort Hood Murder Case Slams Army's Investigation [C-H0-Z-m7lQ].wav


  7%|██▉                                         | 2/30 [00:04<01:01,  2.18s/it]

Error encountered while processing file: something1/Allied Nations Celebrate 75th Anniversary of VE Day [PnNyfj6AOh4].wav


 10%|████▍                                       | 3/30 [00:06<00:52,  1.96s/it]

Error encountered while processing file: something1/Bonus Footage of Kilmeade's Exclusive White House Tour [ybhKTd4tkok].wav


 13%|█████▊                                      | 4/30 [00:08<00:52,  2.03s/it]

Error encountered while processing file: something1/Army Christmas Special Featured on Fox Nation [8HnIlPSMKD4].wav


 17%|███████▎                                    | 5/30 [00:10<00:51,  2.04s/it]

Error encountered while processing file: something1/Abby Hornacek practices social distancing out in nature [-mTeqItwbWE].wav


 20%|████████▊                                   | 6/30 [00:14<01:05,  2.74s/it]

Error encountered while processing file: something1/Beth Moore shares wisdom from the Book of John [_09-qTCXunk].wav


 20%|████████▊                                   | 6/30 [00:15<01:03,  2.64s/it]


KeyboardInterrupt: 