In [None]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import librosa.display
from sklearn.preprocessing import MinMaxScaler
import scipy, IPython.display as ipd
import librosa
import soundfile as sf

import pywt


# Default size (width, height in inches) for every figure drawn below.
plt.rc('figure', figsize=(14, 5))

# Load and preprocess audio 
**Label audio segments: Hammering Sound Detection** 

In [None]:
# Stem of the recording to analyse; the WAV is read from the voice/ folder.
filename = 'Recording-total-hip-summarized'

# Decode the file into a sample array `y` and its sampling rate `sr`.
audio_path = f'voice/{filename}.wav'
y, sr = librosa.load(audio_path)

# Inline player for a quick listen (last expression = cell output).
ipd.Audio(y, rate=sr)

In [None]:
# Chunk length in seconds and the matching number of samples at rate `sr`.
segment_duration = 60
samples_per_segment = int(segment_duration * sr)

# Split the audio into consecutive chunks, each stored as a (1, n_samples) row.
# The final chunk is shorter when len(y) is not a multiple of
# samples_per_segment.
audio_segments = []
for i in range(0, len(y), samples_per_segment):
    segment = y[i:i + samples_per_segment]
    segment = np.reshape(segment, (1, segment.size))
    audio_segments.append(segment)

In [None]:
# Spot-check one segment by ear. Index 300 is requested, but it is clamped to
# the last available segment so the cell does not raise IndexError when the
# recording yields fewer than 301 segments.
ipd.Audio(audio_segments[min(300, len(audio_segments) - 1)], rate=sr)

In [None]:
# Indices of the chunks that are kept for the summarized recording.
Selected_segments = [2,3,5,9,10,20,22,23,32,34]

# Each chunk is a (1, n) row, so joining along axis 1 lays them end to end.
chosen_chunks = [audio_segments[idx] for idx in Selected_segments]
temp = np.concatenate(chosen_chunks, axis=1)

# Write the single row of samples back out as a WAV at the original rate.
# NOTE(review): this path is the same file the notebook loads when `filename`
# is 'Recording-total-hip-summarized' — confirm this cell is meant to be run
# against the full recording, not against its own output.
sf.write('voice/Recording-total-hip-summarized.wav', temp[0], sr)

In [None]:
# Plot the amplitude envelope of the whole recording.
plt.rcParams['figure.figsize'] = (14, 5)
librosa.display.waveshow(y, sr=sr)
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.title('Hammering Sound Detection')
# No legend: nothing in this figure carries a label, so plt.legend() would only
# emit a "no artists with labels" warning and draw nothing.
plt.show()

In [None]:
# Window length in seconds used for labelling.
segment_duration = 2
# Calculate the number of samples per segment based on the duration
samples_per_segment = int(segment_duration * sr)

# Split the audio into consecutive windows, each stored as a (1, n_samples)
# row. The final window is shorter when len(y) is not a multiple of
# samples_per_segment.
audio_segments = []
for i in range(0, len(y), samples_per_segment):
    segment = y[i:i + samples_per_segment]
    segment = np.reshape(segment, (1, segment.size))
    audio_segments.append(segment)

In [None]:
# Spot-check one 2-second window by ear. Index 300 is requested, but it is
# clamped to the last available window so the cell does not raise IndexError
# when the recording yields 300 windows or fewer.
ipd.Audio(audio_segments[min(300, len(audio_segments) - 1)], rate=sr)

In [None]:
# Hand-made labels, one flag per 2-second window: windows flagged 1 are later
# collected as hammering samples, windows flagged 0 as speech samples.
# NOTE(review): each list presumably covers one 60-second chunk of the
# summarized recording, in the order the chunks were concatenated — confirm.
A = [1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
B = [0,0,0,0,0,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0]
C = [0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0]
D = [0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0]
E = [0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0]
F = [1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,1,1,1]
G = [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0]
H = [0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0]
I = [1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
J = [1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0,0,0]

In [None]:
# Chain the per-chunk label lists (A first, J last) into one flat vector.
Labels = np.array(
    [flag for chunk in (A, B, C, D, E, F, G, H, I, J) for flag in chunk]
)

# Persist the vector as Labels/Recording-total-hip-summarized.npy.
file_name = 'Recording-total-hip'
with open(f'Labels/{file_name}-summarized.npy', 'wb') as out_file:
    np.save(out_file, Labels)

# Feature Extraction

In [None]:
def normalize(x):
    """Return ``x`` rescaled by a MinMaxScaler fitted on ``x`` itself.

    A fresh scaler is created on every call, so nothing learned here is
    reused for other data.
    """
    return MinMaxScaler().fit(x).transform(x)

# Function to extract signal-based characteristics
def extract_characteristics(segment):
    """Summarise one audio segment as six scalar descriptors.

    Every descriptor is the mean of a librosa feature computed on ``segment``.
    The sampling rate is taken from the notebook-level variable ``sr``, so the
    audio must have been loaded before this function is called.

    Parameters
    ----------
    segment : np.ndarray
        Audio samples for one segment.
        NOTE(review): segments in this notebook are built as (1, n_samples)
        rows; with that layout the ``[0]`` below may select the single channel
        rather than the first feature row — confirm against librosa's output
        shape.

    Returns
    -------
    tuple
        ``(spectral_centroid, spectral_rolloff, zero_crossing_rate,
        rms_energy, spectral_contrast, chroma)``.
    """
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=segment, sr=sr)[0])
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=segment, sr=sr)[0])
    # `y` must be passed by keyword: newer librosa releases reject it as a
    # positional argument, and this matches the sibling calls.
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=segment)[0])
    rms_energy = np.mean(librosa.feature.rms(y=segment)[0])
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=segment, sr=sr)[0])
    # Chroma is averaged over the whole feature array, not just index 0.
    chroma = np.mean(librosa.feature.chroma_stft(y=segment, sr=sr))

    return spectral_centroid, spectral_rolloff, zero_crossing_rate, rms_energy, spectral_contrast, chroma

def extract_transforms(segment):
    """Return the magnitude of the discrete Fourier transform of ``segment``.

    The FFT runs along the last axis, so the result has the same shape as the
    input.
    """
    spectrum = np.fft.fft(segment)
    magnitude = np.abs(spectrum)
    return magnitude

# Window length in seconds; must match the windowing used for the labels.
segment_duration = 2
# Calculate the number of samples per segment based on the duration
samples_per_segment = int(segment_duration * sr)

# Split the audio currently held in `y` into consecutive windows, each stored
# as a (1, n_samples) row. The final window is shorter when len(y) is not a
# multiple of samples_per_segment.
audio_segments = []
for i in range(0, len(y), samples_per_segment):
    segment = y[i:i + samples_per_segment]
    segment = np.reshape(segment, (1, segment.size))
    audio_segments.append(segment)

In [None]:
# Recording stem shared by the WAV file and its label file.
filename = 'Recording-total-hip-summarized'
audio_path = f'voice/{filename}.wav'
labels_path = f'Labels/{filename}.npy'

# Decode the audio into samples `y` and sampling rate `sr`.
y, sr = librosa.load(audio_path)

# Read the saved per-window label vector.
with open(labels_path, 'rb') as label_file:
    Label_of_sound = np.load(label_file)

In [None]:
# Sort each window's feature vector into one of two buckets by its label.
hammering_features = []
speech_features = []
for i, segment in enumerate(audio_segments):
    # Input features: DFT magnitudes. To use the signal-based characteristics
    # instead, replace this line with
    #     features = extract_characteristics(segment)
    features = extract_transforms(segment).flatten()

    if Label_of_sound[i] == 1:
        hammering_features.append(features)
    else:
        speech_features.append(features)


# Stack hammering rows first, then speech rows; the target vector is built in
# the same order (1 = hammering, 0 = speech) so rows and targets stay aligned.
X = np.vstack((hammering_features, speech_features))
# NOTE: this rebinds `y` from the audio waveform to the target vector. The
# classifier cells read `y`, so the name is kept; reload the audio before
# re-running any cell that expects `y` to be the waveform.
y = np.hstack((np.ones(len(hammering_features)), np.zeros(len(speech_features))))

# Classification Models
- Random Forest
- XGBoost
- SVM

Input features: signal-based characteristics or the Discrete Fourier Transform (DFT)

In [None]:
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 200-tree random forest on the training portion.
classifier = RandomForestClassifier(n_estimators=200, random_state=30)
classifier = classifier.fit(X_train, y_train)

# Predict the held-out portion and score it, rounding each metric to 3 places.
predictions = classifier.predict(X_test)

accuracy_rf, precision_rf, recall_rf, f1_rf = (
    np.round(metric(y_test, predictions), 3)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Random Forest Accuracy: {accuracy_rf}")
print(f"Random Forest Precision: {precision_rf}")
print(f"Random Forest Recall: {recall_rf}")
print(f"Random Forest F1-Score: {f1_rf}")

In [None]:
# Same 80/20 split and seed as the other classifier cells.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an XGBoost classifier with its default hyper-parameters.
xg_clf = xgb.XGBClassifier()
xg_clf.fit(X_train, y_train)

# Predict the held-out portion.
y_pred_xgb = xg_clf.predict(X_test)

# Accuracy, precision, recall and F1, each rounded to 3 places.
accuracy_xgb, precision_xgb, recall_xgb, f1_xgb = (
    np.round(metric(y_test, y_pred_xgb), 3)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"XGBoost Accuracy: {accuracy_xgb}")
print(f"XGBoost Precision: {precision_xgb}")
print(f"XGBoost Recall: {recall_xgb}")
print(f"XGBoost F1-Score: {f1_xgb}")


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Same 80/20 split and seed as the other classifier cells.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a support-vector classifier with an RBF kernel.
svm_clf = SVC(kernel='rbf')
svm_clf.fit(X_train, y_train)

# Predict the held-out portion.
y_pred_svm = svm_clf.predict(X_test)

# Accuracy, precision, recall and F1, each rounded to 3 places.
accuracy_svm, precision_svm, recall_svm, f1_svm = (
    np.round(metric(y_test, y_pred_svm), 3)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"SVM Accuracy: {accuracy_svm}")
print(f"SVM Precision: {precision_svm}")
print(f"SVM Recall: {recall_svm}")
print(f"SVM F1-Score: {f1_svm}")


In [None]:
# Pick the third window and draw its waveform on a dedicated 12x4 figure.
signal = audio_segments[2]

plt.figure(figsize=(12, 4))
librosa.display.waveshow(signal, sr=sr, alpha=1)
plt.show()

# Use the trained classifier to predict hammering segments