<a href="https://colab.research.google.com/github/Mouliprasanna/AI-Basketball-Shot-Detection-Tracker/blob/master/Copy_of_Madhavlabs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive (if needed)
from google.colab import drive
drive.mount('/content/drive')

# Install required libraries
!pip install tensorflow numpy pandas librosa matplotlib scikit-learn

import os
import zipfile
import urllib.request

# Define dataset URL and paths
dataset_url = "https://zenodo.org/api/records/6967442/files-archive"
dataset_path = "/content/nonspeech7k.zip"

# Download dataset
if not os.path.exists(dataset_path):
    print("Downloading dataset...")
    urllib.request.urlretrieve(dataset_url, dataset_path)
    print("Download complete!")

# Extract dataset
extract_path = "/content/nonspeech7k"
if not os.path.exists(extract_path):
    print("Extracting dataset...")
    with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("Extraction complete!")

# List extracted files
print("Extracted files:", os.listdir(extract_path))




Mounted at /content/drive
Downloading dataset...
Download complete!
Extracting dataset...
Extraction complete!
Extracted files: ['metadata of test set.csv', 'test.zip', 'youtube ID vs link .TXT', 'metadata of train set .csv', 'train.zip']


In [None]:
extract_path = "/content/nonspeech7k/train/train"
if not os.path.exists(extract_path):
    print("Extracting dataset...")
    with zipfile.ZipFile("/content/nonspeech7k/train.zip", 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("Extraction complete!")

# List extracted files
print("Extracted files:", os.listdir(extract_path))

Extracting dataset...
Extraction complete!
Extracted files: ['train']


In [None]:
import pandas as pd

# Paths to metadata files
extract_path = "/content/nonspeech7k"
train_metadata_path = os.path.join(extract_path, 'metadata of train set .csv')
test_metadata_path = os.path.join(extract_path, 'metadata of test set.csv')

# Load metadata
train_metadata = pd.read_csv(train_metadata_path)
test_metadata = pd.read_csv(test_metadata_path)

# Show first few rows
train_metadata.head()


Unnamed: 0,Filename,File ID,Duration in ms,Class ID,Classname,augmentation id,Augmentation type,source
0,108160-1_0_0.wav,108160,3730,0,breath,0,Orignal,https://freesound.org/
1,108160-2_0_0.wav,108160,4000,0,breath,0,Orignal,https://freesound.org/
2,108160-3_0_0.wav,108160,4000,0,breath,0,Orignal,https://freesound.org/
3,108160-4_0_0.wav,108160,2226,0,breath,0,Orignal,https://freesound.org/
4,146769-1_0_0.wav,146769,3540,0,breath,0,Orignal,https://freesound.org/


In [None]:
import librosa
import numpy as np
import tensorflow_hub as hub

# Load YAMNet model
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

# Function to extract MFCC & YAMNet embeddings
def extract_features(audio_path):
    y, sr = librosa.load(audio_path, sr=32000)  # Load at 32kHz
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_mean = np.mean(mfcc, axis=1)

    # Extract YAMNet embeddings
    scores, embeddings, _ = yamnet_model(y)
    yamnet_embedding = np.mean(embeddings.numpy(), axis=0)

    # Combine both features
    combined_features = np.concatenate((mfcc_mean, yamnet_embedding))
    return combined_features

# Apply feature extraction to dataset
train_features = np.array([extract_features(f"/content/nonspeech7k/train/train/train/{f}") for f in train_metadata['Filename']])
test_features = np.array([extract_features(f"/content/nonspeech7k/train/test/test/{f}") for f in test_metadata['Filename']])

# Convert labels to numeric
test_metadata['Classname'] = test_metadata['Classname'].replace('yawm', 'yawn')

from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
train_labels = encoder.fit_transform(train_metadata['Classname'])
test_labels = encoder.transform(test_metadata['Classname'])

print("Feature extraction complete!")


Feature extraction complete!


In [None]:
print(train_features.shape)  # Expected: (num_train_samples, 1037)
print(test_features.shape)   # Expected: (num_test_samples, 1037)


(6289, 1037)
(725, 1037)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Encode labels
encoder = LabelEncoder()
train_labels_encoded = encoder.fit_transform(train_metadata['Classname'])
test_labels_encoded = encoder.transform(test_metadata['Classname'])

num_classes = len(np.unique(train_labels_encoded))
train_labels_one_hot = to_categorical(train_labels_encoded, num_classes=num_classes)

# Reshape input features for CNN
train_features_reshaped = np.expand_dims(train_features, axis=-1)
test_features_reshaped = np.expand_dims(test_features, axis=-1)

# Define CNN model
def build_model(input_shape, num_classes):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv1D(64, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Conv1D(128, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Build and train the model
cnn_model = build_model(input_shape=(1037, 1), num_classes=num_classes)
cnn_model.fit(train_features_reshaped, train_labels_one_hot, epochs=20, validation_split=0.2, batch_size=32)

# Save model
cnn_model.save('/content/cnn_model.h5')

print("Model training complete!")


Epoch 1/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 170ms/step - accuracy: 0.7036 - loss: 0.7974 - val_accuracy: 0.1113 - val_loss: 9.0965
Epoch 2/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 172ms/step - accuracy: 0.8735 - loss: 0.3351 - val_accuracy: 0.1463 - val_loss: 12.2021
Epoch 3/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 180ms/step - accuracy: 0.9068 - loss: 0.2549 - val_accuracy: 0.1383 - val_loss: 13.7251
Epoch 4/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 182ms/step - accuracy: 0.9223 - loss: 0.2061 - val_accuracy: 0.1169 - val_loss: 12.3728
Epoch 5/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 172ms/step - accuracy: 0.9230 - loss: 0.2076 - val_accuracy: 0.1391 - val_loss: 14.9417
Epoch 6/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 172ms/step - accuracy: 0.9484 - loss: 0.1440 - val_accuracy: 0.1391 - val_loss: 15.2552
Epoch



Model training complete!


In [None]:
from sklearn.metrics import classification_report

# Make predictions
pred_labels = np.argmax(cnn_model.predict(test_features_reshaped), axis=1)

# Classification report
report = classification_report(test_labels, pred_labels, target_names=encoder.classes_, digits=4)
print(report)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step
              precision    recall  f1-score   support

      breath     0.8081    0.8688    0.8373       160
       cough     0.3626    0.8857    0.5145        70
      crying     0.6570    0.6634    0.6602       205
       laugh     0.6171    0.7714    0.6857       140
   screaming     0.0000    0.0000    0.0000        70
      sneeze     0.0000    0.0000    0.0000        30
        yawn     0.0000    0.0000    0.0000        50

    accuracy                         0.6138       725
   macro avg     0.3493    0.4556    0.3854       725
weighted avg     0.5183    0.6138    0.5536       725



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report
import numpy as np

# Convert labels to categorical
train_labels_categorical = to_categorical(train_labels)
num_classes = train_labels_categorical.shape[1]

# Perform 3-Fold CV
kf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

f1_scores = []
precisions = []
recalls = []

for train_index, val_index in kf.split(train_features, train_labels):
    X_train, X_val = train_features[train_index], train_features[val_index]
    y_train, y_val = train_labels_categorical[train_index], train_labels_categorical[val_index]

    # Reshape for CNN
    X_train = np.expand_dims(X_train, axis=-1)
    X_val = np.expand_dims(X_val, axis=-1)

    # Train model
    model = build_model(input_shape=(train_features.shape[1], 1), num_classes=num_classes)
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    # Predict
    val_preds = np.argmax(model.predict(X_val), axis=1)
    val_true = np.argmax(y_val, axis=1)

    # Compute metrics
    report = classification_report(val_true, val_preds, output_dict=True)
    f1_scores.append(report['weighted avg']['f1-score'])
    precisions.append(report['weighted avg']['precision'])
    recalls.append(report['weighted avg']['recall'])

# Print cross-validation results
print(f"Avg F1 Score: {np.mean(f1_scores):.4f}")
print(f"Avg Precision: {np.mean(precisions):.4f}")
print(f"Avg Recall: {np.mean(recalls):.4f}")


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 56ms/step
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step
Avg F1 Score: 0.8667
Avg Precision: 0.8700
Avg Recall: 0.8674
