In [None]:
import os
import numpy as np
import librosa
import pickle
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.models import load_model

In [None]:
# ----------- USER INPUT ----------
# Edit these two values before running. Both are machine-specific absolute paths.
# audio_path: the WAV recording that is fed to the classifiers and pitch-tracked below.
audio_path = "/Volumes/Sandhya TB2/F1 MAIN/UNUSED/KickSauberFerrari_ValtteriBottas77_CircuitdeSpaFrancorchamps^Belgium_lap40.wav" # test audio wav file
# model_dir: folder expected to contain label_encoders.pkl and the three .keras model files.
model_dir = "/Users/govindamadhavabs/Desktop/F1_newModels" # path to folder containing models

In [None]:
# Ask TensorFlow's native logger to hide INFO and WARNING messages.
# NOTE(review): tensorflow is already imported by the time this line runs. This
# variable is normally set *before* the import, so it may have no effect here —
# confirm, and consider moving it above the tensorflow import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Load the label encoders (indexed later by 'team', 'driver' and 'track').
# NOTE: pickle.load can execute arbitrary code; only open files from a trusted source.
encoders_path = os.path.join(model_dir, "label_encoders.pkl")
with open(encoders_path, "rb") as f:
    label_encoders = pickle.load(f)

# Load the three classifiers, in team / driver / track order.
team_model, driver_model, track_model = [
    load_model(os.path.join(model_dir, model_file))
    for model_file in (
        "team_model_20250414_033229.keras",
        "driver_model_20250414_054902.keras",
        "track_model_20250415_123213.keras",
    )
]

In [None]:
# Feature extraction
def extract_features(audio_path, sr=22050):
    """Build a fixed-length feature matrix for one audio file.

    MFCC (20 coefficients), a dB-scaled mel spectrogram and a chromagram are
    computed, each padded or truncated to 128 frames along the time axis,
    stacked along the feature axis and transposed.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sampling rate passed to ``librosa.load``.

    Returns:
        A 2-D array whose first axis has length 128 (frames) and whose second
        axis holds the stacked features, or ``None`` if anything fails (the
        error is printed rather than raised).
    """
    try:
        signal, rate = librosa.load(audio_path, sr=sr)

        mfcc = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=20)
        mel_db = librosa.power_to_db(librosa.feature.melspectrogram(y=signal, sr=rate))
        chroma = librosa.feature.chroma_stft(y=signal, sr=rate)

        # Force every feature block to exactly 128 time frames before stacking.
        blocks = []
        for block in (mfcc, mel_db, chroma):
            blocks.append(librosa.util.fix_length(block, size=128, axis=1))

        stacked = np.vstack(blocks)
        return stacked.T  # frames first: (128, num_features)
    except Exception as exc:
        # Best-effort: report the problem and let the caller decide what to do.
        print(f"Error processing {audio_path}: {str(exc)}")
        return None

# Extract features for the configured clip.
features = extract_features(audio_path)
if features is None:
    # extract_features has already printed the underlying error. Raise instead of
    # calling exit(): inside a notebook exit() shuts the kernel down, and in a
    # plain script it would terminate with a success status despite the failure.
    raise RuntimeError(f"Feature extraction failed for {audio_path}")

In [None]:
# Reshape features to match model input shape
def reshape_features(features, input_shape):
    """Fit a 2-D feature matrix to a model's input size and add batch/channel axes.

    Args:
        features: 2-D array; axis 0 is matched against ``input_shape[1]`` and
            axis 1 against ``input_shape[2]``.
        input_shape: The model's input shape, indexed as
            ``(batch, height, width, ...)``. A ``None`` height or width means
            the model accepts any size on that axis, so that axis is left as is.

    Returns:
        ``features`` padded or truncated (via ``librosa.util.fix_length``) on
        each axis that has a fixed, different target size, with a leading and a
        trailing axis of length 1 added.
    """
    height, width = input_shape[1], input_shape[2]

    # Only touch an axis when the model pins its size and it differs from ours;
    # a None dimension must never be forwarded to fix_length as a target size.
    if height is not None and features.shape[0] != height:
        features = librosa.util.fix_length(features, size=height, axis=0)
    if width is not None and features.shape[1] != width:
        features = librosa.util.fix_length(features, size=width, axis=1)

    return features[np.newaxis, ..., np.newaxis]

# One model-ready tensor per classifier, each fitted to that model's own input shape.
X_team, X_driver, X_track = [
    reshape_features(features, classifier.input_shape)
    for classifier in (team_model, driver_model, track_model)
]

In [None]:
# Predict top-3
def predict_top3(model, X, encoder):
    """Return the three highest-scoring classes for a single input.

    Args:
        model: Object whose ``predict(X, verbose=0)`` returns one row of class
            scores per input; only the first row is used.
        X: Input batch handed to ``model.predict``.
        encoder: Object whose ``inverse_transform`` maps class indices to labels.

    Returns:
        A list of ``(label, score)`` tuples ordered from highest to lowest
        score (fewer than three if the model has fewer classes).
    """
    scores = model.predict(X, verbose=0)[0]
    # Sort ascending, flip to descending, then keep the leading three indices.
    best_indices = scores.argsort()[::-1][:3]
    best_labels = encoder.inverse_transform(best_indices)
    best_scores = scores[best_indices]
    return [(label, score) for label, score in zip(best_labels, best_scores)]

# Top-3 predictions per task; each model is paired with its input tensor and label encoder.
team_top3, driver_top3, track_top3 = [
    predict_top3(classifier, inputs, label_encoders[task])
    for classifier, inputs, task in (
        (team_model, X_team, 'team'),
        (driver_model, X_driver, 'driver'),
        (track_model, X_track, 'track'),
    )
]

In [None]:
# --- PYIN pitch extraction ---
# Reload the clip at its native sampling rate (sr=None) for pitch tracking.
y, sr = librosa.load(audio_path, sr=None)

# pyin must be told the actual sampling rate: if sr is omitted it assumes
# librosa's default of 22050 Hz, which mis-scales every f0 estimate whenever
# the file's native rate differs (and disagrees with the time axis below).
f0, voiced_flag, voiced_probs = librosa.pyin(
    y,
    fmin=librosa.note_to_hz('C2'),
    fmax=librosa.note_to_hz('C7'),
    sr=sr,
)

# Time stamp (in seconds) for each f0 frame, computed with the same sampling rate.
times = librosa.times_like(f0, sr=sr)

In [None]:
# --- PLOT dialog box with pitch + predictions ---
# Two stacked panels: the pitch contour on top (twice as tall), the prediction table below.
fig, ax = plt.subplots(
    nrows=2,
    ncols=1,
    figsize=(12, 6),
    gridspec_kw={'height_ratios': [2, 1]},
)
pitch_ax, table_ax = ax

# Pitch plot
pitch_ax.plot(times, f0, label='Pitch (f0)', color='blue')
pitch_ax.set_title("Pitch Contour (PYIN)", fontsize=14)
pitch_ax.set(xlabel="Time (s)", ylabel="Frequency (Hz)")
pitch_ax.grid(True)

# Prediction table: one row per rank, one column per task, cells as "label (xx.xx%)".
table_ax.axis('off')
columns = ["Team", "Driver", "Track"]
top3_per_column = (team_top3, driver_top3, track_top3)
rows = [
    [f"{preds[rank][0]} ({preds[rank][1]*100:.2f}%)" for preds in top3_per_column]
    for rank in range(3)
]

table = table_ax.table(cellText=rows, colLabels=columns, loc='center', cellLoc='left')
table.auto_set_font_size(False)  # keep the explicit font size set on the next line
table.set_fontsize(10)
table.scale(1.2, 1.5)

plt.tight_layout()
plt.show()