In [1]:
!pip install librosa
!pip install transformers
!pip install torch
!pip install soundfile audioread
!pip install ipywidgets --upgrade
!pip install jupyterlab_widgets



In [2]:
# Updated Library Imports
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import torch

In [4]:
import librosa
import os
import matplotlib.pyplot as plt

# Load and Analyze Audio Files
audio_folder = "../data/tartanaviation/raw/kbtp/2020/11/11-02-20_audio"
audio_files = [f for f in os.listdir(audio_folder) if f.endswith(".wav")]

if not audio_files:
    raise ValueError("No audio files found in the specified directory.")

# Analyze and resample the first audio file
audio_path = os.path.join(audio_folder, audio_files[0])
try:
    # Specify backend to use audioread
    audio, sr = librosa.load(audio_path, sr=44100, backend='audioread')
    resampled_audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)  # Resample to 16 kHz

    # Print details
    print(f"Loaded file: {audio_files[0]}")
    print(f"Original Sample Rate: {sr}")
    print(f"Resampled Sample Rate: 16000")
    print(f"Duration: {len(audio) / sr:.2f} seconds")

    # Plot waveform of resampled audio
    plt.figure(figsize=(10, 4))
    librosa.display.waveshow(resampled_audio, sr=16000)
    plt.title("Waveform of the Resampled Audio File")
    plt.xlabel("Time (seconds)")
    plt.ylabel("Amplitude")
    plt.show()

except Exception as e:
    print(f"Failed to load {audio_files[0]}: {e}")

Failed to load 1.wav: load() got an unexpected keyword argument 'backend'


In [None]:
# Extract and Save MFCC Features
mfcc_folder = "../data/tartanaviation/processed/"
os.makedirs(mfcc_folder, exist_ok=True)

for audio_file in audio_files:
    audio_path = os.path.join(audio_folder, audio_file)
    audio, sr = librosa.load(audio_path, sr=16000)  # Resample to 16 kHz
    
    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    mfccs_delta = librosa.feature.delta(mfccs)
    mfccs_delta2 = librosa.feature.delta(mfccs, order=2)
    
    # Combine features
    combined_mfccs = np.vstack([mfccs, mfccs_delta, mfccs_delta2])
    
    # Convert to DataFrame and save
    mfcc_df = pd.DataFrame(combined_mfccs.T)
    output_path = os.path.join(mfcc_folder, f"{audio_file.split('.')[0]}_mfcc.csv")
    mfcc_df.to_csv(output_path, index=False)
    print(f"MFCC features saved to {output_path}")

In [None]:
# Speech-to-Text using Wav2Vec2
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")

def transcribe_audio(audio_path):
    audio, sr = librosa.load(audio_path, sr=16000)  # Ensure 16 kHz
    input_values = processor(audio, return_tensors="pt", sampling_rate=16000).input_values
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    return transcription

# Transcribe all audio files
transcriptions = {}
for audio_file in audio_files:
    audio_path = os.path.join(audio_folder, audio_file)
    transcription = transcribe_audio(audio_path)
    transcriptions[audio_file] = transcription
    print(f"{audio_file}: {transcription}")

In [None]:
# Speech-to-Intent: Transcription and Intent Classification
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load Wav2Vec2 model for transcription
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
speech_to_text_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")

# Load pre-trained model for intent classification
intent_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
intent_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)

# Define intents and their labels
intent_labels = ["clearance_request", "alert", "weather_request"]

# Function for transcription
def transcribe_audio(audio_path):
    audio_input, sr = librosa.load(audio_path, sr=16000)  # Ensure 16kHz for Wav2Vec2
    input_values = processor(audio_input, return_tensors="pt", sampling_rate=16000).input_values
    logits = speech_to_text_model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.decode(predicted_ids[0])
    return transcription

# Function for intent classification
def classify_intent(transcription):
    inputs = intent_tokenizer(transcription, return_tensors="pt", truncation=True, padding=True)
    outputs = intent_model(**inputs)
    predicted_label = torch.argmax(outputs.logits, dim=-1).item()
    return intent_labels[predicted_label]

# Example: Transcribe and classify intent
audio_file_path = "../data/tartanaviation/processed/processed_audio.wav"
transcription = transcribe_audio(audio_file_path)
intent = classify_intent(transcription)
print(f"Transcription: {transcription}")
print(f"Detected Intent: {intent}")

In [None]:
# Speech-to-Action: Decision Support
# Mock trajectory and weather data (replace with actual ATC data sources)
trajectory_data = {"flight_id": "ABC123", "altitude": 30000, "heading": 180}
weather_data = {"location": "runway_22", "visibility": "5 miles", "wind_speed": "10 knots"}

# Function to suggest action based on intent
def suggest_action(intent, trajectory_data, weather_data):
    if intent == "clearance_request":
        return f"Clearance granted for {trajectory_data['flight_id']}."
    elif intent == "alert":
        return f"Notify authorities about restricted airspace violation by {trajectory_data['flight_id']}."
    elif intent == "weather_request":
        return f"Weather update: Visibility {weather_data['visibility']}, Wind Speed {weather_data['wind_speed']}."
    else:
        return "No action required."

# Example: Suggest action based on detected intent
action = suggest_action(intent, trajectory_data, weather_data)
print(f"Suggested Action: {action}")