In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [None]:
import librosa
import numpy as np

def extract_features(audio_file, sr=22050, n_mfcc=13):
    """
    Summarise an audio file as per-coefficient MFCC statistics.

    The file is loaded with librosa at the requested sample rate, MFCCs are
    computed, and each coefficient is reduced over axis 1 to its mean and its
    standard deviation.

    Parameters:
        audio_file (str): Path to the audio file.
        sr (int): Sample rate passed to ``librosa.load``.
        n_mfcc (int): Number of MFCCs requested from ``librosa.feature.mfcc``.

    Returns:
        numpy.ndarray: All the means followed by all the standard deviations
        (length ``2 * n_mfcc``, assuming librosa returns one row per
        coefficient).
    """
    signal, sample_rate = librosa.load(audio_file, sr=sr)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)

    # Reduce along axis 1 with each statistic in turn: means first, then stds.
    summaries = [statistic(coefficients, axis=1) for statistic in (np.mean, np.std)]
    return np.concatenate(summaries)

# Demo: summarise one sample clip and report the size of its feature vector
audio_file = "example_audio.wav"
features = extract_features(audio_file=audio_file)
print("Extracted features shape:", features.shape)

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Build an undirected graph from four parallel pieces of data:
# node values, edge values, an adjacency list of edge endpoints, and one
# graph-level (global) value.
G = nx.Graph()

# Per-node attribute values; the list index is the node id.
nodes = [0, 1, 1, 0, 0, 1, 1, 1]

# Per-edge attribute values; edges[k] belongs to the pair adjacency_list[k].
edges = [2, 1, 1, 1, 2, 1, 1]

# Endpoints of each edge, as pairs of node ids.
adjacency_list = [[1, 0], [2, 0], [4, 3], [6, 2], [7, 3], [7, 4], [7, 5]]

# Graph-level attribute
global_attribute = 0

# The two edge lists are paired by position, so a length mismatch would
# silently drop edges in zip() below.
if len(edges) != len(adjacency_list):
    raise ValueError("edges and adjacency_list must have the same length")

# Add nodes to the graph
for node_id, node_value in enumerate(nodes):
    G.add_node(node_id, value=node_value)

# Add edges between the endpoints named in adjacency_list. Chaining node i to
# node i+1 would ignore the adjacency list and draw a plain path instead.
for (source, target), edge_weight in zip(adjacency_list, edges):
    G.add_edge(source, target, weight=edge_weight)

# Set global attribute
G.graph['global'] = global_attribute

# Visualize the graph. The node-label dict is called `node_labels` so it does
# not collide with the global `labels` class map that label_audio() reads.
pos = nx.spring_layout(G, seed=42)
node_labels = nx.get_node_attributes(G, 'value')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw(G, pos, with_labels=True, labels=node_labels, node_size=2000, node_color='lightblue', font_size=10)
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=10)
plt.title("Sample Graph")
plt.show()

In [None]:
!pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:
import torch

# Class names in class-index order; `labels` maps each name to its integer id
labels = {
    name: index
    for index, name in enumerate(("sports", "interview", "reporting", "debate"))
}

# Pre-trained BERT checkpoint with a sequence-classification head sized to the
# label set. The saved cell output warns that the classifier weights are newly
# initialized, so predictions are not meaningful until the model is fine-tuned.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
)

def label_audio(audio_path, transcript):
  """
  Labels an audio clip based on its transcript using a BERT model.

  The prediction is made from the transcript alone. The module-level `model`
  is a text sequence classifier, so it has no input for spectrogram features;
  concatenating audio features onto the token ids produces an invalid input.
  Fusing audio would need a separate model that accepts those features.

  Args:
      audio_path: Path to the audio file. Accepted for interface
          compatibility; currently not read.
      transcript: Text transcript of the audio content (a single string).

  Returns:
      Predicted label name, i.e. one of the keys of the module-level `labels`
      dict (sports, interview, reporting, debate).

  Raises:
      TypeError: If `transcript` is not a string.
  """
  if not isinstance(transcript, str):
    raise TypeError(
        f"transcript must be a str, got {type(transcript).__name__}"
    )

  # Tokenize the transcript; truncation keeps long transcripts within the
  # model's maximum sequence length instead of failing in the forward pass.
  encoded_transcript = tokenizer(transcript, return_tensors="pt", truncation=True)

  # Inference only: skip gradient tracking. Unpacking passes the attention
  # mask (and any other tokenizer outputs) along with the input ids.
  with torch.no_grad():
    outputs = model(**encoded_transcript)
  predicted_id = int(torch.argmax(outputs.logits, dim=-1).item())

  # `labels` maps name -> id, so invert it to look the name up by class id.
  id_to_label = {index: name for name, index in labels.items()}
  return id_to_label[predicted_id]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
class AudioTaggingGNN(nn.Module):
    """Two-layer graph convolutional classifier.

    Applies conv -> ReLU -> dropout -> conv and returns log-probabilities
    (log-softmax over dim 1).

    Args:
        input_dim: Size passed as the input width of the first GCNConv layer.
        hidden_dim: Output width of the first layer / input width of the second.
        output_dim: Output width of the second layer (number of classes).
        dropout: Dropout probability applied after the first layer while the
            module is in training mode. Defaults to 0.5.

    Raises:
        ValueError: If `dropout` is outside [0, 1].
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.5):
        super().__init__()
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        # NOTE(review): GCNConv is not imported in the visible cells —
        # presumably torch_geometric.nn.GCNConv; confirm and add the import.
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return log-softmax scores over dim 1 for features `x` on `edge_index`."""
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is active only in training mode (self.training).
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return F.log_softmax(self.conv2(hidden, edge_index), dim=1)

# Create a PyTorch Geometric Data object
# NOTE(review): `Data`, `x`, `edge_index` and `input_dim` are not defined or
# imported in the visible cells — confirm they exist before this cell runs.
data = Data(x=x, edge_index=edge_index)

# Initialize the GNN under its own name. `model` is the BERT classifier that
# label_audio() looks up at call time, so rebinding `model` here would make
# later label_audio() calls run the GNN instead.
gnn_model = AudioTaggingGNN(input_dim=input_dim, hidden_dim=64, output_dim=10)  # Assuming 10 audio classes

# Forward pass
output = gnn_model(data.x, data.edge_index)


In [None]:
# NOTE(review): this is a YouTube URL, not a local audio file — presumably the
# audio has to be downloaded before anything can load it; confirm.
audio_path = "https://youtu.be/ADnKaougM6M?si=sNzG7M94j4sjs6Bi"

# extract_features() needs a file path and returns MFCC statistics, not text,
# so it cannot supply the transcript. Until a speech-to-text step exists, use
# the transcript recorded in this cell's saved output.
transcript = (
    "it represents colonization of Bloodshed give it back to India I don't see "
    "why an Indian child from India has to travel all the way to the UK to look "
    "at him just April narinda the the Sikhs the in Punjab who also by the way "
    "the the ruler that you mentioned was also a ruler of Lahore so is Pakistan "
    "going to have the same effect history please they stole it from the Persian "
    "Empire the Empire the Persian Empire so you know can I just say it "
    "represents colonization of Bloodshed give it back to India I don't see why "
    "an Indian child from India has to travel all the way to the UK to look at "
    "it foreign thank you"
)

predicted_label = label_audio(audio_path, transcript)
print(f"Transcript: {transcript}")
print(f"Predicted Label: {predicted_label}")

Transcript: it represents colonization of Bloodshed give it back to India I don't see why an Indian child from India has to travel all the way to the UK to look at him just April narinda the the Sikhs the in Punjab who also by the way the the ruler that you mentioned was also a ruler of Lahore so is Pakistan going to have the same effect history please they stole it from the Persian Empire the Empire the Persian Empire so you know can I just say it represents colonization of Bloodshed give it back to India I don't see why an Indian child from India has to travel all the way to the UK to look at it foreign thank you
Predicted Label: Debate


In [None]:
# NOTE(review): this is a YouTube URL, not a local audio file — presumably the
# audio has to be downloaded before anything can load it; confirm.
audio_path = "https://youtu.be/Rv-H6xXiozU?si=BBR2-CQL1a0pf5WH"

# extract_features() needs a file path and returns MFCC statistics, not text,
# so it cannot supply the transcript. Until a speech-to-text step exists, use
# the transcript recorded in this cell's saved output.
transcript = (
    "the Tesla Chief Elon Musk has met government officials in Beijing Chinese "
    "State media is reporting that the tech entrepreneur is thereby invitation "
    "for the promotion of international trade they have discussed data and "
    "Technology relating to electric vehicles Tesla has been facing a price war "
    "with Chinese brands in the electric vehicle Market it has recently said it "
    "will lay off 10% of its Global Workforce we can now speak to our business "
    "reporter David wadell who's in The Newsroom so David why is Mr musk in "
    "China well Mr musk's challenge is amongst other things that he's trying to "
    "sell more units of his Tesla company's full self-driving software that's "
    "the smartphones and let me show you this this is uh are by a company called "
    "xang motors which produces not only EVS but here you see a flying vehicle "
    "and it's also producing its own form of full self-driving software that is "
    "designed to compete with Tesla"
)

predicted_label = label_audio(audio_path, transcript)
print(f"Transcript: {transcript}")
print(f"Predicted Label: {predicted_label}")

the Tesla Chief Elon Musk has met government officials in Beijing Chinese State media is reporting that the tech entrepreneur is thereby invitation for the promotion of international trade they have discussed data and Technology relating to electric vehicles Tesla has been facing a price war with Chinese brands in the electric vehicle Market it has recently said it will lay off 10% of its Global Workforce we can now speak to our business reporter David wadell who's in The Newsroom so David why is Mr musk in China well Mr musk's challenge is amongst other things that he's trying to sell more units of his Tesla company's full self-driving software that's the smartphones and let me show you this this is uh are by a company called xang motors which produces not only EVS but here you see a flying vehicle and it's also producing its own form of full self-driving software that is designed to compete with Tesla
Predicted Label: Interview
