## AUDIO CONTENT BASED ANALYSIS

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install essentia
!pip install essentia-tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting essentia
  Downloading essentia-2.1b6.dev858-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev858
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting essentia-tensorflow
  Downloading essentia_tensorflow-2.1b6.dev858-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (291.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m291.4/291.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: essentia-tensorflow
Successfully installed essentia-tensorflow-2.1b6.dev858


## Loop for features extraction. 

In [None]:
import csv
from tqdm import tqdm
import numpy as np
import json
import os
from essentia.standard import  MonoLoader, RhythmExtractor2013, TensorflowPredictEffnetDiscogs, TensorflowPredictMusiCNN, TensorflowPredict2D
import pandas as pd

def find_audio_files(path):
    audio_files = []

    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(('.wav', '.mp3', '.flac', '.aac')):
                audio_files.append(os.path.join(root, file))
    print('Audio files extracted',audio_files)
    print('Dataset size:',len(audio_files))
    return audio_files

path = "/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks"
audio_files = find_audio_files(path)

with open("/content/drive/MyDrive/Colab_Notebooks/AMPLAB/styles.txt", "r") as file:
    # Read the contents of the file and split the lines into a list of strings
    styles = file.read().splitlines()


def load_audio_file(path):
  return MonoLoader(filename=path)()

def bpm_calculation(audio):
  bpm,_,confidence,_,_=RhythmExtractor2013()(audio)

  return bpm,confidence
  
def music_style(audio,styles):
  model = TensorflowPredictEffnetDiscogs(graphFilename="/content/drive/MyDrive/Colab_Notebooks/AMPLAB/discogs-effnet-bs64-1.pb")
  activations = model(audio)
  activations = np.mean(activations, axis=0)
  predominant_index = np.argmax(activations)
  predominant=styles[predominant_index]
  
  return activations,predominant

def voice_instrumental(audio):
  model= TensorflowPredictMusiCNN(graphFilename="/content/drive/MyDrive/Colab_Notebooks/AMPLAB/voice_instrumental-musicnn-msd-2.pb")
  activations = model(audio)
  activations = np.mean(activations, axis=0)
  instrumental = activations[0]
  voice = activations [1]
  
  return instrumental, voice

def danceabilities(audio):
  model= TensorflowPredictMusiCNN(graphFilename='/content/drive/MyDrive/Colab_Notebooks/AMPLAB/danceability-musicnn-msd-2.pb')
  activations = model(audio)
  activations = np.mean(activations, axis=0)
  danceable = activations[0]
  
  return danceable

def arousal_valence(audio):
  embeddings_model = TensorflowPredictMusiCNN(graphFilename="/content/drive/MyDrive/Colab_Notebooks/AMPLAB/msd-musicnn-1.pb",output="model/dense/BiasAdd")
  model=TensorflowPredict2D(graphFilename='/content/drive/MyDrive/Colab_Notebooks/AMPLAB/emomusic-musicnn-msd-2.pb',output="model/Identity")
  embeddings=embeddings_model(audio)
  activations = model(embeddings)
  activations = np.mean(activations, axis=0)
  arousal = activations[0]
  valence = activations[1]

  return arousal, valence

df = pd.DataFrame(columns=['audio_path', 'bpm', 'music_styles', 'voice', 'instrumental', 'danceability', 'arousal', 'valence', 'predominant music style'])

total_files= len(audio_files)
# Open the JSON file in append mode
with open('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/features.csv', 'a') as f:
  writer = csv.writer(f)
  with tqdm(total=total_files, desc='Processing audio files') as pbar:
    for i, audio_path in tqdm(enumerate(audio_files, 0)):
      audio = load_audio_file(audio_path)

      bpm, confidence = bpm_calculation(audio)
      music_styles, predominant = music_style(audio,styles)
      instrumental, voice  = voice_instrumental(audio)
      danceability = danceabilities(audio)
      arousal, valence = arousal_valence(audio)
     
      df.loc[len(df)] = [audio_path, bpm, music_styles, voice, instrumental, danceability, arousal, valence, predominant]
      df.to_csv('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/features.csv', index=False)

      pbar.update(1)
      pbar.set_postfix({'progress': f'{pbar.n}/{total_files} ({pbar.n/total_files:.0%})'})



Audio files extracted ['/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/5Z/5Z54RgCfhRljLVjPZHy5dv.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/7G/7GgYmXY3PfDjTiyjUlvF7Y.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/0z/0zf1BQJ4om2qU0W9muvnLn.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/4s/4sCcDvX30uu39ozvxcRsqB.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/6Z/6ZBiXweylRlROqwP3ODYgw.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/4X/4XLVg8qERMwZBy0HHr2DdW.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/4X/4XQETu7QYGbUhTt68kekOn.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/4r/4rhaDfV9y5UvGZ67BT8U35.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/2D/2DOfw6UDf0eyCdb0SMVZrE.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks

1846it [3:11:41,  8.69s/it]

In [None]:
import csv
from tqdm import tqdm
import numpy as np
import json
import os
from essentia.standard import  MonoLoader, RhythmExtractor2013, TensorflowPredictEffnetDiscogs, TensorflowPredictMusiCNN, TensorflowPredict2D
import pandas as pd

def find_audio_files(path):
    audio_files = []

    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(('.wav', '.mp3', '.flac', '.aac')):
                audio_files.append(os.path.join(root, file))
    print('Audio files extracted',audio_files)
    print('Dataset size:',len(audio_files))
    return audio_files

path = "/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks"
audio_files = find_audio_files(path)
audio_files = audio_files[1850:]
print('Length of audio files:',len(audio_files))

with open("/content/drive/MyDrive/Colab_Notebooks/AMPLAB/styles.txt", "r") as file:
    # Read the contents of the file and split the lines into a list of strings
    styles = file.read().splitlines()


def load_audio_file(path):
  return MonoLoader(filename=path)()

def bpm_calculation(audio):
  bpm,_,confidence,_,_=RhythmExtractor2013()(audio)

  return bpm,confidence
  
def music_style(audio,styles):
  model = TensorflowPredictEffnetDiscogs(graphFilename="/content/drive/MyDrive/Colab_Notebooks/AMPLAB/discogs-effnet-bs64-1.pb")
  activations = model(audio)
  activations = np.mean(activations, axis=0)
  predominant_index = np.argmax(activations)
  predominant=styles[predominant_index]
  
  return activations,predominant

def voice_instrumental(audio):
  model= TensorflowPredictMusiCNN(graphFilename="/content/drive/MyDrive/Colab_Notebooks/AMPLAB/voice_instrumental-musicnn-msd-2.pb")
  activations = model(audio)
  activations = np.mean(activations, axis=0)
  instrumental = activations[0]
  voice = activations [1]
  
  return instrumental, voice

def danceabilities(audio):
  model= TensorflowPredictMusiCNN(graphFilename='/content/drive/MyDrive/Colab_Notebooks/AMPLAB/danceability-musicnn-msd-2.pb')
  activations = model(audio)
  activations = np.mean(activations, axis=0)
  danceable = activations[0]
  
  return danceable

def arousal_valence(audio):
  embeddings_model = TensorflowPredictMusiCNN(graphFilename="/content/drive/MyDrive/Colab_Notebooks/AMPLAB/msd-musicnn-1.pb",output="model/dense/BiasAdd")
  model=TensorflowPredict2D(graphFilename='/content/drive/MyDrive/Colab_Notebooks/AMPLAB/emomusic-musicnn-msd-2.pb',output="model/Identity")
  embeddings=embeddings_model(audio)
  activations = model(embeddings)
  activations = np.mean(activations, axis=0)
  arousal = activations[0]
  valence = activations[1]

  return arousal, valence

df = pd.DataFrame(columns=['audio_path', 'bpm', 'music_styles', 'voice', 'instrumental', 'danceability', 'arousal', 'valence', 'predominant music style'])

total_files= len(audio_files)

# Open the JSON file in append mode

with open('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/features2.csv', 'a') as f:
  writer = csv.writer(f)
  with tqdm(total=total_files, desc='Processing audio files') as pbar:
    for audio_path in tqdm(audio_files):
      audio = load_audio_file(audio_path)

      bpm, confidence = bpm_calculation(audio)
      music_styles, predominant = music_style(audio,styles)
      instrumental, voice  = voice_instrumental(audio)
      danceability = danceabilities(audio)
      arousal, valence = arousal_valence(audio)
      
     
      df.loc[len(df)] = [audio_path, bpm, music_styles, voice, instrumental, danceability, arousal, valence, predominant]
      df.to_csv('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/features2.csv', index=False)

      pbar.update(1)
      pbar.set_postfix({'progress': f'{pbar.n}/{total_files} ({pbar.n/total_files:.0%})'})

Audio files extracted ['/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/5Z/5Z54RgCfhRljLVjPZHy5dv.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/7G/7GgYmXY3PfDjTiyjUlvF7Y.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/0z/0zf1BQJ4om2qU0W9muvnLn.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/4s/4sCcDvX30uu39ozvxcRsqB.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/6Z/6ZBiXweylRlROqwP3ODYgw.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/4X/4XLVg8qERMwZBy0HHr2DdW.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/4X/4XQETu7QYGbUhTt68kekOn.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/4r/4rhaDfV9y5UvGZ67BT8U35.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks/audio.001/2D/2DOfw6UDf0eyCdb0SMVZrE.mp3', '/content/drive/MyDrive/Colab_Notebooks/AMPLAB/audio_chunks

Processing audio files:   0%|          | 0/250 [00:00<?, ?it/s]
100%|██████████| 250/250 [16:47<00:00,  4.03s/it]


In [None]:
df

Unnamed: 0,audio_path,bpm,music_styles,voice,instrumental,danceability,arousal,valence,predominant music style
0,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,135.438889,"[9.019404e-06, 0.00010508032, 0.00020468996, 0...",0.458111,0.263399,0.270036,3.478802,4.271688,"""Electronic---Vaporwave"","
1,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,146.197830,"[4.537728e-06, 4.8221067e-05, 0.00042826874, 0...",0.305626,0.359743,0.257943,3.675216,4.951470,"""Electronic---Power Electronics"","
2,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,75.443779,"[3.7539594e-06, 3.8993836e-05, 3.9785442e-05, ...",0.351090,0.334521,0.316073,3.422647,4.959703,"""Electronic---Vaporwave"","
3,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,106.415306,"[4.9772884e-06, 2.43401e-05, 7.097772e-05, 0.0...",0.494610,0.202900,0.450870,3.394173,5.769993,"""Electronic---Experimental"","
4,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,139.765930,"[6.6940797e-06, 2.4406314e-05, 4.0803196e-05, ...",0.310157,0.348921,0.549900,3.051960,4.095187,"""Electronic---Industrial"","
...,...,...,...,...,...,...,...,...,...
245,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,73.310852,"[8.601876e-06, 1.1693893e-05, 2.6612512e-05, 6...",0.254572,0.389736,0.185603,3.290955,3.507015,"""Electronic---Experimental"","
246,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,136.569183,"[4.660627e-05, 6.681148e-05, 0.00012161084, 0....",0.226477,0.417134,0.344109,3.054225,4.300512,"""Electronic---Experimental"","
247,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,108.029228,"[3.8266026e-06, 2.9110623e-05, 2.6878745e-06, ...",0.201561,0.451785,0.874785,3.000327,4.777125,"""Electronic---Vaporwave"","
248,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,131.834244,"[2.620681e-05, 3.597975e-05, 0.0003085332, 0.0...",0.427077,0.332533,0.309887,3.728135,3.764989,"""Electronic---Vaporwave"","


In [None]:
import csv

df1 = pd.read_csv('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/features.csv')

# Read the second CSV file into a DataFrame
df2 = pd.read_csv('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/features2.csv')

# Concatenate the two DataFrames vertically
#combined_df = pd.concat([df1, df2])

# Write the combined DataFrame to a CSV file
#combined_df.to_csv('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/combined_features.csv', index=False)
      
df1=df1[:1850]

combined_df = pd.concat([df1, df2])
combined_df.to_csv('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/features3.csv', index=False)

In [None]:
import pandas as pd
df3=pd.read_csv('/content/drive/MyDrive/Colab_Notebooks/AMPLAB/features/features3.csv')
df3

Unnamed: 0,audio_path,bpm,music_styles,voice,instrumental,danceability,arousal,valence,predominant music style
0,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,119.751747,[5.05927419e-05 7.00273304e-05 6.95807976e-04 ...,0.342443,0.332771,0.163486,3.340117,3.374879,"""Electronic---Experimental"","
1,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,153.446625,[6.21643674e-04 3.02357425e-04 7.01925135e-04 ...,0.440760,0.287215,0.253267,3.489979,4.115166,"""Electronic---Experimental"","
2,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,93.742500,[4.49582330e-05 1.41592987e-04 2.90555734e-04 ...,0.253248,0.389446,0.265566,3.702826,4.368959,"""Electronic---Experimental"","
3,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,108.424217,[9.99165604e-06 4.22870653e-04 4.65890975e-04 ...,0.504235,0.243679,0.257150,3.040704,5.242821,"""Electronic---Experimental"","
4,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,165.349686,[1.62254364e-05 1.80955947e-04 1.43213590e-04 ...,0.240522,0.441403,0.255030,3.896225,4.917584,"""Electronic---Vaporwave"","
...,...,...,...,...,...,...,...,...,...
2095,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,73.310852,[8.60187629e-06 1.16938927e-05 2.66125116e-05 ...,0.254572,0.389736,0.185603,3.290955,3.507015,"""Electronic---Experimental"","
2096,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,136.569183,[4.66062702e-05 6.68114808e-05 1.21610843e-04 ...,0.226477,0.417134,0.344109,3.054225,4.300512,"""Electronic---Experimental"","
2097,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,108.029228,[3.82660255e-06 2.91106226e-05 2.68787448e-06 ...,0.201561,0.451785,0.874785,3.000327,4.777125,"""Electronic---Vaporwave"","
2098,/content/drive/MyDrive/Colab_Notebooks/AMPLAB/...,131.834244,[2.62068097e-05 3.59797486e-05 3.08533199e-04 ...,0.427077,0.332533,0.309887,3.728135,3.764989,"""Electronic---Vaporwave"","


In [None]:
music_styles_= df3['predominant music style']
#music_styles_ = music_styles_.apply(ast.literal_eval)
print(music_styles_)

m=music_styles_.tolist()
print(m)

0           "Electronic---Experimental",
1           "Electronic---Experimental",
2           "Electronic---Experimental",
3           "Electronic---Experimental",
4              "Electronic---Vaporwave",
                      ...               
2095        "Electronic---Experimental",
2096        "Electronic---Experimental",
2097           "Electronic---Vaporwave",
2098           "Electronic---Vaporwave",
2099        "Electronic---Dark Ambient",
Name: predominant music style, Length: 2100, dtype: object
['    "Electronic---Experimental",', '    "Electronic---Experimental",', '    "Electronic---Experimental",', '    "Electronic---Experimental",', '    "Electronic---Vaporwave",', '    "Electronic---Vaporwave",', '    "Electronic---Experimental",', '    "Electronic---Experimental",', '    "Electronic---Experimental",', '    "Electronic---Experimental",', '    "Electronic---Industrial",', '    "Electronic---Vaporwave",', '    "Non-Music---Spoken Word",', '    "Electronic---Vaporwave",', '

In [None]:
data_list[0]

['audio_path',
 'bpm',
 'music_styles',
 'voice',
 'instrumental',
 'danceability',
 'arousal',
 'valence',
 'predominant music style']