In [1]:
import librosa
import librosa.feature
import librosa.display
import numpy as np
import pandas as pd
from PIL import Image
import os

def load_fixed_audio(path, duration=30, sr=22050):
    """Load an audio file and force it to a fixed number of samples.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.
        duration: Target clip length in seconds; fractional values are allowed.
        sr: Sampling rate requested from ``librosa.load``.

    Returns:
        The loaded samples, zero-padded at the end or truncated so that the
        result holds ``int(round(duration * sr))`` samples.
    """
    audio, _ = librosa.load(path, sr=sr, duration=duration)
    # A fractional duration makes ``duration * sr`` a float, which neither
    # ``np.pad`` nor slicing accepts, so settle on an integer sample count.
    desired_length = int(round(duration * sr))

    missing = desired_length - len(audio)
    if missing > 0:
        # Shorter than requested: append trailing zeros.
        audio = np.pad(audio, (0, missing))
    else:
        # Long enough (or longer): keep only the leading samples.
        audio = audio[:desired_length]

    return audio

def audio_to_melspec(audio, sr=22050, n_mels=128, hop_length=512):
    """Turn a waveform into a mel spectrogram expressed in decibels.

    Args:
        audio: Waveform samples handed to ``librosa.feature.melspectrogram``.
        sr: Sampling rate of ``audio``.
        n_mels: Number of mel bands.
        hop_length: Hop length between successive frames, in samples.

    Returns:
        The result of ``librosa.power_to_db`` applied to the mel spectrogram,
        referenced to the spectrogram's maximum (``ref=np.max``).
    """
    floor = 1e-10
    power_spec = librosa.feature.melspectrogram(
        y=audio, sr=sr, n_mels=n_mels, hop_length=hop_length
    )
    # The same small constant both offsets the spectrogram and serves as the
    # ``amin`` lower bound of the conversion.
    return librosa.power_to_db(power_spec + floor, ref=np.max, amin=floor)

def normalize(melspec_db):
    """Min-max scale an array so its values span the range [0, 1].

    Args:
        melspec_db: Array of values to rescale.

    Returns:
        ``(melspec_db - min) / (max - min)``, or an all-zero array of the same
        shape when every value is identical.
    """
    lowest, highest = np.min(melspec_db), np.max(melspec_db)
    span = highest - lowest
    if span != 0:
        return (melspec_db - lowest) / span
    # Constant input: dividing would be 0/0, so return zeros instead.
    return np.zeros_like(melspec_db)

def get_all_wav_files(directory):
    """Recursively list every file below ``directory`` whose name ends in '.wav'.

    Each returned path is the containing folder as reported by ``os.walk``
    joined to the file name with a forward slash.
    """
    return [
        folder + '/' + name
        for folder, _, names in os.walk(directory)
        for name in names
        if name.endswith('.wav')
    ]


def get_all_image_files(directory):
    """Recursively list every file below ``directory`` whose name ends in '.png'.

    Each returned path is the containing folder as reported by ``os.walk``
    joined to the file name with a forward slash.
    """
    image_files = []
    for folder, _, names in os.walk(directory):
        image_files.extend(
            folder + '/' + name for name in names if name.endswith('.png')
        )
    return image_files




In [42]:
# Collect every .wav path below the genres folder; `entries` will receive one
# row per successfully processed file further down in this cell.
files = get_all_wav_files("../data/genres")
entries = []

def clean_labels(s):
    """Return the part of ``s`` before its first '.' (all of ``s`` if it has none)."""
    label, _, _ = s.partition('.')
    return label

def extract_features(file_path, sr=22050, n_mfcc=20, duration=30):
    """Summarise one audio file as a flat list of classical audio features.

    The list is laid out as: MFCC means then stds, chroma means then stds,
    spectral-contrast means then stds, tonnetz means then stds, tempo,
    spectral-rolloff mean and std, spectral-centroid mean and std, and the
    zero-crossing rate.

    Args:
        file_path: Path of the audio file to analyse.
        sr: Sampling rate requested from ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to compute.
        duration: Maximum number of seconds read from the file.

    Returns:
        list: The feature values in the order described above.
    """
    # Load as mono, reading at most ``duration`` seconds.
    y, sr = librosa.load(file_path, sr=sr, mono=True, duration=duration)
    features = []

    # 1. MFCCs
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # 2. Chroma
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    # 3. Spectral contrast
    spec_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    # 4. Tonnetz, computed on the isolated harmonic component
    y_harm = librosa.effects.harmonic(y)
    tonnetz = librosa.feature.tonnetz(y=y_harm, sr=sr)

    # Each matrix is reduced along axis 1: all means first, then all stds.
    for matrix in (mfcc, chroma, spec_contrast, tonnetz):
        features.extend(matrix.mean(axis=1))
        features.extend(matrix.std(axis=1))

    # 5. Tempo. Depending on the librosa release the tempo comes back either
    # as a plain scalar or as an array; ``atleast_1d`` accepts both, whereas
    # indexing a scalar with ``[0]`` raises.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    features.append(float(np.atleast_1d(tempo)[0]))

    # 6. Spectral rolloff
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    features.append(spectral_rolloff.mean())
    features.append(spectral_rolloff.std())

    # 7. Spectral centroid
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    features.append(spectral_centroids.mean())
    features.append(spectral_centroids.std())

    # 8. Zero-crossing rate: fraction of samples flagged as a crossing.
    # ``np.mean`` gives the same ratio as sum/len without a Python-level loop
    # over every sample.
    zc = librosa.zero_crossings(y=y, pad=False)
    features.append(np.mean(zc))

    return features

# Extract features for every discovered file. A failure on one file is printed
# and the loop carries on with the next file.
for file in files:
    try:
        print(file)
        # get_all_wav_files joins folder and name with '/', so the last
        # '/'-separated component is the bare file name.
        filename = file.split('/')[-1]
        row = extract_features(file)
        # Row layout: label (text before the first '.') followed by the features.
        entries.append([clean_labels(filename)] + row)
    except Exception as e:
        print(e)
        print(f"Could not process {file}")
        
def get_classical_feature_names(n_mfcc=20, n_chroma=12, n_spec_contrast=7, n_tonnetz=6):
    """
    Build the column names for the classical feature table.

    The list starts with 'target'. For each of MFCC, chroma, spectral contrast
    and tonnetz it then holds every "<family>_<i>_mean" name followed by every
    "<family>_<i>_std" name (i counts from 1). It ends with tempo, the spectral
    rolloff and centroid mean/std columns, and the zero-crossing rate.
    """
    families = (
        ("mfcc", n_mfcc),
        ("chroma", n_chroma),
        ("spec_contrast", n_spec_contrast),
        ("tonnetz", n_tonnetz),
    )

    column_names = ['target']
    for prefix, count in families:
        # All means of a family come before all of its stds.
        for stat in ("mean", "std"):
            column_names += [f"{prefix}_{i}_{stat}" for i in range(1, count + 1)]

    # Scalar descriptors, in the order the extractor appends them.
    column_names += [
        "tempo",
        "mean_spectral_rolloff",
        "std_spectral_rolloff",
        "mean_spectral_centroids",
        "std_spectral_centroids",
        "zc_rate",
    ]
    return column_names
        
# Build the table straight from the row lists. Stacking the rows with NumPy
# first would coerce every value to a string (each row starts with a text
# label), would fail when no file could be processed, and relies on
# ``np.row_stack``, which NumPy has deprecated.
df = pd.DataFrame(entries, columns=get_classical_feature_names())
df.to_csv("../data/files/audio_classification.csv")

../data/genres\blues/blues.00000.wav
../data/genres\blues/blues.00001.wav
../data/genres\blues/blues.00002.wav


KeyboardInterrupt: 

In [10]:
# Collect every .wav path below the genres folder; `entries` will receive one
# item per successfully processed file further down in this cell.
files = get_all_wav_files("../data/genres")
entries = []

def clean_labels(s):
    """Return everything in ``s`` that precedes the first '.'; ``s`` itself if there is no dot."""
    head, *_ = s.split('.')
    return head

def extract_features(file_path,sr=22050,total_duration=30.0,segment_duration=5,n_mfcc=32):
    """Split one audio file into fixed-length segments and describe each one.

    Every segment yields a flat list laid out as: MFCC means then stds, chroma
    means then stds, spectral-contrast means then stds, tonnetz means then
    stds, tempo, spectral-rolloff mean and std, spectral-centroid mean and
    std, zero-crossing rate, RMS mean and std, and finally an augmentation
    flag (1 when the path ends in "_m2.wav" or "_p2.wav", otherwise 0).

    Args:
        file_path: Path of the audio file to analyse.
        sr: Sampling rate requested from ``librosa.load``.
        total_duration: Number of seconds read from the file.
        segment_duration: Length of each segment in seconds.
        n_mfcc: Number of MFCC coefficients to compute per segment.

    Returns:
        list[list]: One feature list per segment, in time order.

    Raises:
        ValueError: If a segment would span less than one sample, or if the
            audio ends before a segment begins.
    """
    # Load as mono, reading at most ``total_duration`` seconds.
    y, sr = librosa.load(file_path, sr=sr, mono=True, duration=total_duration)

    samples_per_segment = int(segment_duration * sr)
    if samples_per_segment <= 0:
        raise ValueError("segment_duration must span at least one sample")

    # Derive the segment count from the arguments instead of a fixed 6 (the
    # defaults still give 30 / 5 = 6). The small epsilon keeps a ratio such as
    # 2.9999999 from being floored to 2.
    num_segments = int(total_duration / segment_duration + 1e-9)

    # The flag depends only on the file name, so compute it once.
    is_augmented = 1 if file_path.endswith(("_m2.wav", "_p2.wav")) else 0

    all_segments = []
    for i in range(num_segments):
        start = i * samples_per_segment
        segment_y = y[start:start + samples_per_segment]
        if len(segment_y) == 0:
            raise ValueError(
                f"{file_path} is too short for segment {i + 1} of {num_segments}"
            )

        features = []

        # 1. MFCCs
        mfcc = librosa.feature.mfcc(y=segment_y, sr=sr, n_mfcc=n_mfcc)
        # 2. Chroma
        chroma = librosa.feature.chroma_stft(y=segment_y, sr=sr)
        # 3. Spectral contrast
        spec_contrast = librosa.feature.spectral_contrast(y=segment_y, sr=sr)
        # 4. Tonnetz, computed on the isolated harmonic component
        y_harm = librosa.effects.harmonic(segment_y)
        tonnetz = librosa.feature.tonnetz(y=y_harm, sr=sr)

        # Each matrix is reduced along axis 1: all means first, then all stds.
        for matrix in (mfcc, chroma, spec_contrast, tonnetz):
            features.extend(matrix.mean(axis=1))
            features.extend(matrix.std(axis=1))

        # 5. Tempo. Depending on the librosa release the tempo comes back
        # either as a plain scalar or as an array; ``atleast_1d`` accepts
        # both, whereas indexing a scalar with ``[0]`` raises.
        tempo, _ = librosa.beat.beat_track(y=segment_y, sr=sr)
        features.append(float(np.atleast_1d(tempo)[0]))

        # 6. Spectral rolloff
        spectral_rolloff = librosa.feature.spectral_rolloff(y=segment_y, sr=sr)[0]
        features.append(spectral_rolloff.mean())
        features.append(spectral_rolloff.std())

        # 7. Spectral centroid
        spectral_centroids = librosa.feature.spectral_centroid(y=segment_y, sr=sr)[0]
        features.append(spectral_centroids.mean())
        features.append(spectral_centroids.std())

        # 8. Zero-crossing rate: fraction of samples flagged as a crossing,
        # computed without a Python-level loop over every sample.
        zc = librosa.zero_crossings(y=segment_y, pad=False)
        features.append(np.mean(zc))

        # 9. RMS energy (mean and standard deviation)
        rms = librosa.feature.rms(y=segment_y)
        features.append(rms.mean())
        features.append(rms.std())

        # 10. Augmentation flag
        features.append(is_augmented)

        all_segments.append(features)
    return all_segments

# Extract per-segment features for every discovered file. A failure on one
# file is printed and the loop carries on with the next file.
for file in files:
    try:
        print(file)
        # get_all_wav_files joins folder and name with '/', so the last
        # '/'-separated component is the bare file name.
        filename = file.split('/')[-1]
        row = extract_features(file)
        # `row` is the list of per-segment feature lists; it is kept nested,
        # so each entry is [label, segments, flag] rather than a flat row.
        new_row = [clean_labels(filename)] + [row]
        
        # File-level augmentation flag. Each segment list already ends with
        # the same flag, appended inside extract_features.
        if file.endswith("_m2.wav") or file.endswith("_p2.wav"):
            new_row.append(1)
        else:
            new_row.append(0)

        entries.append(new_row)
    except Exception as e:
        print(e)
        print(f"Could not process {file}")
        
# One DataFrame row per file; the second column holds the nested segment lists.
df = pd.DataFrame(entries)
# The f-string has no placeholders; the path is a fixed literal.
df.to_csv(f"../data/files/audio_classification_5sec_deep_32.csv")

../data/genres\blues/blues.00000.wav
../data/genres\blues/blues.00000_m2.wav
../data/genres\blues/blues.00000_p2.wav
../data/genres\blues/blues.00001.wav
../data/genres\blues/blues.00001_m2.wav
../data/genres\blues/blues.00001_p2.wav
../data/genres\blues/blues.00002.wav
../data/genres\blues/blues.00002_m2.wav
../data/genres\blues/blues.00002_p2.wav
../data/genres\blues/blues.00003.wav
../data/genres\blues/blues.00003_m2.wav
../data/genres\blues/blues.00003_p2.wav
../data/genres\blues/blues.00004.wav
../data/genres\blues/blues.00004_m2.wav
../data/genres\blues/blues.00004_p2.wav
../data/genres\blues/blues.00005.wav
../data/genres\blues/blues.00005_m2.wav
../data/genres\blues/blues.00005_p2.wav
../data/genres\blues/blues.00006.wav
../data/genres\blues/blues.00006_m2.wav
../data/genres\blues/blues.00006_p2.wav
../data/genres\blues/blues.00007.wav
../data/genres\blues/blues.00007_m2.wav
../data/genres\blues/blues.00007_p2.wav
../data/genres\blues/blues.00008.wav
../data/genres\blues/blues.

  y, sr = librosa.load(file_path, sr=sr, mono=True, duration=30)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


../data/genres\jazz/jazz.00055_m2.wav
../data/genres\jazz/jazz.00055_p2.wav
../data/genres\jazz/jazz.00056.wav
../data/genres\jazz/jazz.00056_m2.wav
../data/genres\jazz/jazz.00056_p2.wav
../data/genres\jazz/jazz.00057.wav
../data/genres\jazz/jazz.00057_m2.wav
../data/genres\jazz/jazz.00057_p2.wav
../data/genres\jazz/jazz.00058.wav
../data/genres\jazz/jazz.00058_m2.wav
../data/genres\jazz/jazz.00058_p2.wav
../data/genres\jazz/jazz.00059.wav
../data/genres\jazz/jazz.00059_m2.wav
../data/genres\jazz/jazz.00059_p2.wav
../data/genres\jazz/jazz.00060.wav
../data/genres\jazz/jazz.00060_m2.wav
../data/genres\jazz/jazz.00060_p2.wav
../data/genres\jazz/jazz.00061.wav
../data/genres\jazz/jazz.00061_m2.wav
../data/genres\jazz/jazz.00061_p2.wav
../data/genres\jazz/jazz.00062.wav
../data/genres\jazz/jazz.00062_m2.wav
../data/genres\jazz/jazz.00062_p2.wav
../data/genres\jazz/jazz.00063.wav
../data/genres\jazz/jazz.00063_m2.wav
../data/genres\jazz/jazz.00063_p2.wav
../data/genres\jazz/jazz.00064.wav
.

In [12]:
# Flatten the per-file entries into one row per segment. Each entry holds the
# label at index 0 and the list of per-segment feature lists at index 1, so
# concatenating entry[1] as-is would leave whole lists as cells instead of
# individual feature values.
entry_clean = [
    [entry[0]] + list(segment)
    for entry in entries
    for segment in entry[1]
]

def get_classical_feature_names(n_mfcc=13, n_chroma=12, n_spec_contrast=7, n_tonnetz=6):
    """
    Build the column names for the per-segment feature table.

    The list starts with 'target'. For each of MFCC, chroma, spectral contrast
    and tonnetz it then holds every "<family>_<i>_mean" name followed by every
    "<family>_<i>_std" name (i counts from 1). It ends with tempo, the spectral
    rolloff and centroid mean/std columns, the zero-crossing rate, the two RMS
    columns and the 'augmented' flag.
    """
    def stat_columns(prefix, count):
        # All means of a family come before all of its stds.
        means = [f"{prefix}_{i}_mean" for i in range(1, count + 1)]
        stds = [f"{prefix}_{i}_std" for i in range(1, count + 1)]
        return means + stds

    column_names = ['target']
    column_names.extend(stat_columns("mfcc", n_mfcc))
    column_names.extend(stat_columns("chroma", n_chroma))
    column_names.extend(stat_columns("spec_contrast", n_spec_contrast))
    column_names.extend(stat_columns("tonnetz", n_tonnetz))

    # Scalar descriptors, in the order the extractor appends them.
    column_names.extend([
        "tempo",
        "mean_spectral_rolloff",
        "std_spectral_rolloff",
        "mean_spectral_centroids",
        "std_spectral_centroids",
        "zc_rate",
        "rms_mean",
        # NOTE(review): the segment extractor appends rms.std() in this slot,
        # so the value is a standard deviation despite the "var" in the name.
        "rms_var",
        "augmented",
    ])
    return column_names


# `entries` holds one [label, segments, flag] item per file, which cannot be
# matched against the flat column list; build one row per segment instead.
segment_rows = [
    [entry[0]] + list(segment)
    for entry in entries
    for segment in entry[1]
]

if segment_rows:
    # Every MFCC coefficient contributes two columns (mean and std), so the
    # coefficient count used at extraction time follows from the row width.
    fixed_columns = len(get_classical_feature_names(n_mfcc=0))
    n_mfcc_used = (len(segment_rows[0]) - fixed_columns) // 2
    segment_columns = get_classical_feature_names(n_mfcc=n_mfcc_used)
else:
    segment_columns = get_classical_feature_names()

df = pd.DataFrame(segment_rows, columns=segment_columns)
df.to_csv(f"../data/files/audio_classification_5sec_big.csv", index=False)

In [9]:
# Dump whatever `entries` currently holds, without column names; the CSV also
# gets pandas' default index column because `index=False` is not passed.
# NOTE(review): this cell's execution count (9) precedes the extraction cell
# above (10), so the contents of `entries` at the time it ran are unclear.
df = pd.DataFrame(entries)
df.to_csv(f"../data/files/audio_classification_5sec_deep.csv")

In [None]:
# Flatten every spectrogram image into one row: file name followed by the
# grayscale pixel values.
files = get_all_image_files('../data/images/')
mel_spectograms = []

for file in files:
    try:
        filename = file.split('/')[-1]
        print(filename)
        # The context manager closes the opened image file even when the
        # conversion raises; 'L' converts to single-channel grayscale.
        with Image.open(file) as source_img:
            img = source_img.convert('L')
        img_array = np.asarray(img).flatten()
        img_array = img_array.astype(np.float16)
        mel_spectograms.append([filename] + img_array.tolist())
        img.close()
    except Exception as e:
        # Best effort: report the failure and continue with the next image.
        print(e)
        print(f"Could not process {file}")

# A bare `df.head(10)` in the middle of a cell displays nothing, so it was
# dropped; put it on the last line of a cell to preview the frame.
df = pd.DataFrame.from_records(mel_spectograms)
df.to_csv("../data/files/mel_spectograms_flat.csv")

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import utils

def clean_labels(s):
    """Return the text of ``s`` up to (not including) its first '.'; all of ``s`` if it has no dot."""
    pieces = s.split('.', 1)
    return pieces[0]

# Genre name -> integer class index used for the one-hot targets below.
labelMap = {
    "blues": 0,
    "classical": 1,
    "country": 2,
    "disco": 3,
    "hiphop": 4,
    "jazz": 5,
    "metal": 6,
    "pop": 7,
    "reggae": 8,
    "rock": 9
}

# NOTE(review): no cell visible in this notebook creates 'features' or 'label'
# columns on `df` — confirm which frame is expected here.
X = df['features']
# Map each label to its class index, then one-hot encode over the 10 genres.
y = np.array(list(map(lambda x: labelMap[x], df['label'])))
y_cat = utils.to_categorical(y, num_classes=10)

# Fixed random_state so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, random_state=1122)

scaler = StandardScaler()

# The scaler is fitted on the training split only; the test split is
# transformed with those same statistics.
# NOTE(review): X is a single DataFrame column here — verify it has the shape
# StandardScaler expects.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
import os
# Set before tensorflow is imported on the next line.
# NOTE(review): presumably meant to restrict TensorFlow to GPU 0 — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
# Report how many GPU devices TensorFlow lists.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# `pd.concat` only accepts pandas objects, but the scaled features and the
# one-hot targets are NumPy arrays, so the original call raised. Pair features
# and targets in a TensorDataset instead, so each batch yields (features, target).
train_dataset = TensorDataset(
    torch.as_tensor(np.asarray(X_train_scaled), dtype=torch.float32),
    torch.as_tensor(np.asarray(y_train), dtype=torch.float32),
)
# `test_dataset` was referenced without being defined in any visible cell;
# build it the same way from the held-out split.
test_dataset = TensorDataset(
    torch.as_tensor(np.asarray(X_test_scaled), dtype=torch.float32),
    torch.as_tensor(np.asarray(y_test), dtype=torch.float32),
)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, drop_last=True)
test_loader  = DataLoader(test_dataset, batch_size=8, shuffle=False, drop_last=False)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Train for 100 epochs with batches of 4, holding out 20% of the training data
# for validation.
# NOTE(review): `model` is not defined in any cell visible here, and the
# unscaled X_train is passed although X_train_scaled was computed earlier —
# confirm both.
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=4,
    validation_split=0.2
)

In [None]:
# Evaluate on the held-out split and report accuracy as a percentage.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test Accuracy: {test_acc:.2%}")

# y_test is one-hot (built with to_categorical), so argmax recovers the class
# index for both the predictions and the ground truth.
y_pred = model.predict(X_test).argmax(axis=1)
y_true_labels = np.argmax(y_test, axis=1)
print(classification_report(y_true_labels, y_pred))

# Confusion Matrix
# NOTE(review): `labels` is not defined in any cell visible here — presumably
# the genre names in class-index order; confirm.
cm = confusion_matrix(y_true_labels, y_pred)
plt.figure(figsize=(10,7))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels, cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

In [1]:
import string
A = string.ascii_uppercase

def vig_autokey_decrypt(cipher, seed):
    """Decrypt ``cipher`` with an autokey Vigenere scheme over A-Z.

    The key stream begins with ``seed`` and is extended by each plaintext
    letter as soon as it is recovered. Every symbol of ``cipher`` and ``seed``
    must be an uppercase ASCII letter.

    Returns:
        str: The recovered plaintext, the same length as ``cipher``.
    """
    # Key stream = seed followed by the plaintext recovered so far; the key
    # symbol for ciphertext position n is simply stream[n].
    stream = list(seed)
    for pos, symbol in enumerate(cipher):
        shift = A.index(stream[pos])
        offset = A.index(symbol)
        stream.append(A[(offset - shift) % 26])
    return ''.join(stream[len(seed):])

# Decrypt the sample ciphertext C with the seed key K and print the result.
C = "CANYOUFINDWHATLIVESINTIMEFLOWINGTHROUGHEACHMEASUREDRHYMEWISDOMWAITSI"
K = "MBJKDMSFF"
plain = vig_autokey_decrypt(C, K)
print(plain)

QZEOLINDINXDMIDVSWFLKHAJJNPJLYGGKYEZLVJYUSJIFPXLTKLIZTXHLPISGNDDBIDA
