<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#define-location-and-gather-wav-files" data-toc-modified-id="define-location-and-gather-wav-files-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>define location and gather wav files</a></span></li><li><span><a href="#create-X-categorical-dataframe-and-y-target-dataframe" data-toc-modified-id="create-X-categorical-dataframe-and-y-target-dataframe-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>create X categorical dataframe and y target dataframe</a></span></li><li><span><a href="#define-function-to-extract-feature-data-from-audio" data-toc-modified-id="define-function-to-extract-feature-data-from-audio-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>define function to extract feature data from audio</a></span></li><li><span><a href="#run-functions-and-save-info" data-toc-modified-id="run-functions-and-save-info-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>run functions and save info</a></span></li><li><span><a href="#process-categorical-data" data-toc-modified-id="process-categorical-data-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>process categorical data</a></span></li><li><span><a href="#save-feature-array-and-target-data-array" data-toc-modified-id="save-feature-array-and-target-data-array-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>save feature array and target data array</a></span></li></ul></div>

In [2]:
# directory library
import os
os.environ['LIBROSA_CACHE_DIR'] = '/tmp/librosa_cache'
import librosa
from pathlib import Path
# progress library
from tqdm import tqdm
# audio modules
import audioread
from librosa import feature
import librosa.display 
import soundfile
import tensorflow as tf
import numpy as np
import pandas as pd 
import scipy
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline
import re

from collections import Counter

In [3]:
# Clear librosa's cache (its location is set through LIBROSA_CACHE_DIR in the import cell),
# presumably so results cached by earlier runs are not reused.
librosa.cache.clear()



# define location and gather wav files

In [4]:
# project root (kept as a plain string) and, below it, the folder holding the wav clips
project = '/Users/badmin/DSC/birdcall_project/'
audio_path = Path(project, 'trial')

In [5]:
# collect the path of every .wav file found under audio_path, then view the list as an array
files = librosa.util.find_files(audio_path, ext=['wav'])
filesArray = np.array(files)
filesArray

array(['/Users/badmin/DSC/birdcall_project/trial/XC102968.wav.wav',
       '/Users/badmin/DSC/birdcall_project/trial/XC102969.wav.wav',
       '/Users/badmin/DSC/birdcall_project/trial/XC102972.wav.wav', ...,
       '/Users/badmin/DSC/birdcall_project/trial/XC93427.wav.wav',
       '/Users/badmin/DSC/birdcall_project/trial/XC99545.wav.wav',
       '/Users/badmin/DSC/birdcall_project/trial/XC99572.wav.wav'],
      dtype='<U57')

In [6]:

# plain list of the audio paths and a one-column frame built from it
files2df = [*filesArray]
files_df = pd.DataFrame(data=files2df)

In [7]:
# Key each recording by its path relative to audio_path (for a flat folder this is just
# the file name). The previous version stripped a hard-coded '.../wav_audio_100/' prefix,
# which does not match audio_path ('.../trial/'), so the full path was left in place;
# it also relied on a chained inplace replace, which is not guaranteed to write back
# to files_df.
# NOTE(review): assumes the 'filename' column of the metadata CSV holds bare file names -- confirm.
files_df['filename'] = [os.path.relpath(p, audio_path) for p in files2df]

In [8]:
# Keep only the 'filename' column; from here on files_df is a Series, not a DataFrame.
# NOTE(review): not idempotent -- re-running this cell on its own will likely fail,
# because the Series no longer has a 'filename' label to select.
files_df = files_df['filename']

# create X categorical dataframe and y target dataframe

In [9]:
# load the metadata table from the working directory
data = pd.read_csv(filepath_or_buffer="data_100.csv")

In [10]:
# independent copy of the metadata to derive the categorical features from
X_cat_data = data.copy(deep=True)

In [11]:
# left-join the metadata onto the audio file list, matching rows on 'filename'
X_cat_df = pd.merge(left=files_df, right=X_cat_data, how='left', on='filename')

In [12]:
# Remove the index, key, target and count columns so only the categorical predictors remain.
# Rebinding the name (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second run no longer raises because the columns are already gone.
X_cat_df = X_cat_df.drop(
    columns=['Unnamed: 0', 'filename', 'ebird_code', 'counts'],
    errors='ignore',
)

In [53]:
# sanity check: (rows, columns) of the categorical feature frame
X_cat_df.shape

(3200, 3)

In [54]:
# target frame: the metadata without the columns that are not needed for the label merge
target_data = data.drop(columns=['Unnamed: 0', 'season', 'tod', 'country', 'counts'])

In [55]:
# left-join the target columns onto the audio file list, matching rows on 'filename'
target_df = pd.merge(left=files_df, right=target_data, how='left', on='filename')

In [56]:
# label column (ebird_code) of the merged target frame
y_df = target_df['ebird_code']

In [57]:
# sorted array of the distinct ebird_code labels
classes = np.unique(target_df['ebird_code'])

In [58]:
# display the distinct class labels
classes

array(['amegfi', 'amerob', 'annhum', 'astfly', 'bewwre', 'bkhgro',
       'bnhcow', 'brespa', 'buggna', 'bulori', 'cacwre', 'comgra',
       'daejun', 'dowwoo', 'foxspa', 'gnttow', 'gockin', 'herthr',
       'indbun', 'marwre', 'norcar', 'pasfly', 'purfin', 'rebwoo',
       'scoori', 'sonspa', 'spotow', 'swaspa', 'warvir', 'wesmea',
       'whbnut', 'whcspa'], dtype=object)

# define function to extract feature data from audio

In [32]:
def stat_feats(path):
    """Return summary statistics of the FFT frequency-bin axis of one audio clip.

    The clip is loaded with librosa at 32 kHz, limited to its first 30 seconds.
    The statistics are taken over ``np.fft.rfftfreq(n, d=1/sr)``, i.e. over the
    bin centre frequencies for a signal of ``n`` samples.

    Parameters
    ----------
    path : str or path-like
        Audio file to load.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(6,)`` holding, in order: mean, standard
        deviation, maximum, minimum, median and mode of the frequency bins.

    Raises
    ------
    ValueError
        If no samples could be loaded from ``path``.
    """
    X, sr = librosa.load(path, sr=32000, duration=30)
    n = len(X)
    if n == 0:
        # rfftfreq(0, ...) would otherwise fail with an opaque ZeroDivisionError
        raise ValueError("no audio samples loaded from {!r}".format(path))

    # NOTE(review): these are the FFT bin frequencies, which depend only on the
    # clip length and sample rate -- not on the samples in X -- so clips of equal
    # length get identical statistics. If content-dependent features were
    # intended (e.g. weighting by np.abs(np.fft.rfft(X))), confirm before
    # changing the saved feature set.
    freqs = np.fft.rfftfreq(n, d=1/sr)

    # Depending on the SciPy version, mode() returns a length-1 array or a
    # scalar for 1-D input; atleast_1d makes the indexing work for both.
    mode = np.atleast_1d(scipy.stats.mode(freqs)[0])[0]

    return np.array([
        np.mean(freqs),
        np.std(freqs),
        np.amax(freqs),
        np.amin(freqs),
        np.median(freqs),
        mode,
    ], dtype=np.float64)
    

In [15]:
# function defining features to extract to vector
def extract_features(path):
    """Extract a fixed-length spectral feature vector from one audio clip.

    The clip is loaded with librosa at 32 kHz, limited to its first 30 seconds.
    Each underlying feature matrix is computed once and then summarised over
    time.

    Parameters
    ----------
    path : str or path-like
        Audio file to load.

    Returns
    -------
    numpy.ndarray
        1-D float64 vector, concatenated in this order:

        * chroma: mean, std over frames
        * spectral centroid: mean, max over frames
        * 12 MFCCs (``lifter=2``): mean, max, median over frames
        * 64-band mel spectrogram: mean, std, max over frames
        * onset strength envelope: max, mean

        With librosa's default of 12 chroma bins this is 256 values.
    """
    n_fft = 2048

    # load audio
    X, sr = librosa.load(path, sr=32000, duration=30)
    n = len(X)

    # Pad the tail by half a window before the STFT. `size` is passed by keyword
    # because newer librosa releases no longer accept it positionally.
    y_pad = librosa.util.fix_length(X, size=n + n_fft // 2, mode='reflect')

    # magnitude of the short-time Fourier transform of the padded signal
    stft = np.abs(librosa.stft(y_pad, n_fft=n_fft, center=True))

    chroma = librosa.feature.chroma_stft(S=stft, sr=sr, n_fft=n_fft)

    # sr must be passed explicitly: otherwise librosa assumes 22050 Hz and the
    # centroid (in Hz) is mis-scaled for audio loaded at 32 kHz.
    centroid = librosa.feature.spectral_centroid(y=X, sr=sr)

    # mel cepstral coefficients; the per-coefficient std was computed but never
    # added to the output before, so it stays excluded to keep the vector layout
    mfcc = librosa.feature.mfcc(y=X, sr=sr, n_fft=n_fft, lifter=2, n_mfcc=12)

    mel = librosa.feature.melspectrogram(S=stft, sr=sr, n_fft=n_fft, n_mels=64)

    # The former `threshold=0.2` argument is not an onset_strength parameter and,
    # as far as the librosa source shows, extra keywords are only forwarded to
    # the spectrogram computation when S is not supplied -- so it is dropped.
    onset = librosa.onset.onset_strength(S=stft, sr=sr)

    parts = (
        chroma.mean(axis=1),
        chroma.std(axis=1),
        centroid.mean(axis=1),
        centroid.max(axis=1),
        mfcc.mean(axis=1),
        mfcc.max(axis=1),
        np.median(mfcc, axis=1),
        mel.mean(axis=1),
        mel.std(axis=1),
        mel.max(axis=1),
        onset.max(),
        onset.mean(),
    )
    # float64 matches the dtype the vector had when it was grown with hstack
    return np.hstack(parts).astype(np.float64)



In [58]:
    # collect the raw MFCC matrix of every clip, in filesArray order
    mfList = []
    for path in tqdm(filesArray):
        y, sr = librosa.load(path, sr=32000, duration=30)
        # pass the audio as the keyword `y`: newer librosa releases reject it positionally
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=1024, n_mfcc=12)
        mfList.append(mfcc)


  0%|          | 0/3200 [00:00<?, ?it/s][A
  0%|          | 6/3200 [00:00<00:58, 55.07it/s][A
  0%|          | 12/3200 [00:00<00:58, 54.46it/s][A
  1%|          | 17/3200 [00:00<01:00, 52.27it/s][A
  1%|          | 23/3200 [00:00<00:59, 53.43it/s][A
  1%|          | 28/3200 [00:00<01:02, 50.63it/s][A
  1%|          | 34/3200 [00:00<01:00, 52.65it/s][A
  1%|▏         | 40/3200 [00:00<00:59, 53.28it/s][A
  1%|▏         | 45/3200 [00:00<01:02, 50.23it/s][A
  2%|▏         | 51/3200 [00:00<00:59, 52.53it/s][A
  2%|▏         | 58/3200 [00:01<00:56, 55.15it/s][A
  2%|▏         | 64/3200 [00:01<00:59, 52.45it/s][A
  2%|▏         | 70/3200 [00:01<00:58, 53.94it/s][A
  2%|▏         | 76/3200 [00:01<01:00, 51.54it/s][A
  3%|▎         | 82/3200 [00:01<01:06, 47.10it/s][A
  3%|▎         | 87/3200 [00:01<01:07, 46.46it/s][A
  3%|▎         | 92/3200 [00:01<01:07, 46.33it/s][A
  3%|▎         | 97/3200 [00:01<01:07, 46.13it/s][A
  3%|▎         | 102/3200 [00:02<01:06, 46.25it/s][A
 

 58%|█████▊    | 1861/3200 [00:34<00:22, 58.23it/s][A
 58%|█████▊    | 1867/3200 [00:35<00:25, 51.62it/s][A
 59%|█████▊    | 1874/3200 [00:35<00:24, 54.22it/s][A
 59%|█████▉    | 1880/3200 [00:35<00:23, 55.14it/s][A
 59%|█████▉    | 1887/3200 [00:35<00:22, 58.43it/s][A
 59%|█████▉    | 1893/3200 [00:35<00:23, 56.80it/s][A
 59%|█████▉    | 1899/3200 [00:35<00:23, 55.16it/s][A
 60%|█████▉    | 1905/3200 [00:35<00:23, 54.24it/s][A
 60%|█████▉    | 1911/3200 [00:35<00:23, 55.21it/s][A
 60%|█████▉    | 1919/3200 [00:35<00:21, 60.07it/s][A
 60%|██████    | 1926/3200 [00:36<00:20, 61.74it/s][A
 60%|██████    | 1933/3200 [00:36<00:20, 62.82it/s][A
 61%|██████    | 1940/3200 [00:36<00:20, 61.47it/s][A
 61%|██████    | 1947/3200 [00:36<00:19, 63.30it/s][A
 61%|██████    | 1954/3200 [00:36<00:19, 64.16it/s][A
 61%|██████▏   | 1961/3200 [00:36<00:19, 62.12it/s][A
 62%|██████▏   | 1968/3200 [00:36<00:19, 62.08it/s][A
 62%|██████▏   | 1975/3200 [00:36<00:20, 59.63it/s][A
 62%|█████

In [59]:
# Wrap the collected MFCC matrices in a DataFrame.
# NOTE(review): every element of mfList is a 2-D array, so this presumably does not
# yield one numeric column per coefficient -- confirm the frame (and the CSV written
# from it) holds usable numbers.
mfcc_df = pd.DataFrame(mfList)

In [60]:
# persist the MFCC frame to the working directory
mfcc_df.to_csv(path_or_buf='mfccs100.csv')

In [22]:
def energy(path):
    """Return the spread and average of the frame-wise RMS energy of one clip.

    The clip is loaded with librosa at 32 kHz, limited to its first 30 seconds.
    RMS is computed once from the magnitude spectrogram and then summarised.

    Parameters
    ----------
    path : str or path-like
        Audio file to load.

    Returns
    -------
    numpy.ndarray
        Float64 array of shape ``(2,)``: standard deviation, then mean, of the
        RMS values.
    """
    y, sr = librosa.load(path, sr=32000, duration=30)

    # magnitude spectrogram; the phase that magphase() also returned was never used
    S = np.abs(librosa.stft(y))

    # computed once and reused for both statistics
    rms = librosa.feature.rms(S=S)

    return np.array([np.std(rms), np.mean(rms)], dtype=np.float64)

# run functions and save info

In [91]:
# Sanity check: number of audio files that will be processed.
# NOTE(review): the saved output of this cell (6400) disagrees with the 3200 items shown
# in the progress bars, so the stored outputs come from different runs -- re-run the
# notebook top to bottom before relying on them.
print(len(filesArray))

6400


In [33]:
# frequency-axis statistics: one stat_feats() vector per audio file, in filesArray order
stat_features = []
# the loop variable is `path` (not `files`) so it no longer overwrites the `files`
# list built when the wav files were gathered
for path in tqdm(filesArray):
    features = stat_feats(path)
    stat_features.append(features)


  0%|          | 0/3200 [00:00<?, ?it/s][A
  0%|          | 9/3200 [00:00<00:38, 83.19it/s][A
  0%|          | 15/3200 [00:00<00:42, 74.53it/s][A
  1%|          | 22/3200 [00:00<00:45, 69.16it/s][A
  1%|          | 32/3200 [00:00<00:41, 75.80it/s][A
  1%|▏         | 42/3200 [00:00<00:40, 78.54it/s][A
  2%|▏         | 51/3200 [00:00<00:40, 78.55it/s][A
  2%|▏         | 61/3200 [00:00<00:38, 82.54it/s][A
  2%|▏         | 71/3200 [00:00<00:37, 83.10it/s][A
  2%|▏         | 79/3200 [00:01<00:44, 70.28it/s][A
  3%|▎         | 87/3200 [00:01<00:47, 65.19it/s][A
  3%|▎         | 94/3200 [00:01<00:50, 61.63it/s][A
  3%|▎         | 101/3200 [00:01<00:52, 58.50it/s][A
  3%|▎         | 111/3200 [00:01<00:46, 66.34it/s][A
  4%|▎         | 119/3200 [00:01<00:46, 65.89it/s][A
  4%|▍         | 133/3200 [00:01<00:40, 75.99it/s][A
  4%|▍         | 142/3200 [00:01<00:41, 73.92it/s][A
  5%|▍         | 150/3200 [00:02<00:46, 65.36it/s][A
  5%|▍         | 158/3200 [00:02<00:47, 63.85it/s

 40%|████      | 1287/3200 [00:18<00:25, 75.34it/s][A
 41%|████      | 1298/3200 [00:18<00:23, 79.73it/s][A
 41%|████      | 1307/3200 [00:18<00:23, 79.59it/s][A
 41%|████      | 1316/3200 [00:18<00:24, 76.00it/s][A
 41%|████▏     | 1324/3200 [00:18<00:27, 68.61it/s][A
 42%|████▏     | 1332/3200 [00:18<00:26, 71.41it/s][A
 42%|████▏     | 1340/3200 [00:18<00:29, 64.00it/s][A
 42%|████▏     | 1347/3200 [00:19<00:29, 63.18it/s][A
 42%|████▏     | 1354/3200 [00:19<00:32, 57.43it/s][A
 42%|████▎     | 1360/3200 [00:19<00:32, 55.81it/s][A
 43%|████▎     | 1366/3200 [00:19<00:35, 51.91it/s][A
 43%|████▎     | 1372/3200 [00:19<00:36, 49.74it/s][A
 43%|████▎     | 1380/3200 [00:19<00:32, 55.64it/s][A
 43%|████▎     | 1389/3200 [00:19<00:29, 62.26it/s][A
 44%|████▎     | 1396/3200 [00:19<00:28, 64.09it/s][A
 44%|████▍     | 1406/3200 [00:20<00:25, 69.70it/s][A
 44%|████▍     | 1414/3200 [00:20<00:25, 69.02it/s][A
 45%|████▍     | 1427/3200 [00:20<00:22, 79.41it/s][A
 45%|████▍

 83%|████████▎ | 2647/3200 [00:36<00:06, 84.50it/s][A
 83%|████████▎ | 2656/3200 [00:36<00:07, 75.12it/s][A
 83%|████████▎ | 2666/3200 [00:36<00:07, 76.16it/s][A
 84%|████████▎ | 2674/3200 [00:36<00:07, 74.80it/s][A
 84%|████████▍ | 2682/3200 [00:36<00:07, 70.44it/s][A
 84%|████████▍ | 2691/3200 [00:36<00:06, 74.26it/s][A
 84%|████████▍ | 2699/3200 [00:36<00:06, 72.69it/s][A
 85%|████████▍ | 2708/3200 [00:36<00:06, 75.15it/s][A
 85%|████████▍ | 2718/3200 [00:37<00:06, 79.56it/s][A
 85%|████████▌ | 2727/3200 [00:37<00:06, 74.71it/s][A
 86%|████████▌ | 2739/3200 [00:37<00:05, 82.16it/s][A
 86%|████████▌ | 2749/3200 [00:37<00:05, 85.99it/s][A
 86%|████████▌ | 2758/3200 [00:37<00:05, 81.33it/s][A
 86%|████████▋ | 2767/3200 [00:37<00:05, 81.95it/s][A
 87%|████████▋ | 2776/3200 [00:37<00:05, 80.06it/s][A
 87%|████████▋ | 2785/3200 [00:37<00:05, 74.34it/s][A
 87%|████████▋ | 2794/3200 [00:38<00:05, 76.49it/s][A
 88%|████████▊ | 2802/3200 [00:38<00:05, 75.73it/s][A
 88%|█████

In [81]:
# tabulate the per-file statistics and check the dimensions
stat_df = pd.DataFrame(data=stat_features)
stat_df.shape

(3200, 6)

In [82]:
# persist the statistics frame to the working directory
stat_df.to_csv(path_or_buf='stat1002.csv')

In [18]:
# spectral features: one extract_features() vector per audio file, in filesArray order
Xfeatures = []
# the loop variable is `path` (not `files`) so it no longer overwrites the `files`
# list built when the wav files were gathered
for path in tqdm(filesArray):
    features = extract_features(path)
    Xfeatures.append(features)


  0%|          | 0/3200 [00:00<?, ?it/s][A
  0%|          | 4/3200 [00:00<01:56, 27.33it/s][A
  0%|          | 6/3200 [00:00<03:50, 13.85it/s][A
  0%|          | 8/3200 [00:00<05:18, 10.03it/s][A
  0%|          | 9/3200 [00:01<11:31,  4.62it/s][A
  0%|          | 10/3200 [00:01<09:48,  5.42it/s][A
  0%|          | 12/3200 [00:01<09:48,  5.42it/s][A
  0%|          | 13/3200 [00:02<12:56,  4.11it/s][A
  0%|          | 14/3200 [00:02<13:08,  4.04it/s][A
  0%|          | 15/3200 [00:02<18:04,  2.94it/s][A
  1%|          | 17/3200 [00:03<17:22,  3.05it/s][A
  1%|          | 18/3200 [00:04<19:47,  2.68it/s][A
  1%|          | 19/3200 [00:04<20:50,  2.54it/s][A
  1%|          | 21/3200 [00:04<16:20,  3.24it/s][A
  1%|          | 22/3200 [00:05<18:30,  2.86it/s][A
  1%|          | 24/3200 [00:05<13:50,  3.82it/s][A
  1%|          | 26/3200 [00:05<14:11,  3.73it/s][A
  1%|          | 28/3200 [00:05<11:06,  4.76it/s][A
  1%|          | 29/3200 [00:06<10:59,  4.81it/s][A
  1%|

  7%|▋         | 218/3200 [00:49<10:42,  4.64it/s][A
  7%|▋         | 219/3200 [00:49<12:13,  4.06it/s][A
  7%|▋         | 221/3200 [00:50<10:39,  4.66it/s][A
  7%|▋         | 223/3200 [00:50<12:00,  4.13it/s][A
  7%|▋         | 224/3200 [00:51<13:33,  3.66it/s][A
  7%|▋         | 226/3200 [00:51<12:23,  4.00it/s][A
  7%|▋         | 228/3200 [00:51<09:55,  4.99it/s][A
  7%|▋         | 230/3200 [00:52<09:55,  4.98it/s][A
  7%|▋         | 232/3200 [00:52<09:06,  5.44it/s][A
  7%|▋         | 233/3200 [00:52<12:45,  3.88it/s][A
  7%|▋         | 235/3200 [00:52<09:56,  4.97it/s][A
  7%|▋         | 236/3200 [00:53<10:48,  4.57it/s][A
  7%|▋         | 238/3200 [00:53<11:52,  4.16it/s][A
  7%|▋         | 239/3200 [00:54<15:52,  3.11it/s][A
  8%|▊         | 240/3200 [00:54<18:52,  2.61it/s][A
  8%|▊         | 241/3200 [00:55<16:56,  2.91it/s][A
  8%|▊         | 243/3200 [00:55<12:42,  3.88it/s][A
  8%|▊         | 244/3200 [00:55<15:44,  3.13it/s][A
  8%|▊         | 245/3200 [0

 13%|█▎        | 417/3200 [01:36<19:34,  2.37it/s][A
 13%|█▎        | 419/3200 [01:37<17:41,  2.62it/s][A
 13%|█▎        | 421/3200 [01:37<13:07,  3.53it/s][A
 13%|█▎        | 422/3200 [01:37<11:51,  3.91it/s][A
 13%|█▎        | 423/3200 [01:37<11:09,  4.15it/s][A
 13%|█▎        | 424/3200 [01:38<14:28,  3.20it/s][A
 13%|█▎        | 425/3200 [01:38<11:58,  3.86it/s][A
 13%|█▎        | 426/3200 [01:38<11:23,  4.06it/s][A
 13%|█▎        | 427/3200 [01:39<14:32,  3.18it/s][A
 13%|█▎        | 428/3200 [01:39<13:59,  3.30it/s][A
 13%|█▎        | 429/3200 [01:39<16:38,  2.78it/s][A
 13%|█▎        | 431/3200 [01:39<12:27,  3.71it/s][A
 14%|█▎        | 433/3200 [01:40<10:33,  4.37it/s][A
 14%|█▎        | 434/3200 [01:40<09:06,  5.06it/s][A
 14%|█▎        | 435/3200 [01:40<13:17,  3.47it/s][A
 14%|█▎        | 437/3200 [01:40<10:37,  4.33it/s][A
 14%|█▎        | 439/3200 [01:41<08:54,  5.17it/s][A
 14%|█▍        | 441/3200 [01:41<08:57,  5.13it/s][A
 14%|█▍        | 442/3200 [0

 20%|█▉        | 638/3200 [02:22<12:33,  3.40it/s][A
 20%|█▉        | 639/3200 [02:22<12:27,  3.43it/s][A
 20%|██        | 640/3200 [02:23<10:04,  4.23it/s][A
 20%|██        | 641/3200 [02:23<08:28,  5.03it/s][A
 20%|██        | 643/3200 [02:23<07:05,  6.01it/s][A
 20%|██        | 644/3200 [02:23<10:36,  4.01it/s][A
 20%|██        | 645/3200 [02:24<13:16,  3.21it/s][A
 20%|██        | 646/3200 [02:24<10:47,  3.94it/s][A
 20%|██        | 647/3200 [02:24<09:28,  4.49it/s][A
 20%|██        | 648/3200 [02:24<10:01,  4.24it/s][A
 20%|██        | 649/3200 [02:24<10:03,  4.23it/s][A
 20%|██        | 650/3200 [02:25<11:45,  3.61it/s][A
 20%|██        | 651/3200 [02:25<10:05,  4.21it/s][A
 20%|██        | 653/3200 [02:26<10:36,  4.00it/s][A
 20%|██        | 654/3200 [02:26<12:42,  3.34it/s][A
 20%|██        | 655/3200 [02:26<11:15,  3.77it/s][A
 20%|██        | 656/3200 [02:27<14:22,  2.95it/s][A
 21%|██        | 657/3200 [02:27<12:02,  3.52it/s][A
 21%|██        | 658/3200 [0

 26%|██▌       | 816/3200 [03:10<09:44,  4.08it/s][A
 26%|██▌       | 818/3200 [03:10<07:40,  5.18it/s][A
 26%|██▌       | 819/3200 [03:10<07:49,  5.07it/s][A
 26%|██▌       | 820/3200 [03:10<06:46,  5.85it/s][A
 26%|██▌       | 822/3200 [03:10<05:56,  6.67it/s][A
 26%|██▌       | 823/3200 [03:10<06:22,  6.21it/s][A
 26%|██▌       | 825/3200 [03:11<07:38,  5.17it/s][A
 26%|██▌       | 826/3200 [03:11<11:12,  3.53it/s][A
 26%|██▌       | 827/3200 [03:12<12:25,  3.18it/s][A
 26%|██▌       | 828/3200 [03:12<14:06,  2.80it/s][A
 26%|██▌       | 829/3200 [03:13<14:05,  2.80it/s][A
 26%|██▌       | 830/3200 [03:13<11:19,  3.49it/s][A
 26%|██▌       | 832/3200 [03:13<09:06,  4.34it/s][A
 26%|██▌       | 833/3200 [03:13<11:06,  3.55it/s][A
 26%|██▌       | 834/3200 [03:14<09:47,  4.03it/s][A
 26%|██▌       | 835/3200 [03:14<10:24,  3.79it/s][A
 26%|██▌       | 837/3200 [03:14<08:03,  4.89it/s][A
 26%|██▌       | 839/3200 [03:14<06:24,  6.14it/s][A
 26%|██▋       | 841/3200 [0

 32%|███▏      | 1022/3200 [03:54<14:58,  2.42it/s][A
 32%|███▏      | 1023/3200 [03:55<17:19,  2.09it/s][A
 32%|███▏      | 1024/3200 [03:56<18:05,  2.00it/s][A
 32%|███▏      | 1026/3200 [03:56<15:06,  2.40it/s][A
 32%|███▏      | 1027/3200 [03:56<13:29,  2.68it/s][A
 32%|███▏      | 1028/3200 [03:57<14:43,  2.46it/s][A
 32%|███▏      | 1029/3200 [03:57<14:34,  2.48it/s][A
 32%|███▏      | 1030/3200 [03:57<12:01,  3.01it/s][A
 32%|███▏      | 1031/3200 [03:58<13:29,  2.68it/s][A
 32%|███▏      | 1032/3200 [03:58<12:47,  2.82it/s][A
 32%|███▏      | 1034/3200 [03:59<12:05,  2.99it/s][A
 32%|███▏      | 1036/3200 [03:59<10:00,  3.60it/s][A
 32%|███▏      | 1037/3200 [03:59<08:36,  4.19it/s][A
 32%|███▏      | 1038/3200 [03:59<07:16,  4.95it/s][A
 32%|███▎      | 1040/3200 [03:59<06:03,  5.94it/s][A
 33%|███▎      | 1041/3200 [04:00<09:39,  3.73it/s][A
 33%|███▎      | 1042/3200 [04:00<12:05,  2.97it/s][A
 33%|███▎      | 1043/3200 [04:01<13:42,  2.62it/s][A
 33%|███▎ 

 38%|███▊      | 1214/3200 [04:43<06:05,  5.43it/s][A
 38%|███▊      | 1216/3200 [04:44<06:43,  4.91it/s][A
 38%|███▊      | 1217/3200 [04:44<09:39,  3.42it/s][A
 38%|███▊      | 1218/3200 [04:45<09:14,  3.58it/s][A
 38%|███▊      | 1219/3200 [04:45<08:34,  3.85it/s][A
 38%|███▊      | 1220/3200 [04:45<08:12,  4.02it/s][A
 38%|███▊      | 1221/3200 [04:45<07:09,  4.61it/s][A
 38%|███▊      | 1222/3200 [04:46<09:51,  3.34it/s][A
 38%|███▊      | 1223/3200 [04:46<08:39,  3.80it/s][A
 38%|███▊      | 1225/3200 [04:46<06:42,  4.91it/s][A
 38%|███▊      | 1226/3200 [04:46<09:28,  3.47it/s][A
 38%|███▊      | 1227/3200 [04:47<08:35,  3.83it/s][A
 38%|███▊      | 1228/3200 [04:47<07:09,  4.59it/s][A
 38%|███▊      | 1229/3200 [04:47<06:34,  4.99it/s][A
 38%|███▊      | 1230/3200 [04:47<06:44,  4.87it/s][A
 38%|███▊      | 1231/3200 [04:47<07:42,  4.26it/s][A
 38%|███▊      | 1232/3200 [04:48<10:02,  3.27it/s][A
 39%|███▊      | 1233/3200 [04:48<09:10,  3.58it/s][A
 39%|███▊ 

 44%|████▎     | 1395/3200 [05:31<08:40,  3.47it/s][A
 44%|████▎     | 1397/3200 [05:31<06:33,  4.58it/s][A
 44%|████▎     | 1398/3200 [05:32<06:56,  4.32it/s][A
 44%|████▎     | 1399/3200 [05:32<05:58,  5.03it/s][A
 44%|████▍     | 1400/3200 [05:32<06:00,  4.99it/s][A
 44%|████▍     | 1401/3200 [05:32<05:15,  5.71it/s][A
 44%|████▍     | 1402/3200 [05:32<05:38,  5.31it/s][A
 44%|████▍     | 1403/3200 [05:33<05:07,  5.85it/s][A
 44%|████▍     | 1404/3200 [05:33<05:26,  5.51it/s][A
 44%|████▍     | 1405/3200 [05:33<05:13,  5.73it/s][A
 44%|████▍     | 1406/3200 [05:33<06:26,  4.64it/s][A
 44%|████▍     | 1407/3200 [05:33<06:03,  4.93it/s][A
 44%|████▍     | 1409/3200 [05:34<06:37,  4.51it/s][A
 44%|████▍     | 1410/3200 [05:34<09:12,  3.24it/s][A
 44%|████▍     | 1411/3200 [05:35<10:28,  2.85it/s][A
 44%|████▍     | 1412/3200 [05:35<09:45,  3.05it/s][A
 44%|████▍     | 1413/3200 [05:36<09:53,  3.01it/s][A
 44%|████▍     | 1415/3200 [05:36<07:34,  3.93it/s][A
 44%|████▍

 50%|████▉     | 1596/3200 [06:17<06:36,  4.04it/s][A
 50%|████▉     | 1597/3200 [06:18<06:02,  4.42it/s][A
 50%|████▉     | 1599/3200 [06:18<04:51,  5.50it/s][A
 50%|█████     | 1601/3200 [06:18<04:23,  6.08it/s][A
 50%|█████     | 1602/3200 [06:18<04:03,  6.56it/s][A
 50%|█████     | 1603/3200 [06:19<06:36,  4.02it/s][A
 50%|█████     | 1604/3200 [06:19<05:52,  4.53it/s][A
 50%|█████     | 1605/3200 [06:19<04:55,  5.39it/s][A
 50%|█████     | 1606/3200 [06:19<04:31,  5.88it/s][A
 50%|█████     | 1608/3200 [06:19<04:46,  5.56it/s][A
 50%|█████     | 1610/3200 [06:20<04:01,  6.58it/s][A
 50%|█████     | 1611/3200 [06:20<04:53,  5.41it/s][A
 50%|█████     | 1613/3200 [06:20<05:39,  4.67it/s][A
 50%|█████     | 1615/3200 [06:21<04:26,  5.94it/s][A
 50%|█████     | 1616/3200 [06:21<05:41,  4.64it/s][A
 51%|█████     | 1617/3200 [06:21<08:10,  3.23it/s][A
 51%|█████     | 1619/3200 [06:22<06:44,  3.91it/s][A
 51%|█████     | 1621/3200 [06:22<05:14,  5.02it/s][A
 51%|█████

 57%|█████▋    | 1814/3200 [06:58<06:58,  3.31it/s][A
 57%|█████▋    | 1815/3200 [06:58<08:29,  2.72it/s][A
 57%|█████▋    | 1816/3200 [06:59<06:43,  3.43it/s][A
 57%|█████▋    | 1817/3200 [06:59<05:39,  4.08it/s][A
 57%|█████▋    | 1818/3200 [06:59<04:39,  4.94it/s][A
 57%|█████▋    | 1819/3200 [06:59<05:50,  3.94it/s][A
 57%|█████▋    | 1821/3200 [06:59<04:31,  5.08it/s][A
 57%|█████▋    | 1822/3200 [06:59<03:59,  5.75it/s][A
 57%|█████▋    | 1823/3200 [07:00<03:39,  6.27it/s][A
 57%|█████▋    | 1825/3200 [07:00<03:23,  6.76it/s][A
 57%|█████▋    | 1826/3200 [07:00<03:08,  7.29it/s][A
 57%|█████▋    | 1827/3200 [07:00<03:02,  7.52it/s][A
 57%|█████▋    | 1828/3200 [07:00<04:03,  5.64it/s][A
 57%|█████▋    | 1829/3200 [07:00<03:57,  5.77it/s][A
 57%|█████▋    | 1831/3200 [07:01<03:12,  7.12it/s][A
 57%|█████▋    | 1833/3200 [07:01<03:28,  6.55it/s][A
 57%|█████▋    | 1835/3200 [07:01<03:18,  6.88it/s][A
 57%|█████▋    | 1836/3200 [07:01<04:03,  5.59it/s][A
 57%|█████

 63%|██████▎   | 2018/3200 [07:40<04:11,  4.70it/s][A
 63%|██████▎   | 2019/3200 [07:41<05:51,  3.36it/s][A
 63%|██████▎   | 2020/3200 [07:41<05:31,  3.56it/s][A
 63%|██████▎   | 2021/3200 [07:41<04:30,  4.36it/s][A
 63%|██████▎   | 2023/3200 [07:41<04:11,  4.69it/s][A
 63%|██████▎   | 2025/3200 [07:42<03:53,  5.02it/s][A
 63%|██████▎   | 2027/3200 [07:42<04:26,  4.41it/s][A
 63%|██████▎   | 2029/3200 [07:43<04:24,  4.43it/s][A
 63%|██████▎   | 2031/3200 [07:43<03:28,  5.61it/s][A
 64%|██████▎   | 2032/3200 [07:43<03:30,  5.54it/s][A
 64%|██████▎   | 2034/3200 [07:44<04:05,  4.75it/s][A
 64%|██████▎   | 2035/3200 [07:44<05:37,  3.45it/s][A
 64%|██████▎   | 2036/3200 [07:44<04:51,  3.99it/s][A
 64%|██████▎   | 2037/3200 [07:45<06:19,  3.06it/s][A
 64%|██████▎   | 2038/3200 [07:45<07:09,  2.70it/s][A
 64%|██████▎   | 2039/3200 [07:45<06:16,  3.08it/s][A
 64%|██████▍   | 2041/3200 [07:46<05:54,  3.27it/s][A
 64%|██████▍   | 2042/3200 [07:46<05:06,  3.78it/s][A
 64%|█████

 69%|██████▉   | 2220/3200 [08:25<04:45,  3.43it/s][A
 69%|██████▉   | 2221/3200 [08:25<04:05,  3.99it/s][A
 69%|██████▉   | 2222/3200 [08:25<03:55,  4.15it/s][A
 69%|██████▉   | 2223/3200 [08:26<05:06,  3.19it/s][A
 70%|██████▉   | 2225/3200 [08:26<04:46,  3.40it/s][A
 70%|██████▉   | 2226/3200 [08:26<04:25,  3.67it/s][A
 70%|██████▉   | 2228/3200 [08:26<03:35,  4.51it/s][A
 70%|██████▉   | 2229/3200 [08:27<04:14,  3.81it/s][A
 70%|██████▉   | 2230/3200 [08:27<03:28,  4.64it/s][A
 70%|██████▉   | 2232/3200 [08:27<02:51,  5.63it/s][A
 70%|██████▉   | 2234/3200 [08:27<02:17,  7.02it/s][A
 70%|██████▉   | 2236/3200 [08:27<02:05,  7.70it/s][A
 70%|██████▉   | 2238/3200 [08:28<03:47,  4.23it/s][A
 70%|███████   | 2240/3200 [08:29<02:55,  5.47it/s][A
 70%|███████   | 2242/3200 [08:29<02:53,  5.53it/s][A
 70%|███████   | 2243/3200 [08:29<04:19,  3.69it/s][A
 70%|███████   | 2244/3200 [08:30<05:09,  3.08it/s][A
 70%|███████   | 2245/3200 [08:30<05:53,  2.70it/s][A
 70%|█████

 76%|███████▌  | 2429/3200 [09:06<02:56,  4.36it/s][A
 76%|███████▌  | 2431/3200 [09:06<02:22,  5.38it/s][A
 76%|███████▌  | 2432/3200 [09:06<02:36,  4.90it/s][A
 76%|███████▌  | 2433/3200 [09:06<02:20,  5.45it/s][A
 76%|███████▌  | 2434/3200 [09:07<03:32,  3.61it/s][A
 76%|███████▌  | 2435/3200 [09:07<02:54,  4.39it/s][A
 76%|███████▌  | 2438/3200 [09:07<02:12,  5.74it/s][A
 76%|███████▋  | 2440/3200 [09:07<01:57,  6.47it/s][A
 76%|███████▋  | 2442/3200 [09:08<02:52,  4.38it/s][A
 76%|███████▋  | 2443/3200 [09:09<03:51,  3.28it/s][A
 76%|███████▋  | 2446/3200 [09:09<02:51,  4.40it/s][A
 76%|███████▋  | 2448/3200 [09:09<02:42,  4.62it/s][A
 77%|███████▋  | 2449/3200 [09:10<02:58,  4.21it/s][A
 77%|███████▋  | 2450/3200 [09:10<03:51,  3.24it/s][A
 77%|███████▋  | 2451/3200 [09:10<04:31,  2.76it/s][A
 77%|███████▋  | 2452/3200 [09:11<04:54,  2.54it/s][A
 77%|███████▋  | 2453/3200 [09:11<03:50,  3.24it/s][A
 77%|███████▋  | 2455/3200 [09:11<02:54,  4.27it/s][A
 77%|█████

 83%|████████▎ | 2649/3200 [09:49<02:40,  3.43it/s][A
 83%|████████▎ | 2650/3200 [09:49<02:08,  4.26it/s][A
 83%|████████▎ | 2651/3200 [09:49<02:01,  4.51it/s][A
 83%|████████▎ | 2652/3200 [09:50<02:10,  4.20it/s][A
 83%|████████▎ | 2653/3200 [09:50<02:06,  4.31it/s][A
 83%|████████▎ | 2654/3200 [09:50<02:03,  4.43it/s][A
 83%|████████▎ | 2655/3200 [09:51<02:43,  3.34it/s][A
 83%|████████▎ | 2656/3200 [09:51<03:11,  2.84it/s][A
 83%|████████▎ | 2657/3200 [09:51<02:31,  3.59it/s][A
 83%|████████▎ | 2658/3200 [09:52<02:59,  3.03it/s][A
 83%|████████▎ | 2660/3200 [09:52<02:15,  3.98it/s][A
 83%|████████▎ | 2661/3200 [09:52<02:57,  3.04it/s][A
 83%|████████▎ | 2662/3200 [09:52<02:27,  3.65it/s][A
 83%|████████▎ | 2664/3200 [09:53<01:52,  4.77it/s][A
 83%|████████▎ | 2665/3200 [09:53<01:37,  5.46it/s][A
 83%|████████▎ | 2666/3200 [09:53<02:36,  3.41it/s][A
 83%|████████▎ | 2667/3200 [09:53<02:26,  3.65it/s][A
 83%|████████▎ | 2668/3200 [09:54<02:30,  3.53it/s][A
 83%|█████

 89%|████████▉ | 2854/3200 [10:33<01:59,  2.89it/s][A
 89%|████████▉ | 2855/3200 [10:34<01:52,  3.07it/s][A
 89%|████████▉ | 2856/3200 [10:34<01:33,  3.67it/s][A
 89%|████████▉ | 2857/3200 [10:34<01:26,  3.98it/s][A
 89%|████████▉ | 2858/3200 [10:34<01:13,  4.67it/s][A
 89%|████████▉ | 2859/3200 [10:34<01:05,  5.21it/s][A
 89%|████████▉ | 2861/3200 [10:34<00:51,  6.59it/s][A
 90%|████████▉ | 2864/3200 [10:35<00:40,  8.25it/s][A
 90%|████████▉ | 2866/3200 [10:35<00:42,  7.95it/s][A
 90%|████████▉ | 2868/3200 [10:35<00:59,  5.60it/s][A
 90%|████████▉ | 2869/3200 [10:36<01:10,  4.70it/s][A
 90%|████████▉ | 2871/3200 [10:36<01:01,  5.33it/s][A
 90%|████████▉ | 2872/3200 [10:37<01:31,  3.60it/s][A
 90%|████████▉ | 2873/3200 [10:37<01:23,  3.92it/s][A
 90%|████████▉ | 2876/3200 [10:37<01:08,  4.75it/s][A
 90%|████████▉ | 2877/3200 [10:37<01:08,  4.71it/s][A
 90%|████████▉ | 2878/3200 [10:38<01:13,  4.38it/s][A
 90%|████████▉ | 2879/3200 [10:38<01:02,  5.15it/s][A
 90%|█████

 96%|█████████▌| 3060/3200 [11:14<00:43,  3.23it/s][A
 96%|█████████▌| 3061/3200 [11:14<00:35,  3.91it/s][A
 96%|█████████▌| 3063/3200 [11:15<00:30,  4.54it/s][A
 96%|█████████▌| 3064/3200 [11:15<00:28,  4.70it/s][A
 96%|█████████▌| 3065/3200 [11:15<00:28,  4.69it/s][A
 96%|█████████▌| 3066/3200 [11:15<00:26,  5.11it/s][A
 96%|█████████▌| 3068/3200 [11:16<00:29,  4.47it/s][A
 96%|█████████▌| 3069/3200 [11:16<00:24,  5.33it/s][A
 96%|█████████▌| 3070/3200 [11:16<00:36,  3.55it/s][A
 96%|█████████▌| 3072/3200 [11:17<00:27,  4.61it/s][A
 96%|█████████▌| 3074/3200 [11:17<00:23,  5.31it/s][A
 96%|█████████▌| 3075/3200 [11:17<00:23,  5.37it/s][A
 96%|█████████▌| 3076/3200 [11:17<00:20,  6.18it/s][A
 96%|█████████▌| 3077/3200 [11:17<00:23,  5.22it/s][A
 96%|█████████▌| 3079/3200 [11:17<00:19,  6.32it/s][A
 96%|█████████▋| 3080/3200 [11:18<00:22,  5.28it/s][A
 96%|█████████▋| 3081/3200 [11:18<00:20,  5.94it/s][A
 96%|█████████▋| 3082/3200 [11:18<00:28,  4.08it/s][A
 96%|█████

In [19]:
# tabulate the per-file feature vectors and check the dimensions
Xfeat_df2 = pd.DataFrame(data=Xfeatures)
Xfeat_df2.shape

(3200, 256)

In [20]:
# persist the feature frame to the working directory
Xfeat_df2.to_csv(path_or_buf='Xft100_.csv')

In [23]:
# energy extraction: one energy() vector per audio file, in filesArray order
rmsX = []
# the loop variable is `path` (not `files`) so it no longer overwrites the `files`
# list built when the wav files were gathered
for path in tqdm(filesArray):
    features = energy(path)
    rmsX.append(features)




  0%|          | 0/3200 [00:00<?, ?it/s][A
  0%|          | 6/3200 [00:00<01:19, 40.04it/s][A
  0%|          | 8/3200 [00:00<01:51, 28.53it/s][A
  0%|          | 10/3200 [00:00<02:32, 20.88it/s][A
  0%|          | 12/3200 [00:00<02:36, 20.34it/s][A
  0%|          | 14/3200 [00:00<02:58, 17.90it/s][A
  0%|          | 16/3200 [00:00<03:11, 16.64it/s][A
  1%|          | 18/3200 [00:01<03:52, 13.71it/s][A
  1%|          | 20/3200 [00:01<03:41, 14.38it/s][A
  1%|          | 22/3200 [00:01<03:47, 13.95it/s][A
  1%|          | 26/3200 [00:01<03:21, 15.72it/s][A
  1%|          | 29/3200 [00:01<02:54, 18.15it/s][A
  1%|          | 32/3200 [00:01<02:35, 20.34it/s][A
  1%|          | 36/3200 [00:01<02:22, 22.25it/s][A
  1%|▏         | 40/3200 [00:01<02:07, 24.75it/s][A
  1%|▏         | 43/3200 [00:02<02:56, 17.93it/s][A
  1%|▏         | 46/3200 [00:02<03:07, 16.81it/s][A
  2%|▏         | 50/3200 [00:02<02:37, 20.03it/s][A
  2%|▏         | 54/3200 [00:02<02:19, 22.59it/s][A
  2

 14%|█▎        | 436/3200 [00:24<02:46, 16.60it/s][A
 14%|█▍        | 440/3200 [00:25<02:19, 19.83it/s][A
 14%|█▍        | 443/3200 [00:25<02:43, 16.89it/s][A
 14%|█▍        | 446/3200 [00:25<02:28, 18.52it/s][A
 14%|█▍        | 449/3200 [00:25<02:34, 17.84it/s][A
 14%|█▍        | 453/3200 [00:25<02:24, 19.04it/s][A
 14%|█▍        | 456/3200 [00:25<02:20, 19.55it/s][A
 14%|█▍        | 459/3200 [00:26<02:39, 17.15it/s][A
 14%|█▍        | 461/3200 [00:26<03:15, 13.99it/s][A
 14%|█▍        | 463/3200 [00:26<03:31, 12.95it/s][A
 15%|█▍        | 465/3200 [00:26<03:11, 14.25it/s][A
 15%|█▍        | 468/3200 [00:26<02:43, 16.75it/s][A
 15%|█▍        | 473/3200 [00:26<02:12, 20.54it/s][A
 15%|█▍        | 476/3200 [00:27<02:14, 20.22it/s][A
 15%|█▍        | 479/3200 [00:27<02:16, 19.98it/s][A
 15%|█▌        | 482/3200 [00:27<02:03, 21.94it/s][A
 15%|█▌        | 485/3200 [00:27<02:25, 18.72it/s][A
 15%|█▌        | 488/3200 [00:27<02:27, 18.36it/s][A
 15%|█▌        | 494/3200 [0

 27%|██▋       | 861/3200 [00:49<02:22, 16.36it/s][A
 27%|██▋       | 864/3200 [00:49<02:17, 16.94it/s][A
 27%|██▋       | 866/3200 [00:49<03:02, 12.80it/s][A
 27%|██▋       | 869/3200 [00:49<02:31, 15.36it/s][A
 27%|██▋       | 871/3200 [00:49<02:26, 15.93it/s][A
 27%|██▋       | 874/3200 [00:50<02:31, 15.38it/s][A
 27%|██▋       | 876/3200 [00:50<02:22, 16.32it/s][A
 27%|██▋       | 878/3200 [00:50<02:30, 15.45it/s][A
 28%|██▊       | 880/3200 [00:50<02:32, 15.21it/s][A
 28%|██▊       | 883/3200 [00:50<02:11, 17.62it/s][A
 28%|██▊       | 888/3200 [00:50<01:51, 20.69it/s][A
 28%|██▊       | 891/3200 [00:50<01:53, 20.43it/s][A
 28%|██▊       | 894/3200 [00:51<01:57, 19.67it/s][A
 28%|██▊       | 898/3200 [00:51<01:39, 23.13it/s][A
 28%|██▊       | 902/3200 [00:51<01:28, 25.83it/s][A
 28%|██▊       | 905/3200 [00:51<01:27, 26.19it/s][A
 28%|██▊       | 908/3200 [00:51<01:45, 21.67it/s][A
 28%|██▊       | 911/3200 [00:51<02:14, 17.03it/s][A
 29%|██▊       | 915/3200 [0

 39%|███▉      | 1254/3200 [01:12<01:57, 16.58it/s][A
 39%|███▉      | 1256/3200 [01:12<02:02, 15.84it/s][A
 39%|███▉      | 1259/3200 [01:12<01:47, 18.10it/s][A
 39%|███▉      | 1261/3200 [01:12<01:45, 18.42it/s][A
 40%|███▉      | 1265/3200 [01:13<01:34, 20.52it/s][A
 40%|███▉      | 1270/3200 [01:13<01:20, 24.08it/s][A
 40%|███▉      | 1273/3200 [01:13<01:27, 21.91it/s][A
 40%|███▉      | 1276/3200 [01:13<01:27, 21.92it/s][A
 40%|███▉      | 1279/3200 [01:13<01:50, 17.46it/s][A
 40%|████      | 1282/3200 [01:13<01:47, 17.92it/s][A
 40%|████      | 1285/3200 [01:14<01:50, 17.40it/s][A
 40%|████      | 1287/3200 [01:14<01:47, 17.80it/s][A
 40%|████      | 1292/3200 [01:14<01:27, 21.74it/s][A
 40%|████      | 1295/3200 [01:14<01:23, 22.93it/s][A
 41%|████      | 1298/3200 [01:14<01:46, 17.87it/s][A
 41%|████      | 1301/3200 [01:14<01:42, 18.57it/s][A
 41%|████      | 1304/3200 [01:14<01:40, 18.79it/s][A
 41%|████      | 1307/3200 [01:15<01:46, 17.76it/s][A
 41%|████ 

 52%|█████▏    | 1661/3200 [01:36<01:33, 16.37it/s][A
 52%|█████▏    | 1664/3200 [01:36<01:25, 18.02it/s][A
 52%|█████▏    | 1667/3200 [01:36<01:16, 20.13it/s][A
 52%|█████▏    | 1670/3200 [01:36<01:28, 17.24it/s][A
 52%|█████▏    | 1674/3200 [01:36<01:23, 18.22it/s][A
 52%|█████▏    | 1679/3200 [01:37<01:15, 20.07it/s][A
 53%|█████▎    | 1682/3200 [01:37<01:22, 18.47it/s][A
 53%|█████▎    | 1685/3200 [01:37<01:39, 15.29it/s][A
 53%|█████▎    | 1690/3200 [01:37<01:19, 19.04it/s][A
 53%|█████▎    | 1694/3200 [01:37<01:11, 21.16it/s][A
 53%|█████▎    | 1697/3200 [01:37<01:13, 20.31it/s][A
 53%|█████▎    | 1700/3200 [01:38<01:18, 19.18it/s][A
 53%|█████▎    | 1704/3200 [01:38<01:05, 22.72it/s][A
 53%|█████▎    | 1707/3200 [01:38<01:15, 19.78it/s][A
 53%|█████▎    | 1710/3200 [01:38<01:19, 18.86it/s][A
 54%|█████▎    | 1713/3200 [01:38<01:29, 16.54it/s][A
 54%|█████▎    | 1715/3200 [01:38<01:36, 15.39it/s][A
 54%|█████▎    | 1717/3200 [01:39<01:40, 14.78it/s][A
 54%|█████

 66%|██████▌   | 2105/3200 [01:59<01:11, 15.38it/s][A
 66%|██████▌   | 2109/3200 [01:59<01:05, 16.64it/s][A
 66%|██████▌   | 2111/3200 [01:59<01:16, 14.31it/s][A
 66%|██████▌   | 2113/3200 [02:00<01:25, 12.70it/s][A
 66%|██████▌   | 2115/3200 [02:00<01:27, 12.36it/s][A
 66%|██████▌   | 2118/3200 [02:00<01:18, 13.85it/s][A
 66%|██████▋   | 2120/3200 [02:00<01:23, 12.96it/s][A
 66%|██████▋   | 2122/3200 [02:00<01:16, 14.18it/s][A
 66%|██████▋   | 2124/3200 [02:00<01:28, 12.12it/s][A
 66%|██████▋   | 2126/3200 [02:00<01:20, 13.33it/s][A
 66%|██████▋   | 2128/3200 [02:01<01:21, 13.09it/s][A
 67%|██████▋   | 2130/3200 [02:01<01:17, 13.73it/s][A
 67%|██████▋   | 2133/3200 [02:01<01:11, 15.02it/s][A
 67%|██████▋   | 2135/3200 [02:01<01:12, 14.65it/s][A
 67%|██████▋   | 2137/3200 [02:01<01:21, 13.02it/s][A
 67%|██████▋   | 2139/3200 [02:01<01:20, 13.17it/s][A
 67%|██████▋   | 2141/3200 [02:02<01:16, 13.85it/s][A
 67%|██████▋   | 2145/3200 [02:02<01:04, 16.44it/s][A
 67%|█████

 80%|███████▉  | 2557/3200 [02:23<00:36, 17.73it/s][A
 80%|████████  | 2561/3200 [02:23<00:31, 20.38it/s][A
 80%|████████  | 2564/3200 [02:23<00:33, 18.75it/s][A
 80%|████████  | 2567/3200 [02:23<00:34, 18.62it/s][A
 80%|████████  | 2570/3200 [02:24<00:35, 17.98it/s][A
 80%|████████  | 2572/3200 [02:24<00:35, 17.48it/s][A
 81%|████████  | 2577/3200 [02:24<00:29, 21.45it/s][A
 81%|████████  | 2580/3200 [02:24<00:31, 19.75it/s][A
 81%|████████  | 2583/3200 [02:24<00:30, 20.11it/s][A
 81%|████████  | 2586/3200 [02:24<00:33, 18.50it/s][A
 81%|████████  | 2590/3200 [02:24<00:29, 20.88it/s][A
 81%|████████  | 2594/3200 [02:25<00:28, 21.16it/s][A
 81%|████████  | 2597/3200 [02:25<00:29, 20.45it/s][A
 81%|████████▏ | 2602/3200 [02:25<00:24, 24.67it/s][A
 81%|████████▏ | 2605/3200 [02:25<00:23, 25.02it/s][A
 82%|████████▏ | 2610/3200 [02:25<00:20, 29.23it/s][A
 82%|████████▏ | 2614/3200 [02:25<00:20, 29.27it/s][A
 82%|████████▏ | 2618/3200 [02:25<00:24, 23.46it/s][A
 82%|█████

 94%|█████████▍| 3016/3200 [02:46<00:11, 15.51it/s][A
 94%|█████████▍| 3019/3200 [02:47<00:11, 15.57it/s][A
 94%|█████████▍| 3022/3200 [02:47<00:10, 17.47it/s][A
 95%|█████████▍| 3025/3200 [02:47<00:09, 18.73it/s][A
 95%|█████████▍| 3028/3200 [02:47<00:09, 17.73it/s][A
 95%|█████████▍| 3032/3200 [02:47<00:08, 18.97it/s][A
 95%|█████████▍| 3035/3200 [02:47<00:07, 21.12it/s][A
 95%|█████████▍| 3038/3200 [02:47<00:07, 22.76it/s][A
 95%|█████████▌| 3041/3200 [02:48<00:08, 18.55it/s][A
 95%|█████████▌| 3044/3200 [02:48<00:09, 16.24it/s][A
 95%|█████████▌| 3046/3200 [02:48<00:08, 17.18it/s][A
 95%|█████████▌| 3049/3200 [02:48<00:08, 17.81it/s][A
 95%|█████████▌| 3052/3200 [02:48<00:07, 20.28it/s][A
 95%|█████████▌| 3055/3200 [02:48<00:07, 20.56it/s][A
 96%|█████████▌| 3058/3200 [02:49<00:08, 16.66it/s][A
 96%|█████████▌| 3060/3200 [02:49<00:08, 15.56it/s][A
 96%|█████████▌| 3063/3200 [02:49<00:07, 18.06it/s][A
 96%|█████████▌| 3066/3200 [02:49<00:07, 18.47it/s][A
 96%|█████

In [24]:
# One row per entry of rmsX (i.e. per processed file).
Xrms = pd.DataFrame(data=rmsX)

In [25]:
# Persist the energy table (index included) to CSV.
Xrms.to_csv(path_or_buf='dataRms_.csv')

In [None]:
# Place the feature columns and the energy columns side by side (aligned on index).
# NOTE(review): if both frames carry default integer column labels, the combined
# frame will contain duplicate labels — confirm.
result = pd.concat(objs=[Xfeat_df2, Xrms], axis='columns')

In [41]:
# Load stat100.csv into a DataFrame.
# NOTE(review): if that file was written with its index, the index comes back
# as an extra 'Unnamed: 0' column — confirm.
stat_df = pd.read_csv(filepath_or_buffer='stat100.csv')

In [42]:
# Build the combined table straight from its three inputs rather than extending
# `result` in place, so re-running this cell cannot append stat_df's columns twice.
# On a top-to-bottom run this yields the same frame as the two-step version.
# NOTE(review): the saved shape output below is (3200, 65) although Xfeat_df2 alone
# was reported as (3200, 256) — confirm which inputs produced the saved result.
result = pd.concat([Xfeat_df2, Xrms, stat_df], axis=1)

In [43]:
# Inspect (rows, columns) of the combined feature table.
result.shape

(3200, 65)

In [None]:
# Persist the combined feature table (index included) to CSV.
result.to_csv(path_or_buf='dataFeatures_.csv')

# process categorical data 

In [59]:
# Work on an independent copy so X_cat_df itself is left untouched,
# then preview the first five rows.
cat_data = X_cat_df.copy(deep=True)
cat_data.head(5)

Unnamed: 0,season,tod,country
0,summer,morning,United States
1,summer,morning,United States
2,summer,day,United States
3,summer,morning,United States
4,summer,morning,United States


In [130]:
from pandas.api.types import CategoricalDtype
from sklearn.preprocessing import LabelBinarizer, LabelEncoder, OneHotEncoder

In [131]:
# Instantiate the scikit-learn encoders. The cells shown below encode with
# pd.get_dummies and .cat.codes instead, so these objects are only created here.
lb = LabelBinarizer()
le = LabelEncoder()
ohe = OneHotEncoder()

In [62]:
# Remove the time-of-day column. Rebinding (instead of inplace=True) together with
# errors='ignore' makes the cell idempotent: re-running it after 'tod' is already
# gone no longer raises KeyError.
cat_data = cat_data.drop(columns=['tod'], errors='ignore')

In [61]:
# Convert both text columns to the pandas 'category' dtype so that .cat.codes
# is available on them later.
cat_data['country'] = cat_data['country'].astype('category')
cat_data['season'] = cat_data['season'].astype('category')

In [134]:
# One-hot encode country and season; drop_first=True omits the first category
# of each column so the indicators are not redundant.
# dtype is pinned to uint8 (the default before pandas 2.0) because pandas >= 2.0
# emits bool columns by default, which would turn the 0/1 values shown in the
# saved output (and written to catOhe.csv) into True/False.
cat = pd.get_dummies(cat_data['country'], prefix='country', drop_first=True, dtype='uint8')
cat2 = pd.get_dummies(cat_data['season'], prefix='season', drop_first=True, dtype='uint8')

In [135]:
# Country indicators followed by season indicators, aligned on the row index.
XFeat_dummies = pd.concat(objs=[cat, cat2], axis='columns')

In [136]:
# Display the one-hot encoded categorical table.
XFeat_dummies

Unnamed: 0,country_Mexico,country_United States,season_spring,season_summer,season_winter
0,0,1,0,1,0
1,0,1,0,1,0
2,0,1,0,1,0
3,0,1,0,1,0
4,0,1,0,1,0
...,...,...,...,...,...
3195,0,1,0,1,0
3196,0,1,0,1,0
3197,0,1,0,0,1
3198,0,1,1,0,0


In [120]:
# Integer-coded version of the categorical columns: each value is replaced by
# the position of its category, as given by .cat.codes.
catEnc = pd.DataFrame({
    'season': cat_data.season.cat.codes,
    'country': cat_data.country.cat.codes,
})

In [121]:
# Display the integer-coded categorical table.
catEnc

Unnamed: 0,season,country
0,2,2
1,2,2
2,2,2
3,2,2
4,2,2
...,...,...
3195,2,2
3196,2,2
3197,3,2
3198,1,2


# save feature array and target data array

In [74]:
# Expose the combined feature table under the name used by the cells below.
X_df = pd.DataFrame(data=result)

In [96]:
# Feature table with the columns of targetEnc appended on the right.
# NOTE(review): targetEnc is not defined in the cells visible here — confirm it
# still exists on a fresh kernel (catEnc is the encoding built earlier in this
# notebook section).
XFeat_enc = pd.concat(objs=[X_df, targetEnc], axis='columns')

In [99]:
# Persist the features-plus-encoded-columns table (index included) to CSV.
XFeat_enc.to_csv(path_or_buf='Xfeat_le.csv')

In [98]:
# Feature table with the one-hot categorical indicators appended on the right.
Xfeat_ohe = pd.concat(objs=[X_df, XFeat_dummies], axis='columns')

In [100]:
# Persist the features-plus-one-hot table (index included) to CSV.
Xfeat_ohe.to_csv(path_or_buf='Xfeat_ohe.csv')

In [101]:
# Independent copy of the feature table, so adding a column to it below
# does not alter X_df.
df_wT = X_df.copy(deep=True)

In [102]:
# Attach y: the ebird_code column of target_df becomes the 'target' column
# (values are aligned on the row index).
df_wT['target'] = target_df.ebird_code

In [103]:
# Persist the features-plus-target table (index included) to CSV.
df_wT.to_csv(path_or_buf='Xfeat_target.csv')

In [123]:
# Persist the integer-coded categorical table (index included) to CSV.
catEnc.to_csv(path_or_buf='catEnc.csv')

In [137]:
# Persist the one-hot categorical table (index included) to CSV.
XFeat_dummies.to_csv(path_or_buf='catOhe.csv')