# Preprocessing All of the Samples and Storing the Sounds as Numerical Data

In [2]:
import os
from scipy.io import wavfile
from scipy.fftpack import fft
import matplotlib.pyplot as plt
from pylab import *

def list_samples_from_directory (sample_dir, include_dir_path) :
    '''
        List the names of the files directly inside sample_dir that end in ".wav".

        sample_dir: directory to scan (sub-directories are not descended into)
        include_dir_path: if true, every name is joined onto sample_dir so the
                          result can be opened as-is; otherwise the bare file
                          names are returned (probably always use true)

        Returns a list of strings, in the order os.listdir reports them.

        The extension test is a suffix match, so names that merely contain
        ".wav" (for example "Grand.wav.asd") are skipped in both modes.
    '''
    wav_names = [name for name in os.listdir(sample_dir) if name.endswith(".wav")]
    if include_dir_path:
        # os.path.join copes with sample_dir given with or without a trailing separator
        return [os.path.join(sample_dir, name) for name in wav_names]
    return wav_names

def split_whole_wav_to_individual_samples (sample_path, shortened_samples_destination, 
                                           desired_sample_time, time_between_samples) :
    '''
        Splits a long .wav recording into shorter clips, taking into account the
        spacing between the sounds, and writes every clip to its own file.

        sample_path: path of the .wav file to read
        shortened_samples_destination: prefix for the output files; clip number k
                                       is written to <prefix> + str(k) + ".wav"
        desired_sample_time: length of each clip, in seconds
        time_between_samples: gap skipped after each clip, in seconds

        Raises ValueError when the two times do not add up to a positive number
        of samples (the read position would never advance).

        The last clip is shorter than the others when the recording ends part
        way through it.

        NOTE: output names depend only on the clip counter, so calling this for
        several recordings with the same destination prefix overwrites the
        clips written by the earlier calls.

        TODO: write data to a data file
    '''
    sample_rate, sample = wavfile.read(sample_path)

    sample_length = convert_sec_to_index(sample_rate, desired_sample_time)
    split_length = convert_sec_to_index(sample_rate, time_between_samples)

    stride = sample_length + split_length
    if stride <= 0:
        raise ValueError("desired_sample_time + time_between_samples must cover at least one sample")

    # wavfile.write does not create missing directories, so make sure the target exists
    destination_dir = os.path.dirname(shortened_samples_destination)
    if destination_dir and not os.path.isdir(destination_dir):
        os.makedirs(destination_dir)

    for counter, start in enumerate(range(0, len(sample), stride)):
        clip = sample[start : start + sample_length]
        wavfile.write(shortened_samples_destination + str(counter) + ".wav", sample_rate, clip)
    
def convert_sec_to_index (sampleRate, time):
    '''
        Converts a time in seconds into a sample index at the given sample rate.
        The product is passed through int(), which drops any fractional part.
    '''
    position = time * sampleRate
    return int(position)



## Execute

In [6]:
# Collect the source recordings of every instrument (full paths).
grandSamples = list_samples_from_directory ("./Samples/Grand/", True)
clavSamples = list_samples_from_directory ("./Samples/Clav/", True)
rhodesSamples = list_samples_from_directory("./Samples/Rhodes/", True)
bassSamples = list_samples_from_directory("./Samples/Bass/", True)

# Each entry pairs an instrument's recordings with the directory they were read from.
sample_sets_list = [[grandSamples, "./Samples/Grand/"],
                [clavSamples, "./Samples/Clav/"],
                [rhodesSamples, "./Samples/Rhodes/"],
                [bassSamples, "./Samples/Bass/"]]

print ("Grand Samples:\n\t%s\n"
       "Clav Samples:\n\t%s\n"
       "RhodesSamples:\n\t%s\n"
       "BassSamples:\n\t%s" % (grandSamples, clavSamples, rhodesSamples, bassSamples))

print (sample_sets_list[0][0])

# Split every recording into 1.2 s clips separated by 0.8 s gaps; the clips go
# into a "Shortened" folder next to the recordings.
# NOTE(review): split_whole_wav_to_individual_samples names its clips 0.wav, 1.wav, ...
# for every call, so when one instrument directory holds more than one recording
# the clips of the later recording replace those of the earlier one - confirm
# whether that is intended.
for instrument_samples in sample_sets_list :
    sample_dir_path = instrument_samples[1]
    shortened_dir = os.path.join(sample_dir_path, "Shortened")
    # the clips cannot be written unless the target folder already exists
    if not os.path.isdir(shortened_dir):
        os.makedirs(shortened_dir)
    # joining with "" appends the trailing separator the splitter's prefix needs
    shortened_prefix = os.path.join(shortened_dir, "")
    for sample in instrument_samples[0]:
        split_whole_wav_to_individual_samples (sample, shortened_prefix, 1.2, .8)



Grand Samples:
	['./Samples/Grand/Grand.wav', './Samples/Grand/Grand2.wav']
Clav Samples:
	['./Samples/Clav/ClavinetSamples.wav']
RhodesSamples:
	['./Samples/Rhodes/RhodesSamples.wav']
BassSamples:
	['./Samples/Bass/Bass.wav']
['./Samples/Grand/Grand.wav', './Samples/Grand/Grand2.wav']
