In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

In [2]:
# Relative path of the CSV that the dataset and augmentation cells below read.
data_pt = 'data/final/target_pt.csv'

In [3]:
class TextDataset(Dataset):
    """Dataset that serves the ``text`` column of a CSV file, one row per item.

    Rows containing any missing value are dropped when the file is loaded.
    """

    def __init__(self, csv_file):
        """Read ``csv_file`` with pandas and discard rows that contain NaNs.

        Args:
            csv_file: path (or file-like object) accepted by ``pd.read_csv``;
                the file must provide a ``text`` column.
        """
        self.data = pd.read_csv(csv_file).dropna()

    def __len__(self):
        """Return the number of rows left after dropping missing values."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``text`` value at positional index ``idx``."""
        return self.data['text'].iloc[idx]

    def visualizeData(self):
        """Print the first rows of the loaded frame for a quick sanity check."""
        # `self` was missing from the original signature, so calling this on
        # an instance raised TypeError before reaching the print.
        print(self.data.head())

In [4]:
# Build the dataset from the CSV path defined above and wrap it in a loader
# that yields one text row per batch, in shuffled order.
dataset = TextDataset(data_pt)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

## CPU Augmentation

In [5]:
import multiprocessing
import os
import time
import random
# Seed Python's `random` module in this (notebook) process.
# NOTE(review): the augmentation transforms draw from `random` inside
# multiprocessing workers; whether those workers reproduce a stream derived
# from this seed depends on the start method / interpreter behavior — confirm
# before relying on run-to-run reproducibility.
random.seed(101)

In [6]:
# Load the source CSV for augmentation and keep only fully populated rows.
data_pt = 'data/final/target_pt.csv'
data = pd.read_csv(data_pt).dropna()

In [66]:
# Split `data` into consecutive chunks of at most `stride` rows so each chunk
# can be handed to a separate worker. Positional slicing clamps at the end of
# the frame, so the final chunk simply holds whatever rows remain.
stride=50
df_list = [
    data.iloc[start:start+stride]
    for start in range(0, len(data), stride)
]

In [67]:
# Display how many chunks the split above produced.
len(df_list)

15

In [68]:
class PitchTransform(object):
    """Bit-shift the two halves of every integer token in a ``text`` column.

    Each token is split into its low 28 bits (``LH`` in the original code) and
    the bits above them (``RH``). Both halves are shifted by the same amount
    and then recombined.
    """

    def __init__(self,shift_amount):
        # Positive values shift left, zero or negative values shift right.
        self.shift = shift_amount

    def _shift_token(self, token, low_mask, high_mask):
        """Return the shifted integer for a single token string."""
        low = int(token) & low_mask
        high = int(token) >> 28
        if self.shift > 0:
            # Left shift: bits pushed past each half's width are discarded.
            low = (low << self.shift) & low_mask
            high = (high << self.shift) & high_mask
        else:
            low = low >> -self.shift
            high = high >> -self.shift
        return (high << 28) + low

    def __call__(self,data):
        """Return a new one-column (``text``) frame with every row shifted.

        Args:
            data: DataFrame whose ``text`` column holds space-separated
                integer tokens.
        """
        low_mask = (1 << 28) - 1
        high_mask = (1 << 34) - 1
        shifted_rows = []
        for _, record in data.iterrows():
            tokens = record['text'].strip().split(" ")
            shifted_rows.append(
                " ".join(
                    str(self._shift_token(tok, low_mask, high_mask))
                    for tok in tokens
                )
            )
        return pd.DataFrame(shifted_rows, columns = ['text'])

In [69]:
class NoteTransform(object):
    """Randomly clear individual bits of every integer token.

    For each input row the output holds two independently masked copies
    followed by the untouched original row.
    """

    def __init__(self,p):
        # Per-bit probability weight of the mask bit being 0 (bit cleared).
        self.p = p

    def _random_mask(self):
        """Draw a 62-bit mask, one weighted random choice per bit."""
        bit_weights = [self.p, 1-self.p]
        mask = 0
        for _ in range(62):
            mask = mask << 1
            mask += random.choices([0,1],weights = bit_weights)[0]
        return mask

    def __call__(self,data):
        """Return a one-column (``text``) frame: two masked copies + original per row.

        Args:
            data: DataFrame whose ``text`` column holds space-separated
                integer tokens.
        """
        rows = []
        for _, record in data.iterrows():
            original = record['text']
            tokens = original.strip().split(" ")
            for _copy in range(2):
                masked_tokens = []
                for tok in tokens:
                    # A fresh mask is drawn for every token.
                    mask = self._random_mask()
                    masked_tokens.append(str(int(tok) & mask))
                rows.append(" ".join(masked_tokens))
            # The original row is appended as read (not stripped).
            rows.append(original)
        return pd.DataFrame(rows, columns = ['text'])

In [70]:
class HandTransform(object):
    """Randomly blank one half ("hand") of individual integer tokens.

    For each input row the output holds two independently augmented copies
    followed by the untouched original row.
    """

    def __init__(self,p):
        # Probability that a given token has one of its hands dropped.
        self.p = p

    def _augment_token(self, token, low_mask):
        """Return ``token`` unchanged, or with one hand zeroed, as a string."""
        if not random.random() < self.p:
            return token
        which = random.randint(1,2)
        if which == 1:
            # Drop left hand: clear the low 28 bits.
            return str((int(token) >> 28) << 28)
        elif which == 2:
            # Drop right hand: keep only the low 28 bits.
            return str(int(token) & low_mask)

    def __call__(self,data):
        """Return a one-column (``text``) frame: two augmented copies + original per row.

        Args:
            data: DataFrame whose ``text`` column holds space-separated
                integer tokens.
        """
        low_mask = (1 << 28) - 1
        rows = []
        for _, record in data.iterrows():
            original = record['text']
            tokens = original.strip().split(" ")
            for _copy in range(2):
                pieces = [self._augment_token(tok, low_mask) for tok in tokens]
                rows.append(" ".join(pieces).strip())
            # The original row is appended as read (not stripped).
            rows.append(original)
        return pd.DataFrame(rows, columns = ['text'])

In [71]:
class ColTransform(object):
    """Randomly replace whole tokens with ``0``.

    For each input row the output holds two independently augmented copies
    followed by the untouched original row.
    """

    def __init__(self,p):
        # Probability that a given token is replaced by "0".
        self.p = p

    def __call__(self,data):
        """Return a one-column (``text``) frame: two augmented copies + original per row.

        Args:
            data: DataFrame whose ``text`` column holds space-separated tokens.
        """
        rows = []
        for _, record in data.iterrows():
            original = record['text']
            tokens = original.strip().split(" ")
            for _copy in range(2):
                # One random draw per token, in token order.
                pieces = [
                    "0" if random.random() < self.p else tok
                    for tok in tokens
                ]
                rows.append(" ".join(pieces).strip())
            # The original row is appended as read (not stripped).
            rows.append(original)
        return pd.DataFrame(rows, columns = ['text'])

In [72]:
def processDf(idx,Col,Hand,Note,Pitch):
    """Augment chunk ``idx`` of ``df_list`` and write it to ``tmp/{idx}.csv``.

    The chunk goes through ColTransform, HandTransform and PitchTransform, in
    that order, with progress printed before each stage.

    Args:
        idx: position of the chunk inside the module-level ``df_list``.
        Col: probability passed to ``ColTransform``.
        Hand: probability passed to ``HandTransform``.
        Note: accepted for interface compatibility; no NoteTransform stage is
            applied (the original built one but never called it).
        Pitch: shift amount passed to ``PitchTransform``.
    """
    print(f'Processing {idx}/{len(df_list)}')
    cur_df = df_list[idx]
    tf1 = ColTransform(Col)
    tf2 = HandTransform(Hand)
    tf4 = PitchTransform(Pitch)
    print(idx,1)
    cur_df= tf1(cur_df)
    print(idx,2)
    cur_df= tf2(cur_df)
    print(idx,4)
    cur_df= tf4(cur_df)
    # Make sure the scratch directory exists; exist_ok keeps this safe when
    # several workers reach this point at the same time.
    os.makedirs('tmp', exist_ok=True)
    cur_df.to_csv(f'tmp/{idx}.csv',index=False)

In [73]:
def transformData(shift, cores=30):
    """Run ``processDf`` over every chunk of ``df_list`` in a worker pool.

    Each chunk is processed with fixed probabilities (Col=.3, Hand=.5,
    Note=.5) and the given pitch shift; results are written to disk by
    ``processDf``, so nothing is returned.

    Args:
        shift: shift amount forwarded to ``processDf`` as its ``Pitch`` argument.
        cores: upper bound on the number of worker processes (default 30).
    """
    values = [(i,.3,.5,.5,shift) for i in range(len(df_list))]
    if not values:
        # Nothing to process; avoid spinning up a pool for no work.
        return
    # The context manager tears the pool down once starmap has returned, so
    # repeated calls no longer leave idle worker processes behind. There is no
    # point starting more workers than there are chunks.
    with multiprocessing.Pool(processes=min(cores, len(values))) as pool:
        pool.starmap(processDf, values)

In [74]:
def combineOutputs():
    """Concatenate every chunk CSV in ``tmp`` and delete the files read.

    Only regular ``*.csv`` files are considered, so stray entries in the
    directory are neither parsed nor removed. Files are combined in numeric
    order of their stem (``0.csv``, ``1.csv``, ..., ``10.csv``) so the result
    does not depend on directory listing order.

    Returns:
        DataFrame with the rows of all chunk files, in chunk order.

    Raises:
        ValueError: if ``tmp`` contains no CSV files.
    """
    def chunk_order(name):
        # Numeric stems sort by value; anything else sorts after them by name.
        stem = os.path.splitext(name)[0]
        if stem.isdecimal():
            return (0, int(stem), name)
        return (1, 0, name)

    csv_names = sorted(
        (
            name for name in os.listdir('tmp')
            if name.endswith('.csv') and os.path.isfile(os.path.join('tmp', name))
        ),
        key=chunk_order,
    )
    if not csv_names:
        raise ValueError("No chunk CSV files found in 'tmp' to combine")

    new_df_list = []
    for name in csv_names:
        fpath = os.path.join('tmp', name)
        new_df_list.append(pd.read_csv(fpath))
        # Remove each chunk once it is loaded so the next run starts clean.
        os.remove(fpath)
    return pd.concat(new_df_list)

In [75]:
# Run the full augmentation once per shift amount from -4 to 4, merging the
# per-chunk outputs into one CSV per shift, then report the wall-clock time.
t = time.time()
shifts = list(range(-4, 5))
for shift in shifts:
    print(f"Shift Factor {shift}")
    transformData(shift)
    result = combineOutputs()
    out_path = f'data/augmentTmp/all_pt{shift}.csv'
    result.to_csv(out_path,index=False)
elapsed = time.time()-t
print(elapsed)

Shift Factor -4
Processing 11/15
11 1
Processing 6/15
Processing 8/15
Processing 14/15
Processing 1/15
Processing 13/15
Processing 3/15
Processing 4/15
Processing 5/15
Processing 7/15
Processing 9/15
Processing 10/15
Processing 12/15
Processing 2/15
8 1
Processing 0/15
1 1
13 1
6 1
3 1
7 1
14 1
4 1
9 1
10 1
12 1
5 1
2 1
0 1
11 2
13 2
1 2
3 2
12 2
4 2
6 2
8 2
14 2
0 2
9 2
2 2
10 2
7 2
5 2
13 4
0 4
14 4
11 4
2 4
1 4
3 4
12 4
10 4
9 4
4 4
5 4
7 4
8 4
6 4
Shift Factor -3
Processing 0/15
Processing 4/15
Processing 3/15
Processing 1/15
Processing 2/15
Processing 5/15
Processing 7/15
Processing 6/15
Processing 13/15
5 1
6 1
2 1
0 1
3 1
1 1
7 1
4 1
13 1
Processing 12/15
Processing 9/15
Processing 11/15
Processing 8/15
Processing 10/15
Processing 14/15
10 1
12 1
9 1
8 1
11 1
14 1
2 2
13 2
14 2
0 2
11 2
5 2
3 2
10 2
1 2
4 2
9 2
6 2
7 2
12 2
8 2
0 4
14 4
2 4
13 4
11 4
1 4
10 4
5 4
3 4
12 4
7 4
9 4
4 4
6 4
8 4
Shift Factor -2
Processing 0/15
Processing 1/15
Processing 10/15
Processing 11/15
Proces

In [56]:
# NOTE(review): stale cell. Its execution count (In [56]) and logged output
# ("Processing N/37", no per-stage prints) come from an earlier version of the
# pipeline. With transformData(shift) as defined in this notebook, the
# NoteTransform instance passed here is forwarded as PitchTransform's shift
# amount, where `self.shift > 0` would raise TypeError. Update or remove
# before a Restart & Run All.
ps = [.1,.2,.3,.4,.5]
for p in ps:
    print(f"Probability {p}")
    transformData(NoteTransform(p))
    result = combineOutputs()
    result.to_csv(f'data/augmentFinal/imslp_pt_Note_{p}.csv',index=False)

Probability 0.1
Processing 1/37
Processing 22/37
Processing 28/37
Processing 6/37
Processing 11/37
Processing 2/37
Processing 17/37
Processing 9/37
Processing 27/37
Processing 15/37
Processing 24/37
Processing 7/37
Processing 10/37
Processing 20/37
Processing 13/37
Processing 0/37
Processing 14/37
Processing 8/37
Processing 18/37
Processing 12/37
Processing 25/37
Processing 21/37
Processing 3/37
Processing 4/37
Processing 16/37
Processing 26/37
Processing 23/37
Processing 19/37
Processing 29/37
Processing 5/37
Processing 30/37
Processing 31/37
Processing 32/37
Processing 33/37
Processing 34/37
Processing 35/37
Processing 36/37
Probability 0.2
Processing 1/37
Processing 3/37
Processing 5/37
Processing 4/37
Processing 28/37
Processing 29/37
Processing 20/37
Processing 26/37
Processing 10/37
Processing 21/37
Processing 19/37
Processing 18/37
Processing 11/37
Processing 16/37
Processing 24/37
Processing 13/37
Processing 12/37
Processing 23/37
Processing 0/37
Processing 9/37
Processing 15/3

In [50]:
# NOTE(review): stale cell. Its execution count (In [50]) and logged output
# ("Processing N/37", no per-stage prints) come from an earlier version of the
# pipeline. With transformData(shift) as defined in this notebook, the
# HandTransform instance passed here is forwarded as PitchTransform's shift
# amount, where `self.shift > 0` would raise TypeError. Update or remove
# before a Restart & Run All.
ps = [.1,.2,.3,.4,.5]
for p in ps:
    print(f"Probability {p}")
    transformData(HandTransform(p))
    result = combineOutputs()
    result.to_csv(f'data/augmentFinal/imslp_pt_Hand_{p}.csv',index=False)

Probability 0.1
Processing 1/37
Processing 14/37
Processing 27/37
Processing 6/37
Processing 25/37
Processing 4/37
Processing 24/37
Processing 9/37
Processing 17/37
Processing 8/37
Processing 19/37
Processing 15/37
Processing 0/37
Processing 23/37
Processing 29/37
Processing 20/37
Processing 21/37
Processing 12/37
Processing 2/37
Processing 13/37
Processing 10/37
Processing 3/37
Processing 11/37
Processing 7/37
Processing 22/37
Processing 18/37
Processing 28/37
Processing 26/37
Processing 5/37
Processing 16/37
Processing 30/37
Processing 31/37
Processing 32/37
Processing 33/37
Processing 34/37
Processing 35/37
Processing 36/37
Probability 0.2
Processing 3/37
Processing 1/37
Processing 2/37
Processing 0/37
Processing 4/37
Processing 7/37
Processing 5/37
Processing 8/37
Processing 12/37
Processing 6/37
Processing 11/37
Processing 9/37
Processing 10/37
Processing 13/37
Processing 15/37
Processing 18/37
Processing 16/37
Processing 17/37
Processing 22/37
Processing 21/37
Processing 23/37
Pr

In [51]:
# NOTE(review): stale cell. Its execution count (In [51]) and logged output
# ("Processing N/37", no per-stage prints) come from an earlier version of the
# pipeline. With transformData(shift) as defined in this notebook, the
# ColTransform instance passed here is forwarded as PitchTransform's shift
# amount, where `self.shift > 0` would raise TypeError. Update or remove
# before a Restart & Run All.
ps = [.1,.2,.3,.4,.5]
for p in ps:
    print(f"Probability {p}")
    transformData(ColTransform(p))
    result = combineOutputs()
    result.to_csv(f'data/augmentFinal/imslp_pt_Col_{p}.csv',index=False)

Probability 0.1
Processing 1/37
Processing 11/37
Processing 27/37
Processing 4/37
Processing 20/37
Processing 10/37
Processing 28/37
Processing 24/37
Processing 23/37
Processing 5/37
Processing 29/37
Processing 9/37
Processing 6/37
Processing 19/37
Processing 3/37
Processing 22/37
Processing 2/37
Processing 0/37
Processing 15/37
Processing 26/37
Processing 18/37
Processing 21/37
Processing 16/37
Processing 8/37
Processing 13/37
Processing 7/37
Processing 25/37
Processing 17/37
Processing 14/37
Processing 12/37
Processing 30/37
Processing 31/37
Processing 32/37
Processing 33/37
Processing 34/37
Processing 35/37
Processing 36/37
Probability 0.2
Processing 21/37
Processing 24/37
Processing 8/37
Processing 12/37
Processing 25/37
Processing 5/37
Processing 0/37
Processing 19/37
Processing 4/37
Processing 28/37
Processing 7/37
Processing 29/37
Processing 2/37
Processing 18/37
Processing 6/37
Processing 26/37
Processing 13/37
Processing 3/37
Processing 27/37
Processing 10/37
Processing 11/37
