In [1]:
import math
import numpy
import torch
import torch.nn as nn
import torchaudio
torchaudio.set_audio_backend("soundfile")
import matplotlib.pyplot as plt



In [2]:
class AudioSample:
    def __init__(self, filepath):
        loadedData = torchaudio.load(filepath)
        self.waveform = loadedData[0][0]
        self.sampleRate = loadedData[1]
        del loadedData
        self.pitchDeltas = torch.tensor([], dtype = int)
        self.pitchBorders = torch.tensor([], dtype = int)
        self.Pitch = torch.tensor([0], dtype = int)
        self.spectra = torch.tensor([[]], dtype = float)
        self.spectrum = torch.tensor([], dtype = float)
        self.excitation = torch.tensor([], dtype = float) #replace with periodic and aperiodic excitation once implemented
        
    def CalculatePitch(self, expectedPitch, searchRange = 0.2):
        batchSize = math.floor((1. + searchRange) * self.sampleRate / expectedPitch)
        lowerSearchLimit = math.floor((1. - searchRange) * self.sampleRate / expectedPitch)
        batchStart = 0
        while batchStart + batchSize <= self.waveform.size()[0] - batchSize:
            sample = torch.index_select(self.waveform, 0, torch.linspace(batchStart, batchStart + batchSize, batchSize, dtype = int))
            zeroTransitions = torch.tensor([], dtype = int)
            for i in range(lowerSearchLimit, batchSize):
                if (sample[i-1] < 0) and (sample[i] > 0):
                    zeroTransitions = torch.cat([zeroTransitions, torch.tensor([i])], 0)
            error = math.inf
            delta = math.floor(self.sampleRate / expectedPitch)
            for i in zeroTransitions:
                shiftedSample = torch.index_select(self.waveform, 0, torch.linspace(batchStart + i.item(), batchStart + batchSize + i.item(), batchSize, dtype = int))
                newError = torch.sum(torch.pow(sample - shiftedSample, 2))
                if error > newError:
                    delta = i.item()
                    error = newError
            self.pitchDeltas = torch.cat([self.pitchDeltas, torch.tensor([delta])])
            batchStart += delta
        nBatches = self.pitchDeltas.size()[0]
        self.pitchBorders = torch.zeros(nBatches + 1, dtype = int)
        for i in range(nBatches):
            self.pitchBorders[i+1] = self.pitchBorders[i] + self.pitchDeltas[i]
        self.Pitch = torch.mean(self.pitchDeltas.float()).int()
        del batchSize
        del lowerSearchLimit
        del batchStart
        del sample
        del zeroTransitions
        del error
        del delta
        del shiftedSample
        del newError
        del nBatches
        
    def CalculateSpectra(self, iterations = 10, filterWidth = 20):
        Window = torch.hann_window(self.Pitch * 3)
        signals = torch.stft(self.waveform, self.Pitch * 3, hop_length = self.Pitch, win_length = self.Pitch * 3, window = Window, return_complex = True)
        signals = torch.transpose(signals, 0, 1)
        signals = torch.log(signals)
        workingSpectra = signals.abs()
        self.spectra = torch.full_like(workingSpectra, -float("inf"), dtype=torch.float)
        for i in range(iterations):
            workingSpectra = torch.max(workingSpectra, self.spectra)
            self.spectra = workingSpectra
            for i in range(filterWidth):
                self.spectra = torch.roll(workingSpectra, -i, dims = 1) + self.spectra + torch.roll(workingSpectra, i, dims = 1)
            self.spectra = self.spectra / (2 * filterWidth + 1)
        self.spectrum = torch.mean(self.spectra, 0)
        for i in range(self.spectra.size()[0]):
            self.spectra[i] = self.spectra[i] - self.spectrum
        del Window
        del signals
        del workingSpectra
        
    def CalculateExcitation(self):
        Window = torch.hann_window(self.Pitch * 3)
        signals = torch.stft(self.waveform, self.Pitch * 3, hop_length = self.Pitch, win_length = self.Pitch * 3, window = Window, return_complex = True)
        signals = torch.transpose(signals, 0, 1)
        excitations = torch.empty_like(signals)
        for i in range(excitations.size()[0]):
            excitations[i] = signals[i] / (torch.exp(self.spectrum) + torch.exp(self.spectra[i]))
        excitations = torch.transpose(excitations, 0, 1)
        self.excitation = torch.istft(excitations, self.Pitch * 3, hop_length = self.Pitch, win_length = self.Pitch * 3, window = Window, onesided = True)
        del Window
        del signals
        del excitations

In [21]:
class Synthesizer:
    def __init__(self, Excitation, Spectrum, Spectra, SampleRate):
        self.excitation = Excitation
        self.spectrum = Spectrum
        self.spectra = Spectra
        self.sampleRate = SampleRate
        self.returnSignal = torch.tensor([], dtype = float)
        
    def Synthesize(self, pitch, steadiness):
        Window = torch.hann_window(pitch * 3)
        self.returnSignal = torch.stft(self.excitation, pitch * 3, hop_length = pitch, win_length = pitch * 3, window = Window, return_complex = True)
        self.returnSignal = torch.transpose(self.returnSignal, 0, 1)
        for i in range(self.spectra.size()[0]):
            self.returnSignal[i] = self.returnSignal[i] * (torch.exp(self.spectrum) + math.pow(1 - steadiness, 2) * torch.roll(torch.exp(self.spectra), 0, dims=0)[i])
        self.returnSignal = torch.transpose(self.returnSignal, 0, 1)
        self.returnSignal = torch.istft(self.returnSignal, pitch * 3, hop_length = pitch, win_length = pitch * 3, window = Window, onesided=True, )
        del Window
        
    def save(self, filepath):
        torchaudio.save(filepath, torch.unsqueeze(self.returnSignal, 0), self.sampleRate, format="wav", encoding="PCM_S", bits_per_sample=32)

In [4]:
class RelLoss(nn.Module):
    def __init__(self, weight=None, size_average=True):
        super(RelLoss, self).__init__()
 
    def forward(self, inputs, targets):    
        inputs = inputs.view(-1)
        targets = targets.view(-1)
        differences = torch.abs(inputs - targets)
        sums = torch.abs(inputs + targets)
        out = (differences / sums).sum() / inputs.size()[0]
        return out

In [5]:
class SpecCrfAi(nn.Module):
    def __init__(self, learningRate=1e-5):
        super(SpecCrfAi, self).__init__()
        
        self.layer1 = torch.nn.Conv2d(1, 10, (51, 3), padding = (25, 0), bias = False)
        self.ReLu1 = nn.ReLU()
        self.layer2 = torch.nn.Conv2d(10, 10, (51, 1), padding = (25, 0), bias = False)
        self.ReLu2 = nn.ReLU()
        self.layer3 = torch.nn.Conv2d(10, 10, (51, 1), padding = (25, 0), bias = False)
        self.ReLu3 = nn.ReLU()
        self.layer4 = torch.nn.Conv2d(10, 1, (51, 1), padding = (25, 0), bias = False)
        
        self.learningRate = learningRate
        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.learningRate, weight_decay=0.)
        #self.criterion = nn.L1Loss()
        self.criterion = RelLoss()
        
    def forward(self, spectrum1, spectrum2, factor):
        fac = torch.full((spectrum1.size()[0], 1), factor)
        x = torch.cat((spectrum1.unsqueeze(1), fac, spectrum2.unsqueeze(1)), dim = 1)
        x = x.float().unsqueeze(0).unsqueeze(0)
        x = self.layer1(x)
        x = self.ReLu1(x)
        x = self.layer2(x)
        x = self.ReLu2(x)
        x = self.layer3(x)
        x = self.ReLu3(x)
        x = self.layer4(x)
        return x
    
    def processData(self, spectrum1, spectrum2, factor):
        output = torch.squeeze(self(spectrum1, spectrum2, factor))
        return output
    
    def train(self, indata, epochs=1):
        for epoch in range(epochs):
            for data in self.dataLoader(indata):
                spectrum1 = data[0]
                spectrum2 = data[-1]
                indexList = numpy.arange(0, data.size()[0], 1)
                numpy.random.shuffle(indexList)
                for i in indexList:
                    factor = i / float(data.size()[0])
                    spectrumTarget = data[i]
                    output = torch.squeeze(self(spectrum1, spectrum2, factor))
                    loss = self.criterion(output, spectrumTarget)
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
            print('epoch [{}/{}], loss:{:.4f}'
                  .format(epoch + 1, epochs, loss.data))
            
    def dataLoader(self, data):
        return torch.utils.data.DataLoader(dataset=data, shuffle=True)

In [6]:
audioSample1 = AudioSample("Samples/a_a.wav")
audioSample1.CalculatePitch(95.)
audioSample1.CalculateSpectra(iterations = 50)
audioSample1.CalculateExcitation()

audioSample2 = AudioSample("Samples/a_i.wav")
audioSample2.CalculatePitch(95.)
audioSample2.CalculateSpectra(iterations = 50)
audioSample2.CalculateExcitation()

audioSample3 = AudioSample("Samples/a_u.wav")
audioSample3.CalculatePitch(95.)
audioSample3.CalculateSpectra(iterations = 50)
audioSample3.CalculateExcitation()

audioSample4 = AudioSample("Samples/e_a.wav")
audioSample4.CalculatePitch(95.)
audioSample4.CalculateSpectra(iterations = 50)
audioSample4.CalculateExcitation()

audioSample5 = AudioSample("Samples/i_a.wav")
audioSample5.CalculatePitch(95.)
audioSample5.CalculateSpectra(iterations = 50)
audioSample5.CalculateExcitation()

audioSample6 = AudioSample("Samples/u_e.wav")
audioSample6.CalculatePitch(95.)
audioSample6.CalculateSpectra(iterations = 50)
audioSample6.CalculateExcitation()

audioSampleA = AudioSample("Samples/a2.wav")
audioSampleA.CalculatePitch(95.)
audioSampleA.CalculateSpectra(iterations = 50)
audioSampleA.CalculateExcitation()

audioSampleE = AudioSample("Samples/e.wav")
audioSampleE.CalculatePitch(95.)
audioSampleE.CalculateSpectra(iterations = 50)
audioSampleE.CalculateExcitation()

audioSampleI = AudioSample("Samples/i.wav")
audioSampleI.CalculatePitch(95.)
audioSampleI.CalculateSpectra(iterations = 50)
audioSampleI.CalculateExcitation()

audioSampleU = AudioSample("Samples/u.wav")
audioSampleU.CalculatePitch(95.)
audioSampleU.CalculateSpectra(iterations = 50)
audioSampleU.CalculateExcitation()

In [7]:
audioSamples = [audioSample1, audioSample2, audioSample3, audioSample4, audioSample5, audioSample6]
trainSpectra = [0, 0, 0, 0, 0, 0]
for i in range(6):
    trainSpectra[i] = torch.empty_like(audioSamples[i].spectra)
    for j in range(audioSamples[i].spectra.size()[0]):
        trainSpectra[i][j] = audioSamples[i].spectrum + audioSamples[i].spectra[j]
    
specCrfAi = SpecCrfAi()
for i in range(6):
    specCrfAi.train(trainSpectra[i], epochs = 500)

epoch [1/500], loss:0.9756
epoch [2/500], loss:0.9577
epoch [3/500], loss:0.9317
epoch [4/500], loss:0.8943
epoch [5/500], loss:0.8430
epoch [6/500], loss:0.7761
epoch [7/500], loss:0.6921
epoch [8/500], loss:0.5822
epoch [9/500], loss:0.4000
epoch [10/500], loss:0.1367
epoch [11/500], loss:0.0286
epoch [12/500], loss:0.0130
epoch [13/500], loss:0.0110
epoch [14/500], loss:0.0121
epoch [15/500], loss:0.0116
epoch [16/500], loss:0.0115
epoch [17/500], loss:0.0108
epoch [18/500], loss:0.0117
epoch [19/500], loss:0.0109
epoch [20/500], loss:0.0115
epoch [21/500], loss:0.0119
epoch [22/500], loss:0.0107
epoch [23/500], loss:0.0115
epoch [24/500], loss:0.0109
epoch [25/500], loss:0.0104
epoch [26/500], loss:0.0100
epoch [27/500], loss:0.0102
epoch [28/500], loss:0.0116
epoch [29/500], loss:0.0102
epoch [30/500], loss:0.0099
epoch [31/500], loss:0.0109
epoch [32/500], loss:0.0100
epoch [33/500], loss:0.0103
epoch [34/500], loss:0.0106
epoch [35/500], loss:0.0105
epoch [36/500], loss:0.0097
e

epoch [288/500], loss:0.0030
epoch [289/500], loss:0.0029
epoch [290/500], loss:0.0030
epoch [291/500], loss:0.0029
epoch [292/500], loss:0.0031
epoch [293/500], loss:0.0030
epoch [294/500], loss:0.0032
epoch [295/500], loss:0.0032
epoch [296/500], loss:0.0032
epoch [297/500], loss:0.0030
epoch [298/500], loss:0.0032
epoch [299/500], loss:0.0029
epoch [300/500], loss:0.0029
epoch [301/500], loss:0.0030
epoch [302/500], loss:0.0029
epoch [303/500], loss:0.0029
epoch [304/500], loss:0.0029
epoch [305/500], loss:0.0028
epoch [306/500], loss:0.0029
epoch [307/500], loss:0.0029
epoch [308/500], loss:0.0029
epoch [309/500], loss:0.0029
epoch [310/500], loss:0.0029
epoch [311/500], loss:0.0030
epoch [312/500], loss:0.0029
epoch [313/500], loss:0.0029
epoch [314/500], loss:0.0029
epoch [315/500], loss:0.0029
epoch [316/500], loss:0.0029
epoch [317/500], loss:0.0028
epoch [318/500], loss:0.0028
epoch [319/500], loss:0.0029
epoch [320/500], loss:0.0029
epoch [321/500], loss:0.0031
epoch [322/500

epoch [74/500], loss:0.0024
epoch [75/500], loss:0.0023
epoch [76/500], loss:0.0024
epoch [77/500], loss:0.0024
epoch [78/500], loss:0.0023
epoch [79/500], loss:0.0024
epoch [80/500], loss:0.0023
epoch [81/500], loss:0.0023
epoch [82/500], loss:0.0023
epoch [83/500], loss:0.0024
epoch [84/500], loss:0.0023
epoch [85/500], loss:0.0024
epoch [86/500], loss:0.0024
epoch [87/500], loss:0.0023
epoch [88/500], loss:0.0024
epoch [89/500], loss:0.0023
epoch [90/500], loss:0.0023
epoch [91/500], loss:0.0023
epoch [92/500], loss:0.0023
epoch [93/500], loss:0.0023
epoch [94/500], loss:0.0023
epoch [95/500], loss:0.0024
epoch [96/500], loss:0.0023
epoch [97/500], loss:0.0023
epoch [98/500], loss:0.0024
epoch [99/500], loss:0.0024
epoch [100/500], loss:0.0025
epoch [101/500], loss:0.0024
epoch [102/500], loss:0.0026
epoch [103/500], loss:0.0025
epoch [104/500], loss:0.0024
epoch [105/500], loss:0.0023
epoch [106/500], loss:0.0023
epoch [107/500], loss:0.0024
epoch [108/500], loss:0.0022
epoch [109/

epoch [358/500], loss:0.0008
epoch [359/500], loss:0.0007
epoch [360/500], loss:0.0006
epoch [361/500], loss:0.0005
epoch [362/500], loss:0.0006
epoch [363/500], loss:0.0006
epoch [364/500], loss:0.0006
epoch [365/500], loss:0.0008
epoch [366/500], loss:0.0006
epoch [367/500], loss:0.0008
epoch [368/500], loss:0.0007
epoch [369/500], loss:0.0005
epoch [370/500], loss:0.0005
epoch [371/500], loss:0.0008
epoch [372/500], loss:0.0007
epoch [373/500], loss:0.0006
epoch [374/500], loss:0.0007
epoch [375/500], loss:0.0006
epoch [376/500], loss:0.0008
epoch [377/500], loss:0.0007
epoch [378/500], loss:0.0007
epoch [379/500], loss:0.0006
epoch [380/500], loss:0.0007
epoch [381/500], loss:0.0007
epoch [382/500], loss:0.0007
epoch [383/500], loss:0.0006
epoch [384/500], loss:0.0006
epoch [385/500], loss:0.0006
epoch [386/500], loss:0.0006
epoch [387/500], loss:0.0007
epoch [388/500], loss:0.0006
epoch [389/500], loss:0.0007
epoch [390/500], loss:0.0006
epoch [391/500], loss:0.0007
epoch [392/500

epoch [145/500], loss:0.0005
epoch [146/500], loss:0.0004
epoch [147/500], loss:0.0006
epoch [148/500], loss:0.0006
epoch [149/500], loss:0.0007
epoch [150/500], loss:0.0006
epoch [151/500], loss:0.0005
epoch [152/500], loss:0.0004
epoch [153/500], loss:0.0005
epoch [154/500], loss:0.0004
epoch [155/500], loss:0.0005
epoch [156/500], loss:0.0006
epoch [157/500], loss:0.0005
epoch [158/500], loss:0.0005
epoch [159/500], loss:0.0007
epoch [160/500], loss:0.0005
epoch [161/500], loss:0.0006
epoch [162/500], loss:0.0008
epoch [163/500], loss:0.0007
epoch [164/500], loss:0.0011
epoch [165/500], loss:0.0010
epoch [166/500], loss:0.0007
epoch [167/500], loss:0.0011
epoch [168/500], loss:0.0009
epoch [169/500], loss:0.0005
epoch [170/500], loss:0.0009
epoch [171/500], loss:0.0010
epoch [172/500], loss:0.0007
epoch [173/500], loss:0.0011
epoch [174/500], loss:0.0007
epoch [175/500], loss:0.0008
epoch [176/500], loss:0.0010
epoch [177/500], loss:0.0009
epoch [178/500], loss:0.0005
epoch [179/500

epoch [428/500], loss:0.0007
epoch [429/500], loss:0.0012
epoch [430/500], loss:0.0008
epoch [431/500], loss:0.0006
epoch [432/500], loss:0.0011
epoch [433/500], loss:0.0009
epoch [434/500], loss:0.0008
epoch [435/500], loss:0.0011
epoch [436/500], loss:0.0009
epoch [437/500], loss:0.0005
epoch [438/500], loss:0.0009
epoch [439/500], loss:0.0008
epoch [440/500], loss:0.0005
epoch [441/500], loss:0.0011
epoch [442/500], loss:0.0008
epoch [443/500], loss:0.0004
epoch [444/500], loss:0.0004
epoch [445/500], loss:0.0006
epoch [446/500], loss:0.0004
epoch [447/500], loss:0.0007
epoch [448/500], loss:0.0005
epoch [449/500], loss:0.0005
epoch [450/500], loss:0.0007
epoch [451/500], loss:0.0006
epoch [452/500], loss:0.0004
epoch [453/500], loss:0.0005
epoch [454/500], loss:0.0005
epoch [455/500], loss:0.0006
epoch [456/500], loss:0.0005
epoch [457/500], loss:0.0004
epoch [458/500], loss:0.0006
epoch [459/500], loss:0.0004
epoch [460/500], loss:0.0005
epoch [461/500], loss:0.0004
epoch [462/500

epoch [215/500], loss:0.0006
epoch [216/500], loss:0.0009
epoch [217/500], loss:0.0006
epoch [218/500], loss:0.0007
epoch [219/500], loss:0.0010
epoch [220/500], loss:0.0006
epoch [221/500], loss:0.0006
epoch [222/500], loss:0.0009
epoch [223/500], loss:0.0005
epoch [224/500], loss:0.0005
epoch [225/500], loss:0.0005
epoch [226/500], loss:0.0004
epoch [227/500], loss:0.0005
epoch [228/500], loss:0.0005
epoch [229/500], loss:0.0005
epoch [230/500], loss:0.0004
epoch [231/500], loss:0.0004
epoch [232/500], loss:0.0004
epoch [233/500], loss:0.0004
epoch [234/500], loss:0.0005
epoch [235/500], loss:0.0007
epoch [236/500], loss:0.0005
epoch [237/500], loss:0.0004
epoch [238/500], loss:0.0004
epoch [239/500], loss:0.0004
epoch [240/500], loss:0.0007
epoch [241/500], loss:0.0004
epoch [242/500], loss:0.0005
epoch [243/500], loss:0.0005
epoch [244/500], loss:0.0005
epoch [245/500], loss:0.0004
epoch [246/500], loss:0.0006
epoch [247/500], loss:0.0004
epoch [248/500], loss:0.0005
epoch [249/500

epoch [498/500], loss:0.0004
epoch [499/500], loss:0.0005
epoch [500/500], loss:0.0004
epoch [1/500], loss:0.0004
epoch [2/500], loss:0.0006
epoch [3/500], loss:0.0004
epoch [4/500], loss:0.0004
epoch [5/500], loss:0.0004
epoch [6/500], loss:0.0004
epoch [7/500], loss:0.0005
epoch [8/500], loss:0.0005
epoch [9/500], loss:0.0006
epoch [10/500], loss:0.0005
epoch [11/500], loss:0.0005
epoch [12/500], loss:0.0005
epoch [13/500], loss:0.0005
epoch [14/500], loss:0.0005
epoch [15/500], loss:0.0006
epoch [16/500], loss:0.0005
epoch [17/500], loss:0.0006
epoch [18/500], loss:0.0005
epoch [19/500], loss:0.0006
epoch [20/500], loss:0.0004
epoch [21/500], loss:0.0004
epoch [22/500], loss:0.0004
epoch [23/500], loss:0.0005
epoch [24/500], loss:0.0005
epoch [25/500], loss:0.0004
epoch [26/500], loss:0.0004
epoch [27/500], loss:0.0004
epoch [28/500], loss:0.0005
epoch [29/500], loss:0.0006
epoch [30/500], loss:0.0004
epoch [31/500], loss:0.0007
epoch [32/500], loss:0.0005
epoch [33/500], loss:0.000

epoch [285/500], loss:0.0005
epoch [286/500], loss:0.0005
epoch [287/500], loss:0.0006
epoch [288/500], loss:0.0007
epoch [289/500], loss:0.0005
epoch [290/500], loss:0.0005
epoch [291/500], loss:0.0006
epoch [292/500], loss:0.0006
epoch [293/500], loss:0.0005
epoch [294/500], loss:0.0007
epoch [295/500], loss:0.0005
epoch [296/500], loss:0.0006
epoch [297/500], loss:0.0005
epoch [298/500], loss:0.0007
epoch [299/500], loss:0.0006
epoch [300/500], loss:0.0006
epoch [301/500], loss:0.0005
epoch [302/500], loss:0.0007
epoch [303/500], loss:0.0006
epoch [304/500], loss:0.0004
epoch [305/500], loss:0.0004
epoch [306/500], loss:0.0004
epoch [307/500], loss:0.0004
epoch [308/500], loss:0.0004
epoch [309/500], loss:0.0004
epoch [310/500], loss:0.0004
epoch [311/500], loss:0.0005
epoch [312/500], loss:0.0004
epoch [313/500], loss:0.0004
epoch [314/500], loss:0.0004
epoch [315/500], loss:0.0003
epoch [316/500], loss:0.0004
epoch [317/500], loss:0.0004
epoch [318/500], loss:0.0004
epoch [319/500

epoch [71/500], loss:0.0004
epoch [72/500], loss:0.0005
epoch [73/500], loss:0.0006
epoch [74/500], loss:0.0005
epoch [75/500], loss:0.0008
epoch [76/500], loss:0.0005
epoch [77/500], loss:0.0006
epoch [78/500], loss:0.0005
epoch [79/500], loss:0.0005
epoch [80/500], loss:0.0007
epoch [81/500], loss:0.0005
epoch [82/500], loss:0.0005
epoch [83/500], loss:0.0005
epoch [84/500], loss:0.0005
epoch [85/500], loss:0.0006
epoch [86/500], loss:0.0007
epoch [87/500], loss:0.0006
epoch [88/500], loss:0.0005
epoch [89/500], loss:0.0005
epoch [90/500], loss:0.0004
epoch [91/500], loss:0.0004
epoch [92/500], loss:0.0007
epoch [93/500], loss:0.0004
epoch [94/500], loss:0.0004
epoch [95/500], loss:0.0004
epoch [96/500], loss:0.0003
epoch [97/500], loss:0.0004
epoch [98/500], loss:0.0004
epoch [99/500], loss:0.0005
epoch [100/500], loss:0.0003
epoch [101/500], loss:0.0004
epoch [102/500], loss:0.0004
epoch [103/500], loss:0.0004
epoch [104/500], loss:0.0005
epoch [105/500], loss:0.0007
epoch [106/500

epoch [355/500], loss:0.0004
epoch [356/500], loss:0.0006
epoch [357/500], loss:0.0006
epoch [358/500], loss:0.0006
epoch [359/500], loss:0.0006
epoch [360/500], loss:0.0005
epoch [361/500], loss:0.0005
epoch [362/500], loss:0.0005
epoch [363/500], loss:0.0004
epoch [364/500], loss:0.0003
epoch [365/500], loss:0.0004
epoch [366/500], loss:0.0004
epoch [367/500], loss:0.0004
epoch [368/500], loss:0.0006
epoch [369/500], loss:0.0005
epoch [370/500], loss:0.0004
epoch [371/500], loss:0.0004
epoch [372/500], loss:0.0004
epoch [373/500], loss:0.0004
epoch [374/500], loss:0.0004
epoch [375/500], loss:0.0004
epoch [376/500], loss:0.0004
epoch [377/500], loss:0.0005
epoch [378/500], loss:0.0003
epoch [379/500], loss:0.0004
epoch [380/500], loss:0.0003
epoch [381/500], loss:0.0004
epoch [382/500], loss:0.0005
epoch [383/500], loss:0.0005
epoch [384/500], loss:0.0005
epoch [385/500], loss:0.0005
epoch [386/500], loss:0.0004
epoch [387/500], loss:0.0004
epoch [388/500], loss:0.0004
epoch [389/500

In [22]:
synthesizer = Synthesizer(audioSample3.excitation, audioSample3.spectrum, audioSample3.spectra, audioSample3.sampleRate)
synthesizer.Synthesize(audioSample3.Pitch, 1.)
synthesizer.save("Output_Crf.wav")

In [14]:
output = specCrfAi.processData(trainSpectra[1][0], trainSpectra[1][-1], 0.5).detach()

In [None]:
class Synthesizer2:
    def __init__(self, Excitation, Spectrum, Spectra, SampleRate):
        self.excitation = Excitation
        self.spectrum = Spectrum
        self.spectra = Spectra
        self.sampleRate = SampleRate
        self.returnSignal = torch.tensor([], dtype = float)
        
    def Synthesize(self, pitch, steadiness):
        Window = torch.hann_window(pitch * 3)
        self.returnSignal = torch.stft(self.excitation, pitch * 3, hop_length = pitch, win_length = pitch * 3, window = Window, return_complex = True)
        self.returnSignal = torch.transpose(self.returnSignal, 0, 1)
        for i in range(self.spectra.size()[0]):
            self.returnSignal[i] = self.returnSignal[i] * (torch.exp(self.spectrum) + math.pow(1 - steadiness, 2) * torch.roll(torch.exp(self.spectra), 0, dims=0)[i])
        self.returnSignal = torch.transpose(self.returnSignal, 0, 1)
        self.returnSignal = torch.istft(self.returnSignal, pitch * 3, hop_length = pitch, win_length = pitch * 3, window = Window, onesided=True, )
        del Window
        
    def save(self, filepath):
        torchaudio.save(filepath, torch.unsqueeze(self.returnSignal, 0), self.sampleRate, format="wav", encoding="PCM_S", bits_per_sample=32)