Commit
Pooling layer backprop WIP
JanHalozan committed Oct 3, 2016
1 parent 8a2147a commit 17d9371
Showing 4 changed files with 129 additions and 62 deletions.
12 changes: 11 additions & 1 deletion src/core/layers/convolutionlayer.cpp
@@ -17,7 +17,6 @@ sf::ConvolutionLayer::ConvolutionLayer() : Layer(), stride(1), kernelSide(3), ze

sf::ConvolutionLayer::~ConvolutionLayer()
{

}

void sf::ConvolutionLayer::calculateOutput()
@@ -174,6 +173,17 @@ void sf::ConvolutionLayer::reserveNeurons(ulong count)
this->resolveGradientCapacity();
}

double sf::ConvolutionLayer::getGradientOfNeuron(ulong neuronIndex) const
{
    const ulong sliceSize = this->outputWidth * this->outputHeight;
    const ulong depthNeuron = neuronIndex / sliceSize; //Floors
    neuronIndex -= depthNeuron * sliceSize;

    const auto &n = this->neurons->at(depthNeuron);

    return n.getGradient(neuronIndex);
}

void sf::ConvolutionLayer::resolveGradientCapacity()
{
const ulong oWidth = (this->inputWidth - this->kernelSide + 2 * this->zeroPaddingSize) / this->stride + 1;
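getGradientOfNeuron above splits a flat neuron index into a depth slice and an offset within that slice: integer division by the slice size (outputWidth * outputHeight) picks the neuron that owns the slice, and the remainder indexes the gradient inside it. A minimal standalone illustration of that arithmetic, with a 4x4 output size assumed purely for the example:

#include <iostream>

int main()
{
    //Assumed sizes for illustration only; the real values come from the layer's output dimensions
    const unsigned long outputWidth = 4, outputHeight = 4;
    const unsigned long sliceSize = outputWidth * outputHeight;    //16

    unsigned long neuronIndex = 21;                                //flat index into the whole output volume
    const unsigned long depthNeuron = neuronIndex / sliceSize;     //21 / 16 = 1, i.e. the second slice (floors)
    neuronIndex -= depthNeuron * sliceSize;                        //21 - 16 = 5, offset within that slice

    std::cout << "slice " << depthNeuron << ", local index " << neuronIndex << std::endl;
    return 0;
}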
1 change: 1 addition & 0 deletions src/core/layers/convolutionlayer.h
@@ -34,6 +34,7 @@ class sf::ConvolutionLayer : public sf::Layer
void recalculateWeights() override;

void reserveNeurons(ulong count) override;
double getGradientOfNeuron(ulong neuronIndex) const override;

void setStride(unsigned short stride);
unsigned short getStride() const;
33 changes: 26 additions & 7 deletions src/core/layers/poolinglayer.cpp
@@ -107,32 +107,51 @@ void sf::PoolingLayer::backprop(sf::Layer *, sf::Layer *nextLayer)
    //Reset the entire gradient map
    memset(this->gradients, 0, totalInputSize * sizeof(double));

    ulong i = 0; //Weight indexer

    for (ulong lyr = 0; lyr < this->outputDepth; ++lyr)
    {
        for (ulong row = 0; row < this->outputHeight; ++row)
        {
            for (ulong col = 0; col < this->outputWidth; ++col)
            {
                //Convolutional layer only has one neuron
                auto index = nextLayer->type == kLayerTypeConvolutional ? 0 : col + (row * this->outputWidth) + (lyr * outputSliceSize);
                double gradient;

                if (nextLayer->type == kLayerTypeHiddenNeuron)
                {
                    double gradientSum = 0.0;

                    for (const auto &nextLayerNeuron : nextLayer->getNeurons())
                        gradientSum += nextLayerNeuron.getGradient() * nextLayerNeuron.getWeight(i + 1); //i + 1 because index 0 is the bias

                    gradient = gradientSum;
                }
                else
                {
                    //Gradients coming from other layer types (e.g. convolutional) are not handled yet
                    gradient = 0;
                }

                const auto routeIndex = this->selectedFilterIndexes[index];

                //Start index of the gradient frame
                ulong gradientIndex = (col * this->stride) + (row * this->inputWidth * this->stride) + (lyr * inputSliceSize);
                const ulong gradientCol = routeIndex % this->stride;
                const ulong gradientRow = routeIndex / this->stride;
                gradientIndex += gradientCol + (gradientRow * this->inputWidth);

                //Route the gradient back to the input element that won the pooling
                this->gradients[gradientIndex] = gradient;

                i++;
            }
        }
    }
}

double sf::PoolingLayer::getGradientOfNeuron(ulong neuronIndex) const
{
    return this->gradients[neuronIndex];
}
}
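The backprop above does two things per pooled output: when the next layer is fully connected it accumulates the chain-rule sum of (next-layer gradient × connecting weight), and it then routes that value to the single input position that produced the pooled value, as remembered in selectedFilterIndexes. A small self-contained sketch of the routing step, assuming a 2x2 window with stride 2 and row-major layout (the argmaxIndex array stands in for selectedFilterIndexes; names and sizes are assumptions of this sketch, not part of the commit):

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    const std::size_t inW = 4, inH = 4, pool = 2;          //4x4 input, 2x2 pooling windows, stride 2
    const std::size_t outW = inW / pool, outH = inH / pool;

    std::vector<std::size_t> argmaxIndex(outW * outH, 3);  //forward pass: offset of the max inside each window (3 = bottom right, chosen arbitrarily)
    std::vector<double> outputGrad(outW * outH, 1.0);       //gradient arriving from the next layer
    std::vector<double> inputGrad(inW * inH, 0.0);          //gradient map the backward pass fills in

    for (std::size_t row = 0; row < outH; ++row)
        for (std::size_t col = 0; col < outW; ++col)
        {
            const std::size_t out = col + row * outW;
            //Top-left corner of this output's window in the input...
            std::size_t in = (col * pool) + (row * pool * inW);
            //...shifted to the element that won the max during the forward pass
            in += (argmaxIndex[out] % pool) + (argmaxIndex[out] / pool) * inW;
            inputGrad[in] = outputGrad[out];                //only the winning position receives the gradient
        }

    for (std::size_t r = 0; r < inH; ++r)
    {
        for (std::size_t c = 0; c < inW; ++c)
            std::cout << inputGrad[c + r * inW] << ' ';
        std::cout << '\n';
    }
    return 0;
}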
145 changes: 91 additions & 54 deletions tests/main.cpp
@@ -21,6 +21,43 @@ int main(int argc, char const *argv[])
#pragma unused(argc)
#pragma unused(argv)

//Test for pooling layer backprop
{
    using namespace sf;
    PoolingLayer *layer = new PoolingLayer();

    //Two 4x4 slices of test data; the input is loaded with depth 1, so only the first slice is used
    double data[] = {
        4, 5, 1, 1,
        6, 7, 1, 1,
        2, 9, 2, 3,
        3, 6, 4, 5,

        3, 2, 6, 5,
        1, 1, 4, 2,
        0, -2, 5, 9,
        4, 3, 0, 2
    };

    layer->loadInput(data, 4, 4, 1);
    layer->calculateOutput();

    ulong w, h, d;
    double *res = layer->getOutput(w, h, d);

    //Feed the pooled output into a small fully connected layer
    HiddenNeuronLayer *hdLayer = new HiddenNeuronLayer();
    hdLayer->reserveNeurons(4);
    hdLayer->loadInput(res, w, h);
    hdLayer->calculateOutput();

    double *ress = hdLayer->getOutput(w, h, d);

    //Seed a couple of gradients by hand and backpropagate them through the pooling layer
    hdLayer->neurons->at(0).setGradient(2);
    hdLayer->neurons->at(1).setGradient(3);

    layer->backprop(nullptr, hdLayer);

    return 0;
}
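//What the block above exercises, in brief: backprop() zeroes the pooling layer's gradient map,
//computes one gradient per pooled output by summing (hidden-neuron gradient x connecting weight,
//skipping the bias weight at index 0), and writes that value only at the input position recorded
//in selectedFilterIndexes during the forward pass; every other input position keeps its zero
//gradient. Exact values depend on the layer's default pool/stride settings, which this test
//leaves untouched.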

// //Test for loader
// {
// const auto path = "/Users/janhalozan/Work/C++/ConvolutionalNeuralNetwork/fixture/cifar-10-batches-bin/";
@@ -133,60 +170,60 @@ int main(int argc, char const *argv[])
// }


// //A really really really simple example of a MLP. Samples 1 & 2 are similar, so are 3 & 4 and 5 & 6. When the net is trained we feed it an example
// // similar to first two samples and if the answer is class 0 then the MLP is working correctly.
// {
// using namespace sf;
//
// //Size of our input data
// const unsigned long inputWidth = 3;
// const unsigned long inputHeight = 1;
//
// //A bunch of samples. The 1 & 2 are similar so are 3 & 4 and 5 & 6.
// double sample1[] = {1.0, 0.2, 0.1}; //Cow
// double sample2[] = {0.8, 0.1, 0.25}; //Cow
// double sample3[] = {0.2, 0.95, 0.1}; //Chicken
// double sample4[] = {0.11, 0.9, 0.13}; //Chicken
// double sample5[] = {0.0, 0.2, 0.91}; //Car
// double sample6[] = {0.21, 0.12, 1.0}; //Car
//
// //A new network with the given data width and height
// Net *net = new Net(inputWidth, inputHeight);
//
// sf::LayerDescriptor descriptor;
// descriptor.type = kLayerTypeHiddenNeuron;
// descriptor.neuronCount = 4;
//
// net->addLayer(descriptor); //A hidden neural layer with 4 neurons
// net->addLayer(descriptor); //A hidden neural layer with 4 neurons
//
// descriptor.type = kLayerTypeOutputNeuron;
//
// net->addLayer(descriptor); //Finish it off by adding an output layer
//
// //Add all the samples with their corresponding labels
// net->addTrainingSample(sample1, "cow");
// net->addTrainingSample(sample2, "cow");
// net->addTrainingSample(sample3, "chicken");
// net->addTrainingSample(sample4, "chicken");
// net->addTrainingSample(sample5, "car");
// net->addTrainingSample(sample6, "car");
//
// //And now we play the waiting game
// net->train();
//
// //This example is similar to "chicken" so we expect the chicken probability to be close to 1 and car and cow to be close to 0
// double example[] = {0.1, 0.98, 0.01};
// auto output = net->classifySample(example);
//
// //Let's see what we get
// for (auto &tuple : output)
// std::cout << std::get<1>(tuple) << ": " << std::get<0>(tuple) << std::endl;
//
// std::cout << std::endl;
//
// return 0;
// }

// {
//
