In [1]:
import matplotlib.pyplot as plt
import numpy as np
import math
import random

import pandas as pd
import cv2

In [2]:
from graphviz import Digraph

def trace(root):
    nodes, edges = set(), set()
    def build(v):
        if v not in nodes:
            nodes.add(v)
        for child in v._prev:
            edges.add((child, v))
            build(child)
    build(root)
    return nodes, edges

def draw_dot(root):
    dot = Digraph(format = 'svg', graph_attr={'rankdir' : 'LR'})
    
    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        nodeText = "{%s | data %.4f | grad %.4f}" % (n.label, n.data, n.grad)
        dot.node(name = uid, label = nodeText, shape = 'record')
        if n._op:
            dot.node(name = uid + n._op, label = n._op)
            dot.edge(uid + n._op, uid)
            
    for n1, n2 in edges:
        dot.edge(str(id(n1)), str(id(n2)) + n2._op)
    return dot

In [3]:
class Value:
    """
    radd
    add
    
    sub
    neg
    rmul
    mul
    truediv
    pow
    exp
    tanh
    """
    def __init__(self, data, _children = (), _op = '', label = ''):
        self.data = data
        self.grad = 0.0
        self._backward = lambda : None
        self._prev = set(_children)
        self._op = _op
        self.label = label
        
    def __repr__(self):
        return f"Value(data={self.data})"
        
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, _children = (self, other), _op = '+')
        
        def _backward():            
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        
        return out
    
    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, _children = (self, other), _op = '*')
        
        def _backward():            
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        
        return out

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers" 
        out = Value(self.data**other, _children = (self, ), _op = f'**{other}')
        
        def _backward():
            self.grad += other * self.data**(other - 1) * out.grad
        out._backward = _backward
        
        return out
    
    def exp(self):
        out = Value(math.exp(self.data), _children = (self,), _op = 'exp')
        
        def _backward():
            self.grad += out.data * out.grad
        out._backward = _backward
        
        return out

    def tanh(self):
        n = self.data
        t = (math.exp(2*n) - 1)/(math.exp(2*n) + 1)
        out = Value(t, _children = (self,), _op = 'tanh')
        
        def _backward():            
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward
        
        return out
    
    def log(self):
        out = Value(np.log(self.data), _children = (self,), _op = 'log')
        
        def _backward():  
            self.grad += (1 / self.data) * out.grad
        out._backward = _backward
        
        return out
    
    def relu(self):
        out = Value(max(0, self.data), _children = (self,), _op = 'relu')
        
        def _backward():            
            self.grad += (self.data > 0) * out.grad
        out._backward = _backward
        
        return out
    
    def __neg__(self):
        return self * -1
    
    def __radd__(self, other):
        return self + other
    
    def __sub__(self, other):
        return self + (-other)
    
    def __rmul__(self, other):
        return self * other 
    
    def __truediv__(self, other):
        return self * other ** -1
    
    def sigmoid(self):
        return self.exp()/(1 + self.exp())
    
    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        
        self.grad = 1.0
        for node in reversed(topo):
            node._backward()

In [4]:
class Neuron:
    
    def __init__(self, dim, _activation = 'tanh'):
        self.w = [Value(random.uniform(-1,1)) for _ in range(dim)]
        self.b = Value(random.uniform(-1,1))
        self._activation = _activation
        
    def __call__(self, data):
        assert len(self.w) == len(data), "length of data for the neuron is not equal"
        act = sum((wi*xi for wi,xi in zip(self.w, data)), self.b)
        if self._activation == 'tanh':
            out = act.tanh()
        elif self._activation == 'relu':
            out = act.relu()
        elif self._activation == 'sigmoid':
            out = act.sigmoid()
        else:
            raise Exception("Sorry, no numbers below zero")
        return out
    
    def parameters(self):
        return self.w + [self.b]
    
class Layer:

    def __init__(self, nIn, nOut, _activation = 'tanh'):
        self._in = nIn
        self.neurons = [Neuron(nIn, _activation) for _ in range(nOut)]
    
    def __call__(self, data):
        assert self._in == len(data), "dimensions of data for the layer is not equal"
        act = [neuron(data) for neuron in self.neurons]
        return act
    
    def parameters(self):
        return [p for neuron in self.neurons for p in neuron.parameters()]

class MLP:
    def __init__(self, nIn, nOuts, _activation = 'tanh', _endSoftmax = False):
        self.softmaxFlag = _endSoftmax
        self.layers = []
        for nOut in nOuts:
            self.layers.append(Layer(nIn, nOut, _activation))
            nIn = nOut
            
    def __call__(self, data):
        for layer in self.layers:
            data = layer(data)
        if self.softmaxFlag:
            data = self.softmax(data)
        return data[0] if len(data) == 1 else data
    
    def softmax(self, output):
        denominator = sum((xi.exp() for xi in output))
        return [xi.exp()/denominator for xi in output]
    
    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

In [5]:
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0 , 1.0],
    [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0]

In [6]:
nIn = 3
nOuts = [4,4,1]
nnet = MLP(nIn, nOuts)

In [7]:
epochs = 20
learning_rate = 0.1
losses = [0]
for epoch in range(1, epochs + 1):
    ypred = [nnet(x) for x in xs]
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))
    losses[0] = loss
    for p in nnet.parameters():
        p.grad = 0
    loss.backward()
    
    for p in nnet.parameters():
        p.data += -learning_rate * p.grad
    
    print(f'For Epoch: {epoch}, loss is {loss}')

For Epoch: 1, loss is Value(data=8.366874758289365)
For Epoch: 2, loss is Value(data=5.982754722759114)
For Epoch: 3, loss is Value(data=4.630534065985107)
For Epoch: 4, loss is Value(data=3.8964351738689893)
For Epoch: 5, loss is Value(data=3.3643354527849585)
For Epoch: 6, loss is Value(data=3.0675857091797556)
For Epoch: 7, loss is Value(data=2.9309055296362443)
For Epoch: 8, loss is Value(data=2.6153740173176536)
For Epoch: 9, loss is Value(data=0.448378805623599)
For Epoch: 10, loss is Value(data=0.04935689340656739)
For Epoch: 11, loss is Value(data=0.04040286540101691)
For Epoch: 12, loss is Value(data=0.03469903315216916)
For Epoch: 13, loss is Value(data=0.030679102828642765)
For Epoch: 14, loss is Value(data=0.02765348233682992)
For Epoch: 15, loss is Value(data=0.02526900793826171)
For Epoch: 16, loss is Value(data=0.023325322781192797)
For Epoch: 17, loss is Value(data=0.02169988906883575)
For Epoch: 18, loss is Value(data=0.020313274990860913)
For Epoch: 19, loss is Value(

# CHINESE MNIST

In [8]:
PATH = '/Users/potrohit/Documents/GitHub/Machine Learning/Dataset/Chinese MNIST/'
SOURCE_FILE = 'chinese_mnist.csv'

IMAGE_PATH = '/data/data/'

DIM_LENGTH = 16
DIM_WIDTH = 16

In [9]:
df = pd.read_csv(PATH + SOURCE_FILE)

In [10]:
def create_file_name(x):
    return f"input_{x[0]}_{x[1]}_{x[2]}.jpg"

def read_image_size(x, grayscale = True):
    return list(cv2.imread(PATH + IMAGE_PATH + x, 0).shape)

def read_image(x, grayscale = True):
    image = cv2.imread(PATH + IMAGE_PATH + x, 0)
    image = cv2.resize(image, (DIM_LENGTH, DIM_WIDTH), interpolation = cv2.INTER_AREA)
    return np.array(image)

def flatten_image(image):
    return image.flatten()


In [11]:
uniqueMNIST = sorted((df['value'] .unique()))
Y = np.array(pd.get_dummies(df['value'], drop_first=False))

In [12]:
X = np.array(df.apply(create_file_name, axis = 1).apply(read_image).apply(flatten_image))

In [13]:
SIZE = DIM_LENGTH * DIM_WIDTH

out = len(uniqueMNIST)
nIn = SIZE
nOuts = [64, 32, 16, out]
model = MLP(nIn, nOuts, _activation = 'tanh', _endSoftmax = True)

In [14]:
def lossFunc(ypred, ys):
    return -sum(ygt * yout.log() for ygt, yout in zip(ys, ypred))

In [20]:
for x in X:
    print(x)
    t = model(x)
    print(t)
    print("f")

[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  2  4  5  3  0  0  0  0  0  0  0  0  0  0  0  0  2 22
 19  1  0  0  0  0  0  0  0  0  0  0  0  0  4 42  6  4  0  0  0  0  0  0
  0  0  0  0  1  2 40 87 50 44  0  0  0  0  0  0  0  0  0  0  4 42 45 17
 34 13  0  0  0  0  0  0  0  0  0  0  4  5 48  4 46  2  0  0  0  0  0  0
  0  0  0  0  3 19 33  1 47  2  9 13  0  0  0  0  0  0  0  0  2 31  4  3
 65 21 64 16  0  0  0  0  0  0  0  0  0  0  0  0 12 35  4  1  0  0  0  0
  0  0  0  0  0  0  0  0  2  1  1  1  0  0  0  0]
[Value(data=0.022209517034258375), Value(data=0.10226508193685961), Value(data=0.11846733784649914), Value(data=0.11727337319873522), Value(data=0.016221926591059383), Value(data=0.11971840814380424), Value(data=0.114649

OverflowError: math range error

In [36]:
epochs = 20
learning_rate = 0.1
losses = [0]
for epoch in range(1, epochs + 1):
    ypred = [nnet(x) for x in X]
    loss = lossFunc(ypred, Y)
    losses[0] = loss
    for p in nnet.parameters():
        p.grad = 0
    loss.backward()
    
    for p in nnet.parameters():
        p.data += -learning_rate * p.grad
    
    print(f'For Epoch: {epoch}, loss is {loss}')

[Value(data=0.02226180245017175),
 Value(data=0.0346191568047928),
 Value(data=0.027671686716018076),
 Value(data=0.01985319030510568),
 Value(data=0.13334588326968627),
 Value(data=0.08479302884910092),
 Value(data=0.01971750519870176),
 Value(data=0.13870448079816117),
 Value(data=0.02077559174077529),
 Value(data=0.019739843768080495),
 Value(data=0.14108568294973178),
 Value(data=0.12703455891467505),
 Value(data=0.14421237706547438),
 Value(data=0.046273670410340986),
 Value(data=0.01991154075918339)]

In [53]:
"""
# getting the data
1. read.csv
2. get_image, flatten the image and pair up with truth_label

#Data Analysis
1. truth_label distribution
2. 

#Data spliting
1.split the data to training and testing

#Data loading

"""

array([0, 0, 0, ..., 0, 0, 0], dtype=uint8)

In [None]:
import sklearn.impute import SimpleImputer
import sklearn.preprocessing import OneHotEncoder

import sklearn.base import BaseEstimator, TransfomerMixin

class getImage(BaseEstimator, TransfomerMixin):
    
    def fit(self, X, Y = None):
        return self
    def transform(self, X):
        return