In [66]:
import pandas as pd
import numpy as np
import math
import os

# import for time, and file change
import json
from PIL import Image

# frequent updating
import time
import random

In [67]:
# define filepaths
workspace = "/Users/advaysingh/Documents/projects/hindi_classification/" 
#print("Current workspace:", workspace)

data = os.path.join(workspace, 'data/Hindi/')
dict_lib = os.path.join(workspace, 'data/dict.csv')
img_path = os.path.join(workspace, 'server/snapshot.png')
out_file = os.path.join(workspace, 'server/outputs.json')


In [70]:
# create train/test dicts with files and labels

def create_dict(x: str) -> dict:
    x_dict = {}
    i = 0
    for dir in os.listdir(os.path.join(data, x)):
        for file in os.listdir(os.path.join(data, x, dir)):
            x_dict[os.path.join(data, x, dir, file)] = i
        i += 1
    return x_dict

# make pandas df

train_df = pd.DataFrame.from_dict(create_dict('Train'), orient='index')
test_df = pd.DataFrame.from_dict(create_dict('Test'), orient='index')

#print(train_df)
#df_temp = train_df[0].drop_duplicates()
# print(df_temp)

In [495]:
# Activations class
class Activation:
    # can add more types
    def __init__(self, act_type: str) -> None:
        self.act = act_type

    def print_act(self) -> None:
        return self.act
    
    def compute(self, z) -> list:

        # Sigmoid function
        if (self.act == 'sigmoid'):
            n = []
            for val in np.array(z):
                n.append(1 / (1 + np.exp(-val)))
            return n
        
        # ReLU function
        elif (self.act == 'relu'):
            vals = []
            for val in z:
                if val < 0.0:
                    vals.append(0.0)
                else:
                    vals.append(1.0)
            return vals

    def prime(self, z: list):

        # Sigmoid prime
        if (self.act == 'sigmoid'):
            sigs = self.compute(z)
            vals = []
            for val in sigs:
                vals.append(val * (1 - val))
            return vals
        
        # ReLU prime
        if (self.act == 'relu'):
            vals = []
            for val in z:
                if val == 0:
                    vals.append(0.0)
                else:
                    vals.append(1.0)
            return vals


def softmax(costs: list) -> list:
    exp_vals = []
    for cost in np.array(costs):
        exp_vals.append(np.exp(cost))
    return_vals = []
    for i in range(len(exp_vals)):
        return_vals.append(exp_vals[i] / sum(exp_vals))
    return return_vals

def softmax_prime(costs: list) -> list:
    softs = softmax(costs)
    vals = []
    for val in softs:
        vals.append(val * (1 - val))
    return vals

n = softmax([2, 3, 4])
print(n)

[0.09003057317038046, 0.24472847105479767, 0.6652409557748219]


In [547]:

class Model:
    def __init__(self, train_df: pd.DataFrame,
                  act: Activation, layers: list, step: float, stochastic: bool, load_perams: bool) -> None:
        self.train_df = train_df

        # create activation function
        self.act = act
        self.layers = layers

        # init weight and bias with # layers np arrays
        # Using the Xavier weight init method https://machinelearningmastery.com/weight-initialization-for-deep-learning-neural-networks/
        self.all_weights = [None] * (int(len(layers)))
        self.all_bias = [None] * (int(len(layers)))
        if (load_perams):
            layer = 0
            weightlist = os.listdir(os.path.join(workspace, 'data', 'Hyper_p/Weights'))
            for file in sorted(weightlist, key=lambda s: s.lower()):
                self.all_weights[layer] = pd.read_csv(os.path.join(workspace, 'data', 'Hyper_p/Weights', file)).to_numpy()
                #self.all_weights[layer] = np.delete(self.all_weights, [0], axis=1)
                print(self.all_weights[layer].shape)
                layer += 1
            layer = 0
            biaslist = os.listdir(os.path.join(workspace, 'data', 'Hyper_p/Biases'))
            for file in sorted(biaslist, key=lambda s: s.lower()):
                self.all_bias[layer] = pd.read_csv(os.path.join(workspace, 'data', 'Hyper_p/Biases', file)).to_numpy()
                layer += 1
            print(self.all_weights[0])
        else:
            for i in range(len(layers)):
                self.all_weights[i] = self.random_arrs(i)
                if i == 0:
                    self.all_bias[i] = np.random.uniform(low=-0.03125, high=0.03125, size=(self.layers[i],1))
                else:
                    bias_val = 1 / math.sqrt(self.layers[i - 1])
                    self.all_bias[i] = np.random.uniform(low=-bias_val, high=bias_val, size=(self.layers[i],1))
            #for val in self.all_weights:
               # print(val.shape)

        # init learn speed
        self.speed = step

        # determine gradient decent type
        self.stochastic = stochastic

    def random_arrs(self, layer: int):
        if (layer == 0):
            return(np.random.uniform(low=-0.03125, high=0.03125, size=(self.layers[layer],1024)))
        else:
            weight_val = 1 / math.sqrt(self.layers[layer - 1])
            return(np.random.uniform(low=-weight_val, high=weight_val, size=(self.layers[layer], self.layers[layer-1])))

    def img_to_np(self, dir) -> np.array:
        return np.array(Image.open(dir)).flatten()


    """def print_weights(self) -> None:
        for arr in self.all_weights:
            print(arr.shape)"""

    def prop_forward(self, inputs: list, layer: int) -> list:
        a_vals = [0.0] * int(self.layers[layer])
        a_vals = np.dot(self.all_weights[layer], pd.DataFrame(inputs)) + self.all_bias[layer]
        if (layer == len(self.layers) - 1):
            return (softmax(a_vals))
        return(self.act.compute(a_vals))
    



    def prop_backword(self, x: list, label: list):
        delta_w = [None] * len(self.layers)
        delta_b = [None] * len(self.layers)
        inputs = []
        for i in range(len(self.layers)):
            inputs.append(x)
            x = self.prop_forward(x, i)
        error = [a - b for a, b in zip(x, label)]
        vals = np.array([a * b for a, b in zip(error, softmax_prime(x))])
        for i in range(len(self.layers), 0, -1):
            delta_w[i - 1] = np.dot(pd.DataFrame(vals), pd.DataFrame(inputs[i - 1]).transpose())
            delta_b[i - 1] = vals
            #print(pd.DataFrame(delta_w[i]).transpose().shape, pd.DataFrame(vals).shape)
            vals = np.dot(pd.DataFrame(self.all_weights[i - 1]).transpose(), pd.DataFrame(vals))
            vals = [a * b for a, b in zip(vals, self.act.prime(inputs[i - 1]))]
            print(np.dot(pd.DataFrame(vals), pd.DataFrame(inputs[0]).transpose()))
        #print(delta_w[0])
        return delta_w, delta_b

            


    # potentially reconsider for time complexity
    def vals_for_softmax(self, index: int) -> list:
        vals = self.train_df[0].unique()
        for i in range(len(vals)):
            if vals[i] != index:
                vals[i] = 0
            else:
                vals[i] = 1
        return vals


    def train(self, epochs: int) -> None:
        length_set = len(self.train_df)
        for epoch in  range(epochs):
            delta_w = [np.zeros_like(array) for array in self.all_weights]
            delta_b = [np.zeros_like(array) for array in self.all_bias]
            count = 0
            for img_dir, row in self.train_df.iterrows():
                #print("img", count)
                count += 1
                temp_w, temp_b = self.prop_backword(self.img_to_np(img_dir).tolist(), self.vals_for_softmax(row[0]))
                for i in range(len(self.layers)):
                    delta_b[i] += temp_b[i]
                    delta_w[i] += temp_w[i]
                delta_w += temp_w
            delta_w = [array / length_set for array in delta_w]
            delta_b = [np.array(array) / length_set for array in delta_b]
            for i in range(len(self.layers)):
                self.all_weights[i] -= delta_w[i] * self.speed
                self.all_bias[i] -= delta_b[i] * self.speed
            print("validating")
            print("epoch: ", epoch)
            self.validate(self.train_df)

    def pred(self, img: np.array) -> int:
        for i in range(len(self.layers)):
            img = self.prop_forward(img, i)
        return img

    def validate(self, test_df: pd.DataFrame) -> None:
        count, countright = 0, 0
        for img_dir, row in test_df.iterrows():
            outputs = self.pred(self.img_to_np(img_dir))
            actual = list(self.vals_for_softmax(row[0]))
            if (outputs.index(max(outputs))) == (actual.index(max(actual))):
                countright += 1
            count += 1
            accuracy = (float(countright) / float(count)) * 100
        print("count: ", count, "countright: ", countright, ". Accuracy: ", accuracy, "%")

    def save(self) -> None:
        hp_data = '/Users/advaysingh/Documents/projects/hindi_classification/data/hyper_p'
        hp_data_biases, hp_data_weights = hp_data + '/Biases', hp_data + '/Weights'
        os.makedirs(hp_data)
        os.makedirs(hp_data_biases)
        os.makedirs(hp_data_weights)
        for layer in range(len(self.layers)):
            df_weights = pd.DataFrame(self.all_weights[layer])
            df_bias = pd.DataFrame(self.all_bias[layer])
            weights_file_name = "layer_" + str(layer) + "weights.csv"
            bias_file_name = "layer_" + str(layer) + "bias.csv"
            weights_file = open(os.path.join(hp_data_weights, weights_file_name), "x")
            bias_file = open(os.path.join(hp_data_biases, bias_file_name), "x")
            df_weights.to_csv(weights_file, index=False)
            df_bias.to_csv(bias_file, index=False)


    def print_weights(self, layer: int) -> None:
        for row in self.all_weights[layer]:
            for weight in row:
                print(weight)



In [548]:
new_test = test_df.sample(500)
cnn = Model(new_test, Activation('relu'), [5, 46], 1000, False, False)
cnn.print_weights(0)
cnn.train(1)

#cnn.prop_forward([3] * 1024, 0)
#cnn.save()
#cnn.print_weights(1)
#cnn.validate(test_df)
#cnn.train(1) # something for committing

-0.01062750715839994
0.007652252567460313
0.023541088075206086
0.018299782267635976
0.00952854812791578
-0.005787265165819441
0.01654097853590141
0.014197685339172456
0.016768244977692144
0.028779695909494134
0.03122622968848028
0.004228137099479598
0.014126225986520344
-0.030124283555381354
0.007103166447919654
-0.0019175524882018916
-0.010329110647268942
-0.02557543011229097
0.01634520378304627
-0.015464278083665843
0.03106186718174673
-0.025982672704287148
-0.02874241782647849
0.015841746411766625
0.025719760639348152
-0.004662568291352691
-0.02957139854481696
0.01749754333101374
-0.011315181294155804
-0.005347669569164423
0.0007148730801590647
-0.022477951296553528
-0.003948523077377683
0.009474810329202707
0.01994492232916629
-0.02096293686092022
0.027182926896913172
-0.000309093124453097
0.02104313892836466
0.031170918541900114
0.020138399841702743
0.006267461175505341
0.02138288530781597
-0.0026168353857764365
0.030180600105056854
-0.027653253375678442
-0.006334328971110477
-0.0

TypeError: list indices must be integers or slices, not tuple

In [546]:
cnn.print_weights(0)

-0.005547120615297413
-0.011746912464233768
0.004625354997433502
0.014287985322078298
0.005184932288150673
-0.013737964413632196
0.00900810504721801
-0.009935247303208425
-0.02658371512048089
-0.01584297040434221
-0.004238665125223605
-0.019979863409294023
-0.022453134525926607
-0.014658180072926054
-0.013965898225901018
-0.020139065559801717
0.028537913385258594
0.02266986341782752
-0.00828879872178536
-0.01590310127019553
-0.020476170553033163
-0.004561618689192061
-0.011670802854894163
-0.01375932667121943
-0.006719468581312504
-0.004519029150003351
-0.01568870189506299
0.01880890524230778
0.021780220169542043
-0.012797563582387775
-0.0064516406169972534
-0.020640367475913246
-0.020234443555418632
0.013625772950276366
0.02107450089121056
0.007542309968073264
-0.02800549880900633
-0.030700377354083784
-0.023374053057978954
0.00873450379872627
0.011882592958550847
-0.007180359438423656
-0.003864966705324442
0.00114608559061629
-0.0012207453139293015
-0.030682406812071288
0.02793612651

In [433]:

cnn.validate(new_test)

count:  1000 countright:  18 . Accuracy:  1.7999999999999998 %


In [434]:
cnn.save()

In [435]:
import json
import time
import random

from PIL import Image

cnn = Model(train_df, Activation('sigmoid'), [5, 46], 1.2, False, True)
"""outs = cnn.pred(cnn.img_to_np(img_path))
index = outs.index(max(outs))
new_english_value = pd.read_csv(os.path.join(workspace, 'data', 'dict.csv'))[index][1]"""

def update_json_file(hindi_value, english_value):
    with open(out_file, 'r') as file:
        data = json.load(file)
    data['hindi'] = hindi_value
    data['english'] = english_value
    with open(out_file, 'w') as file:
        json.dump(data, file, indent=4)

# Define a list of possible values if applicable
hindi_values = ["हिं", "नया", "मूल्य", "उदाहरण"]
english_values = ["Eng", "New", "Value", "Example"]

# Loop to update the JSON file every second with a random value
try:
    while True:
        # Generate a random value from the lists
        new_hindi_value = random.choice(hindi_values)
        #new_english_value = random.choice(english_values)
        img = np.array(Image.open(img_path).convert("L").resize((32, 32))).flatten()
        index = cnn.pred(img).index(max(cnn.pred(img)))
        new_english_value = str(pd.read_csv(os.path.join(workspace, 'data', 'dict.csv')).iloc[index, 1])

        update_json_file(new_hindi_value, new_english_value)
        
        # Print the new values (for debug purposes)
        # print(f'Updated JSON file with hindi: {new_hindi_value}, english: {new_english_value}')

        # read in and resize img
 
        # read in image for debugging
        """img.show("img to classify")
        print(img.format)
        print(img.mode)
        print(img.size)"""

        # create np array and resize
        """img_data = np.array(img).flatten()
        print(img_data.shape)
        print(img_data)"""

        
        time.sleep(10)  # Wait for 1 second
except KeyboardInterrupt:
    print("Update stopped by user.")

(30, 1024)
(46, 30)
[[ 2.39089813e-02 -8.36896648e-03  4.12625345e-03 ...  1.92886718e-02
  -2.62602651e-02  7.81807686e-03]
 [ 1.65654745e-02 -2.85756457e-02 -2.05020029e-02 ...  2.96755714e-02
   5.90393481e-03  3.04189374e-02]
 [-3.06255802e-02 -1.04373009e-02  2.85418886e-02 ... -3.26567078e-03
  -3.17323903e-03  1.28939421e-02]
 ...
 [ 1.23610976e-02  1.70870990e-02  2.62980493e-02 ... -1.32559640e-02
   1.76780582e-02 -1.63715496e-02]
 [-2.22031978e-02  2.44954589e-03  4.26358205e-03 ... -5.18734768e-03
  -2.90651679e-05 -1.56999077e-02]
 [-2.17360446e-02 -2.44752201e-02  2.81907141e-02 ...  7.88017800e-03
   5.61672831e-03  1.36145227e-05]]
Update stopped by user.
