In [66]:
import pandas as pd
import numpy as np
import math
import os

# import for time, and file change
import json
from PIL import Image

# frequent updating
import time
import random

In [67]:
# define filepaths
workspace = "/Users/advaysingh/Documents/projects/hindi_classification/" 
#print("Current workspace:", workspace)

data = os.path.join(workspace, 'data/Hindi/')
dict_lib = os.path.join(workspace, 'data/dict.csv')
img_path = os.path.join(workspace, 'server/snapshot.png')
out_file = os.path.join(workspace, 'server/outputs.json')


In [70]:
# create train/test dicts with files and labels

def create_dict(x: str) -> dict:
    x_dict = {}
    i = 0
    for dir in os.listdir(os.path.join(data, x)):
        for file in os.listdir(os.path.join(data, x, dir)):
            x_dict[os.path.join(data, x, dir, file)] = i
        i += 1
    return x_dict

# make pandas df

train_df = pd.DataFrame.from_dict(create_dict('Train'), orient='index')
test_df = pd.DataFrame.from_dict(create_dict('Test'), orient='index')

#print(train_df)
#df_temp = train_df[0].drop_duplicates()
# print(df_temp)

In [242]:
# Activations class
class Activation:
    # can add more types
    def __init__(self, act_type: str) -> None:
        self.act = act_type

    def print_act(self) -> None:
        return self.act
    
    def compute(self, z):

        # Sigmoid function
        if (self.act == 'sigmoid'):
            n = []
            for val in z:
                n.append(1 / (1 + math.exp(val)))
            return n
        
        # ReLU function
        elif (self.act == 'relu'):
            return np.maximum(0, z)

    def prime(self, z: list):

        # Sigmoid prime
        if (self.act == 'sigmoid'):
            return (self.compute(z)) * (1 - self.compute(z))
        
        # ReLU prime
        if (self.act == 'relu'):
            return int(z > 0)


def softmax(costs: list) -> list:
    exp_vals = []
    for cost in costs:
        print(math.exp(cost))
        exp_vals.append(math.exp(cost))
    return_vals = []
    for i in range(len(exp_vals)):
        return_vals.append(exp_vals[i] / sum(exp_vals))
    return return_vals

def softmax_prime(costs: list) -> list:
    return softmax(costs) * (1 - softmax(costs))

n = softmax([2, 3, 4])
print(n)

7.38905609893065
20.085536923187668
54.598150033144236
[0.09003057317038046, 0.24472847105479767, 0.6652409557748219]


In [259]:

class Model:
    def __init__(self, train_df: pd.DataFrame,
                  act: Activation, layers: list, step: float, stochastic: bool, load_perams: bool) -> None:
        self.train_df = train_df

        # create activation function
        self.act = act
        self.layers = layers

        # init weight and bias with # layers np arrays
        self.all_weights = [None] * (int(len(layers)))
        self.all_bias = [None] * (int(len(layers)))
        if (load_perams):
            layer = 0
            weightlist = os.listdir(os.path.join(workspace, 'data', 'Hyper_p/Weights'))
            for file in sorted(weightlist, key=lambda s: s.lower()):
                self.all_weights[layer] = pd.read_csv(os.path.join(workspace, 'data', 'Hyper_p/Weights', file)).to_numpy()
                #self.all_weights[layer] = np.delete(self.all_weights, [0], axis=1)
                print(self.all_weights[layer].shape)
                layer += 1
            layer = 0
            biaslist = os.listdir(os.path.join(workspace, 'data', 'Hyper_p/Biases'))
            for file in sorted(biaslist, key=lambda s: s.lower()):
                self.all_bias[layer] = pd.read_csv(os.path.join(workspace, 'data', 'Hyper_p/Biases', file)).to_numpy()
                layer += 1
            print(self.all_weights[0])
        else:
            for i in range(len(layers)):
                self.all_weights[i] = self.random_arrs(i)
                if i == 0:
                    self.all_bias[i] = np.random.uniform(low=-0.03125, high=0.03125, size=(self.layers[i],1))
                else:
                    bias_val = 1 / math.sqrt(self.layers[layer - 1])
                    self.all_bias[i] = np.random.uniform(low=-bias_val, high=bias_val, size=(self.layers[i],1))
            #for val in self.all_weights:
               # print(val.shape)

        # init learn speed
        self.speed = step

        # determine gradient decent type
        self.stochastic = stochastic

    def random_arrs(self, layer: int):
        if (layer == 0):
            return(np.random.uniform(low=-0.03125, high=0.03125, size=(self.layers[layer],1024)))
        else:
            #return(np.random.uniform(low=-0.03125, high=0.03125, size=(self.layers[layer], self.layers[layer-1])))
            weight_val = 1 / math.sqrt(self.layers[layer - 1])
            return(np.random.uniform(low=-weight_val, high=weight_val, size=(self.layers[layer], self.layers[layer-1])))

    def img_to_np(self, dir) -> np.array:
        return np.array(Image.open(dir)).flatten()


    def print_weights(self) -> None:
        for arr in self.all_weights:
            print(arr.shape)

    def prop_forward(self, inputs: list, layer: int) -> list:
        a_vals = [0.0] * int(self.layers[layer])
        a_vals = np.dot(self.all_weights[layer], pd.DataFrame(inputs)) + self.all_bias[layer]
        if (layer == len(self.layers) - 1):
            return (softmax(a_vals))
        return(self.act.compute(a_vals))
    


    def prop_backword(self, x: list, label: int, layer: int):
        outputs = self.prop_forward(x, layer)
        if (layer == len(self.layers) - 1):
            y = self.vals_for_softmax(label)
            for i in range(len(y) - 1):
                val = (y[i] - outputs[i]) * softmax_prime(i, outputs) 
                for j in range(len(x) - 1):
                    self.all_weights[layer][i][j] += val* x[j]
                    self.all_bias[layer][i][j] += val
                return(val)

        for output in range(len(outputs)):
            val = float(self.prop_backword(outputs, label, layer + 1)) * float(sum(self.all_weights[layer][output]))
            for input in range(len(x) - 1):
                val += self.act.prime(x[input]) #edit
                self.all_weights[layer][output][input] += val * x[input]
                self.all_bias[layer][output][input] += val
            return val


    # potentially reconsider for time complexity
    def vals_for_softmax(self, index: int) -> list:
        vals = self.train_df[0].unique()
        for i in range(len(vals)):
            if vals[i] != index:
                vals[i] = 0
            else:
                vals[i] = 1
        return vals


    def train(self, epochs: int) -> None:
        # call back prop with dataset epochs times
        for epoch in  range(epochs):
            for img_dir, row in self.train_df.iterrows():
                self.prop_backword(self.img_to_np(img_dir).tolist(), row[0], 0)
            print("epoch: ", epoch, self.validate(train_df))

    def pred(self, img: np.array) -> int:
        for i in range(len(self.layers)):
            img = self.prop_forward(img, i)
        return img

    def validate(self, test_df: pd.DataFrame) -> None:
        count, countright = 0, 0
        for img_dir, row in test_df.iterrows():
            outputs = self.pred(self.img_to_np(img_dir))
            actual = list(self.vals_for_softmax(row[0]))
            if (outputs.index(max(outputs))) == (actual.index(max(actual))):
                countright += 1
            count += 1
        print("count: ", count, "countright: ", countright)

    def save(self) -> None:
        hp_data = '/Users/advaysingh/Documents/projects/hindi_classification/data/hyper_p'
        hp_data_biases, hp_data_weights = hp_data + '/Biases', hp_data + '/Weights'
        os.makedirs(hp_data)
        os.makedirs(hp_data_biases)
        os.makedirs(hp_data_weights)
        for layer in range(len(self.layers)):
            df_weights = pd.DataFrame(self.all_weights[layer])
            df_bias = pd.DataFrame(self.all_bias[layer])
            weights_file_name = "layer_" + str(layer) + "weights.csv"
            bias_file_name = "layer_" + str(layer) + "bias.csv"
            weights_file = open(os.path.join(hp_data_weights, weights_file_name), "x")
            bias_file = open(os.path.join(hp_data_biases, bias_file_name), "x")
            df_weights.to_csv(weights_file, index=False)
            df_bias.to_csv(bias_file, index=False)


    """def print_weights(self, layer: int) -> None:
        for row in self.all_weights[layer]:
            for weight in row:
                print(weight)"""



In [260]:
cnn = Model(train_df, Activation('sigmoid'), [5, 46], 1.2, False, False)
cnn.print_weights()
cnn.prop_forward([3] * 1024, 0)
cnn.save()
#cnn.print_weights(1)
#cnn.validate(test_df)
#cnn.train(1) # something for committing

(5, 1024)
(46, 5)


  n.append(1 / (1 + math.exp(val)))


In [None]:

dict = open(os.path.join(data, 'dict.csv'), "x")



In [256]:
import json
import time
import random

from PIL import Image

cnn = Model(train_df, Activation('sigmoid'), [5, 46], 1.2, False, True)
"""outs = cnn.pred(cnn.img_to_np(img_path))
index = outs.index(max(outs))
new_english_value = pd.read_csv(os.path.join(workspace, 'data', 'dict.csv'))[index][1]"""

def update_json_file(hindi_value, english_value):
    with open(out_file, 'r') as file:
        data = json.load(file)
    data['hindi'] = hindi_value
    data['english'] = english_value
    with open(out_file, 'w') as file:
        json.dump(data, file, indent=4)

# Define a list of possible values if applicable
hindi_values = ["हिं", "नया", "मूल्य", "उदाहरण"]
english_values = ["Eng", "New", "Value", "Example"]

# Loop to update the JSON file every second with a random value
try:
    while True:
        # Generate a random value from the lists
        new_hindi_value = random.choice(hindi_values)
        #new_english_value = random.choice(english_values)
        img = np.array(Image.open(img_path).convert("L").resize((32, 32))).flatten() * 0.05
        index = cnn.pred(img).index(max(cnn.pred(img)))
        new_english_value = str(pd.read_csv(os.path.join(workspace, 'data', 'dict.csv')).iloc[index, 1])

        update_json_file(new_hindi_value, new_english_value)
        
        # Print the new values (for debug purposes)
        # print(f'Updated JSON file with hindi: {new_hindi_value}, english: {new_english_value}')

        # read in and resize img
 
        # read in image for debugging
        """img.show("img to classify")
        print(img.format)
        print(img.mode)
        print(img.size)"""

        # create np array and resize
        """img_data = np.array(img).flatten()
        print(img_data.shape)
        print(img_data)"""

        
        time.sleep(10)  # Wait for 1 second
except KeyboardInterrupt:
    print("Update stopped by user.")

(5, 1024)
(46, 5)
[[0.13867084 0.00595203 0.84538799 ... 0.16139284 0.07455903 0.68609528]
 [0.47035962 0.93018672 0.20685471 ... 0.39655035 0.54848046 0.29436777]
 [0.84542472 0.81282378 0.35524723 ... 0.92808459 0.27871833 0.35993218]
 [0.55314791 0.50851145 0.15798064 ... 0.978258   0.65857135 0.43516438]
 [0.51129215 0.66847693 0.24802133 ... 0.86948478 0.90842828 0.5043761 ]]
4.806359059720454
1.9927204874541318
3.056150994231626
1.969403031284487
7.991853783471584
7.702991264695115
6.147757756773159
4.330069420450055
4.3223139801812485
4.653181189186932
4.372100910134904
5.264311200049357
4.27946577096913
5.339943948393503
6.2519441986701105
7.56205486872587
3.867131239858871
2.1495707086092444
3.1738733973930064
2.5674161467457686
5.530193553419669
3.490622193144374
2.1550618511655215
4.499859604811533
4.915958746109226
6.104691156045543
3.1692962654628953
1.6790756204096728
5.613115928448037
2.7187267833250957
2.3632618594338304
3.796636642516107
3.600634975171241
5.11443013094

  n.append(1 / (1 + math.exp(val)))
  print(math.exp(cost))
  exp_vals.append(math.exp(cost))
  n.append(1 / (1 + math.exp(val)))
  print(math.exp(cost))
  exp_vals.append(math.exp(cost))


2.573044265734034
1.0486027792648842
1.5838599989366875
1.1592791812437373
2.5144446304022603
2.461425435279481
2.144832370509424
1.4736056385528256
1.7699573803056114
1.5432091526085616
1.4411713337163943
2.287997519063537
1.5353074206030755
2.578163399310738
2.451090200452225
2.5806659049549325
1.4222633356639025
1.0767905917119702
1.0378201870286503
1.3254902206603383
2.0902876231909318
1.745049427442965
1.0139222492409876
1.5349860481678845
2.5102254147131746
1.746623011208863
1.1402802624230917
1.0979892562600582
1.6164356262411954
1.1736866799786805
1.0290960744957862
2.260102321045
1.5667489501154157
2.0812589943606596
1.8814331804445916
1.2900028651996003
1.0809333659663083
1.7039074212384637
1.5715118402630022
1.6161528373804297
1.9718958149503667
2.4649840431715853
1.0304314325398418
1.8163776607373634
1.1841030248440265
2.2293779080996305
2.573044265734034
1.0486027792648842
1.5838599989366875
1.1592791812437373
2.5144446304022603
2.461425435279481
2.144832370509424
1.473605

  n.append(1 / (1 + math.exp(val)))
  print(math.exp(cost))
  exp_vals.append(math.exp(cost))


2.573044265731372
1.0486027792641646
1.583859998935726
1.1592791812433159
2.514444630399244
2.4614254352764307
2.1448323705075043
1.4736056385516156
1.7699573803028141
1.5432091526065699
1.4411713337147345
2.2879975190615767
1.5353074206009272
2.578163399307718
2.4510902004518638
2.580665904953947
1.422263335662239
1.0767905917116372
1.0378201870272419
1.3254902206590784
2.090287623187611
1.7450494274413706
1.0139222492399111
1.5349860481661357
2.5102254147128322
1.746623011207198
1.1402802624212893
1.097989256259828
1.616435626238837
1.1736866799771062
1.0290960744943405
2.260102321044491
1.5667489501153264
2.081258994359074
1.8814331804419184
1.2900028651984405
1.0809333659650724
1.7039074212356247
1.5715118402610706
1.6161528373800595
1.9718958149482
2.464984043169432
1.0304314325385322
1.816377660734668
1.184103024843454
2.2293779080978737
2.573044265731372
1.0486027792641646
1.583859998935726
1.1592791812433159
2.514444630399244
2.4614254352764307
2.1448323705075043
1.473605638551

  n.append(1 / (1 + math.exp(val)))
  print(math.exp(cost))
  exp_vals.append(math.exp(cost))


2.573044265731372
1.0486027792641646
1.583859998935726
1.1592791812433159
2.514444630399244
2.4614254352764307
2.1448323705075043
1.4736056385516156
1.7699573803028141
1.5432091526065699
1.4411713337147345
2.2879975190615767
1.5353074206009272
2.578163399307718
2.4510902004518638
2.580665904953947
1.422263335662239
1.0767905917116372
1.0378201870272419
1.3254902206590784
2.090287623187611
1.7450494274413706
1.0139222492399111
1.5349860481661357
2.5102254147128322
1.746623011207198
1.1402802624212893
1.097989256259828
1.616435626238837
1.1736866799771062
1.0290960744943405
2.260102321044491
1.5667489501153264
2.081258994359074
1.8814331804419184
1.2900028651984405
1.0809333659650724
1.7039074212356247
1.5715118402610706
1.6161528373800595
1.9718958149482
2.464984043169432
1.0304314325385322
1.816377660734668
1.184103024843454
2.2293779080978737
2.573044265731372
1.0486027792641646
1.583859998935726
1.1592791812433159
2.514444630399244
2.4614254352764307
2.1448323705075043
1.473605638551

  n.append(1 / (1 + math.exp(val)))


OverflowError: math range error