In [1]:
import time

In [2]:
english_sentences = """This paper reviews the artificial intelligent algorithms in engine management. This study
provides a clear image of the current state of affairs for the past 15 years and provides fresh insights
and improvements for future directions in the field of engine management. The scope of this paper
comprises three main aspects to be discussed, namely, engine performance, engine control, and engine
diagnosis. The first is associated with the need to control the basic characteristics that prove that the
engine is working properly, namely, emission control and fuel economy."""

In [3]:
def random_gen():
    """
    Function to get a random values for the weights and the biases
    
    Returns: Random Integer
    
    """
    current=time.time()
    last_digit=int(str(current-int(current))[-1])
    if last_digit%2==0:
        return last_digit
    else:
        return -1*last_digit

In [4]:
def remove_tags(corpus):
    """
    Args : List of words
    
    Cleans the words of any tags and special characters.
    
    """
    for number in range(len(corpus)):
        corpus[number] = corpus[number].replace("\n", " ")
        corpus[number] = corpus[number].replace(".", "")
        corpus[number] = corpus[number].lower()

In [5]:
def create_vocabulary(words_list):
    """
    Creates a vocabulary list that can be used for one hot encoding later.
    
    Args : List of sorted words
    
    Returns : Vocabulary List
    
    """
    vocab={}
    index=0
    for _ ,word in enumerate(words):
        if word in vocab.keys():
            continue
        else:
            vocab[word]=index
            index+=1
    return vocab

In [11]:
vocab = create_vocabulary(english_sentences)

In [12]:
def one_hot_encoding(word, vocabulary=vocab):
    """
    Returns the one hot encoding of the word with respect to the vocabulary
    
    Args : specific word to encode, vocabulary
    
    Returns : one hot encoding of the word
    
    """
    one_hot_list=[0 for x in range(len(vocabulary))]
    one_hot_list[vocabulary[word]]=1
    return one_hot_list

In [13]:
def create_feature_matrix(dimensions, vocabulary = vocab):
    """
    Create the feature matrix which is used to make the embedding matrix
    
    Args : Dimensions of the embeddings specified, Vocabulary used.
    
    Returns : Feature Matrix
    
    """
    feature_matrix = [[(random_gen() + ((y*y/100)*random_gen()/10 + random_gen()) + x*random_gen())/100 for y in range(len(vocabulary))] for x in range(dimensions)]
    return feature_matrix

In [16]:
def create_embedding_matrix(word, feature_matrix=E):
    """
    Create the embedding matrix for a word
    
    Args : Word, Feature Matrix 
    
    Returns : Embedding Matrix
    
    """
    embedding_matrix=[]
    one_hot_list=one_hot_encoding(word)
    for i in range(len(feature_matrix)):
        summ=0
        for j in range(len(feature_matrix[0])):
            summ+=one_hot_list[j] * feature_matrix[i][j]
        embedding_matrix.append(summ)
    return embedding_matrix

In [15]:
words= english_sentences.split()
remove_tags(words)
words.sort()
vocab = create_vocabulary(words)
E = create_feature_matrix(3)

In [66]:
#make context, target pairs (window of only 1, in the forward direction only)
dataset=[]
sentences =english_sentences.split()
remove_tags(sentences)
for index in range(len(sentences)-1):
    dataset.append((sentences[index], sentences[index+1]))
    #dataset.append((sentences[index], sentences[index+2]))

In [67]:
dataset

[('this', 'paper'),
 ('paper', 'reviews'),
 ('reviews', 'the'),
 ('the', 'artificial'),
 ('artificial', 'intelligent'),
 ('intelligent', 'algorithms'),
 ('algorithms', 'in'),
 ('in', 'engine'),
 ('engine', 'management'),
 ('management', 'this'),
 ('this', 'study'),
 ('study', 'provides'),
 ('provides', 'a'),
 ('a', 'clear'),
 ('clear', 'image'),
 ('image', 'of'),
 ('of', 'the'),
 ('the', 'current'),
 ('current', 'state'),
 ('state', 'of'),
 ('of', 'affairs'),
 ('affairs', 'for'),
 ('for', 'the'),
 ('the', 'past'),
 ('past', '15'),
 ('15', 'years'),
 ('years', 'and'),
 ('and', 'provides'),
 ('provides', 'fresh'),
 ('fresh', 'insights'),
 ('insights', 'and'),
 ('and', 'improvements'),
 ('improvements', 'for'),
 ('for', 'future'),
 ('future', 'directions'),
 ('directions', 'in'),
 ('in', 'the'),
 ('the', 'field'),
 ('field', 'of'),
 ('of', 'engine'),
 ('engine', 'management'),
 ('management', 'the'),
 ('the', 'scope'),
 ('scope', 'of'),
 ('of', 'this'),
 ('this', 'paper'),
 ('paper', 'com

In [35]:
#dataset[i][0]--->context
#dataset[i][1]---->target

In [91]:
#Helper Functions defined here:

#Function for natural log, later can be used for log base 10. Can also try the Taylor Definition, but this seemed easier. I will compare precision later.
def ln(x):
    n = 10000000.0
    return n * ((x ** (1/n)) - 1)

#Can convert to base 10
def log(x):
  return ln(x)/ln(10)

#Function to calculate derivative of a function
def derivative(value,function):
  step=0.000001
  return (function(value+step) - function(value-step))/(2*step)

#Function to calculate the sigmoid
e=2.71828
def sigmoid(value):
  return 1/(1+e**(-1*value))

#Function to calculate the derivative of Binary Cross Entropy Loss
def bce_loss_derivative(y_true,z):
  return (y_true-z)/(z)*(1-z)

#Function to calculate the derivative of the sigmoid function
def sigmoid_derivative(value):
  return sigmoid(value) * (1-sigmoid(value))

#Function to calculate the Binary Cross Entropy Loss
def bce_loss(value,y_true):
  return -1*((y_true * log(value)) + ((1-y_true) * log(1-value)))

#Function to calculate the softmax
def softmax(layer):
  sum=0
  for i in range(len(layer)):
    sum+= 2.71828**layer[i]
  for i in range(len(layer)):
    layer[i]=2.71828**layer[i]/sum

#Function to calculate categorical loss
def categorical_loss(layer,y_true):
  loss=log(layer[y_true])
  return loss

#Function to calculate derivative of categorical loss
def cl_derivative(value,y_true, vocab_size=57):
    #one_hot=[0,0,0]
    one_hot=[0 for x in range(vocab_size)]
    one_hot[y_true]=1
    return value-one_hot[0]

#Function to take deepcopy of an object
def deep_copy(obj):
    if isinstance(obj, list):
        return [deep_copy(item) for item in obj]
    elif isinstance(obj, dict):
        return {key: deep_copy(value) for key, value in obj.items()}
    else:
        return obj

In [131]:
INPUT_LAYER = len(vocab)
HIDDEN_LAYER = 3
OUTPUT_LAYER = len(vocab)

In [132]:
no_of_neurons=[INPUT_LAYER, HIDDEN_LAYER, OUTPUT_LAYER]
n=no_of_neurons

In [158]:
#define the weights here:
Weights=[]
for i in range(len(n)-1):
  current=time.time()
  m=int(str(current-int(current))[-1])
  Weights.append([[(m/10 + x*random_gen()/100 - x*m*random_gen()/100+ random_gen())/10 for x in range(n[i+1])] for y in range(n[i])])

In [159]:
#define the biases here:
Biases=[]
for i in range(len(n)-1):
  current=time.time()
  m=int(str(current-int(current))[-1])
  Biases.append([(m/10+x*random_gen()/100 - x*m*random_gen()/100+ random_gen())/10 for x in range(n[i+1])])

In [160]:
#define the gradients of weights here:
Weights_Grad=[]
for i in range(len(n)-1):
  Weights_Grad.append([[0 for x in range(n[i+1])] for y in range(n[i])])

In [161]:
#define the gradients of biases here:
Biases_Grad=[]
for i in range(len(n)-1):
  Biases_Grad.append([0 for x in range(n[i+1])])

In [162]:
#define the activations here
Activations=[]
for i in range(len(n)-1):
  Activations.append([0 for x in range(n[i+1])])

In [163]:
#define cached activations here
ad=[]
for i in range(len(n)-1):
  ad.append([0 for x in range(n[i+1])])

In [164]:
Weights[0]

[[0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.22000000000000003, 0.21800000000000003, 0.21600000000000003],
 [0.220000

In [165]:
Biases

[[-0.63, -0.588, -0.546],
 [0.27,
  0.258,
  0.246,
  0.23399999999999999,
  0.22199999999999998,
  0.21000000000000002,
  0.198,
  0.186,
  0.174,
  0.16199999999999998,
  0.15,
  0.13799999999999998,
  0.126,
  0.11399999999999999,
  0.10200000000000001,
  0.09,
  0.07799999999999999,
  0.06600000000000002,
  0.054000000000000006,
  0.041999999999999996,
  0.030000000000000027,
  0.017999999999999995,
  0.005999999999999983,
  -0.0060000000000000496,
  -0.01799999999999997,
  -0.02999999999999998,
  -0.041999999999999996,
  -0.054000000000000006,
  -0.06600000000000002,
  -0.07799999999999999,
  -0.09000000000000004,
  -0.10200000000000001,
  -0.11400000000000006,
  -0.12600000000000003,
  -0.13799999999999998,
  -0.15000000000000005,
  -0.162,
  -0.174,
  -0.18600000000000003,
  -0.198,
  -0.20999999999999996,
  -0.22200000000000006,
  -0.23399999999999999,
  -0.2459999999999999,
  -0.258,
  -0.2699999999999999,
  -0.28200000000000003,
  -0.29400000000000004,
  -0.30599999999999994,

In [166]:
w=Weights
b=Biases
w_grad=Weights_Grad
b_grad=Biases_Grad
a=Activations
#ca=Cached_Activations
w_original=deep_copy(w)
#b_original=deep_copy(b)

In [167]:
def forward_pass(message,true_label,X):
  input_layer=X[message]
  for i in range(len(n)-2):
    for j in range(n[i+1]):
      for k in range(n[i]):
        a[i][j]+=w[i][k][j] * input_layer[k]
      a[i][j] += b[i][j]
      a[i][j] = sigmoid(a[i][j])
    input_layer = a[i]
  softmax_layer=a[-2]
  i=len(n)-2
  for j in range(n[i+1]):
    for k in range(n[i]):
      a[i][j]+=w[i][k][j] * input_layer[k]
    a[i][j]+=b[i][j]
  output_layer=a[-1]
  softmax(output_layer)
  t=deep_copy(a)
  loss=categorical_loss(output_layer,true_label)
  for xx in range(len(a)):
    for yy in range(len(a[xx])):
      a[xx][yy]=0
  return (loss,t)

def get_loss(X_train,y_train):
  net_loss=0
  ca=[]
  for message in range(len(X_train)):
    loss,t=forward_pass(message,y_train[message],X_train)
    net_loss+=loss
    ca.append(t)

  return (net_loss,ca)

In [168]:
def calculate_ad(layer):
  for i in range(len(w[layer])):
    z=0
    for message in range(len(X_train)):
      z=ad[layer][0]*sigmoid_derivative(cache[message][layer][0])*w[layer][i][0]
    z=z/len(X_train)
    ad[layer-1][i]=z

In [169]:
def backpropogation(cache):
  #Lets just update the last layer first
  for number_cache in range(len(cache[0][-1])):
    for number in range(len(w_grad[-1])):
      grad_w=0
      for message in range(len(X_train)):
        grad_w+=(cl_derivative(cache[message][-1][number_cache], y_train[message]) * cache[message][-2][number])
      grad_w=grad_w/len(X_train)
      w_grad[-1][number][number_cache]=grad_w
      w[-1][number][number_cache]-=(learning_rate*grad_w)
  for number_cache in range(len(cache[0][-1])):
    grad_b=0
    for message in range(len(X_train)):
      grad_b+=(cl_derivative(cache[message][-1][number_cache], y_train[message]))
    grad_b=grad_b/len(X_train)
    b_grad[-1][number_cache]=grad_b
    b[-1][number_cache]-=(learning_rate*grad_b)
  for f in range(len(w[-1])):
    z=0
    for message in range(len(X_train)):
      z+=(cl_derivative(cache[message][-1][0],y_train[message])) *w[-1][f][0]
    z=z/len(X_train)
    ad[-2][f]=z
  layer=-2
  while(layer>=-1*(len(n)-1)):
    for number_cache in range(len(cache[0][layer])):
      for number in range(len(w_grad[layer])):
        grad_w=0
        for message in range(len(X_train)):
          if layer==-1*(len(n)-1):
            grad_w+=ad[layer][number_cache] * sigmoid_derivative(cache[message][layer][number_cache])*X_train[message][number]
          else:
            grad_w+=ad[layer][number_cache] * sigmoid_derivative(cache[message][layer][number_cache])*cache[message][layer-1][number]
        grad_w=grad_w/len(X_train)
        w_grad[layer][number][number_cache]=grad_w
        w[layer][number][number_cache]-=(learning_rate*grad_w)
    if layer==-1*(len(n)-1):
      break
    else:
      calculate_ad(layer)
    layer=layer-1
  layer_b=-2
  while(layer_b>=-1*(len(n)-1)):
    for number_cache in range(len(cache[0][layer_b])):
      grad_b2=0
      for message in range(len(X_train)):
        grad_b2+=ad[layer][number_cache] * sigmoid_derivative(cache[message][layer][number_cache])
      grad_b2=grad_b2/len(X_train)
      b_grad[layer_b][number_cache]=grad_b2
      b[layer_b][number_cache]-=(learning_rate*grad_b2)
    layer_b=layer_b-1

In [170]:
learning_rate=0.01

In [171]:
#Make a markdown here explaining why y_train is the way I have made it

In [172]:
X_train=[]
X=[]
y_train=[]
for pair in dataset:
    X_train.append(one_hot_encoding(pair[0].lower()))
    y_train.append(one_hot_encoding(pair[1].lower()).index(1))

In [149]:
X_train=[X_train[0]]
y_train=[y_train[0]]

In [None]:
#But X_train is only the first element, and y_train is only the first element
for i in range(1000):
  l,cache=get_loss(X_train,y_train)
  print(str(i)+" : " + str(l))
  backpropogation(cache)

#### This is the weight matrix.

In [175]:
Weights[0]

[[0.21975602876603068, 0.21818486618719032, 0.21575914782873143],
 [0.21975602876603068, 0.21818486618719032, 0.21575914782873143],
 [0.21975602876603068, 0.21818486618719032, 0.21575914782873143],
 [0.21975602876603068, 0.21818486618719032, 0.21575914782873143],
 [0.2190240984098846, 0.21873945468937672, 0.21503657405168933],
 [0.21975602876603068, 0.21818486618719032, 0.21575914782873143],
 [0.21975602876603068, 0.21818475138098928, 0.21575914782873143],
 [0.21975602876603068, 0.21818475138098928, 0.21575914782873143],
 [0.21975602876603068, 0.21818475138098928, 0.21575914782873143],
 [0.21975602876603068, 0.21818475138098928, 0.21575914782873143],
 [0.21975602876603068, 0.21818475138098928, 0.21575914782873143],
 [0.21975602876603068, 0.21818475138098928, 0.21575914782873143],
 [0.21975602876603068, 0.21818475138098928, 0.21575914782873143],
 [0.21951205475603655, 0.21836950108762956, 0.21551829277996054],
 [0.21975602876603068, 0.21818475138098928, 0.21575914782873143],
 [0.2197560

##### Here are the embeddings of each word

In [198]:
word_set=set()
for word in sentences:
    if word not in word_set:
        word_set.add(word)
        print(f"The embedding of {word} is {Weights[0][vocab[word.lower()]]}")
    else:
        continue

The embedding of this is [0.4192754282406679, 0.41654851113684815, 0.4112849393608496]
The embedding of paper is [0.41951695530476374, 0.41636567596621143, 0.41152329611793076]
The embedding of reviews is [0.4197584792246777, 0.4161828389205397, 0.41176164966443585]
The embedding of the is [0.41782619985139835, 0.4176454827755354, 0.4098547314168717]
The embedding of artificial is [0.21975602876603068, 0.21818486618719032, 0.21575914782873143]
The embedding of intelligent is [0.21975602876603068, 0.21818475138098928, 0.21575914782873143]
The embedding of algorithms is [0.21975602876603068, 0.21818486618719032, 0.21575914782873143]
The embedding of in is [0.21951205475603655, 0.21836950108762956, 0.21551829277996054]
The embedding of engine is [0.2185361309653641, 0.21910848316666756, 0.21455484381873616]
The embedding of management is [-0.880503300240182, -0.8706181630978077, -0.862498070327337]
The embedding of study is [0.4197584792246777, 0.4161828389205397, 0.41176164966443585]
The