In [1]:
import numpy as np 
import string 
from nltk.corpus import stopwords  

In [2]:
def softmax(x): 
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x)) 
    return e_x / e_x.sum() 

In [29]:
class word2vec(object): 
    def __init__(self): 
        self.N = 10
        self.X_train = [] #shape(10,6)
        self.y_train = [] #shape(10,6)
        self.window_size = 2
        self.alpha = 0.001 # learning rate
        self.words = [] 
        self.word_index = {} 
   
    def initialize(self,V,data): 
        self.V = V #number of words(nodes): 6
        self.W = np.random.uniform(-0.8, 0.8, (self.V, self.N)) # weight matrix
        self.W1 = np.random.uniform(-0.8, 0.8, (self.N, self.V)) # weight matrix_1
           
        self.words = data # words in sentences(nodes in graph)
        for i in range(len(data)): # store index by word in word_index dictionary
            self.word_index[data[i]] = i 
   
       
    def feed_forward(self,X): 
        
        self.h = np.dot(self.W.T,X).reshape(self.N,1) #compute hidden layer
        self.u = np.dot(self.W1.T,self.h) #compute output layer
        #print(self.u) 
        self.y = softmax(self.u) #finally apply softmax to the output (34,2) 
        return self.y 
           
    def backpropagate(self,x,t): #train by backpropagating
        e = self.y - np.asarray(t).reshape(self.V,1) #(34,2) - (34,2)
        # e.shape is V x 1 
        dLdW1 = np.dot(self.h,e.T) 
        X = np.array(x).reshape(self.V,1) 
        dLdW = np.dot(X, np.dot(self.W1,e).T) 
        self.W1 = self.W1 - self.alpha*dLdW1 
        self.W = self.W - self.alpha*dLdW 
           
    def train(self,epochs): #training process with given epochs
        for x in range(1,epochs):         
            self.loss = 0 #initialize loss
            for j in range(len(self.X_train)): 
                self.feed_forward(self.X_train[j]) 
                #print("X_train[j]:{}".format((np.array(self.X_train[j])).shape))
                self.backpropagate(self.X_train[j],self.y_train[j]) 
                C = 0
                for m in range(self.V): 
                    if(self.y_train[j][m]): 
                        self.loss += -1*self.u[m][0] 
                        C += 1
                self.loss += C*np.log(np.sum(np.exp(self.u))) #update loss
            print("epoch ",x, " loss = ",self.loss) 
            self.alpha *= 1/( (1+self.alpha*x) ) 
              
    def predict(self,word,number_of_predictions): # predict context words that appear within window length
        if word in self.words: 
            index = self.word_index[word] 
            X = [0 for i in range(self.V)] 
            X[index] = 1
            prediction = self.feed_forward(X) 
            output = {} 
            for i in range(self.V): 
                output[prediction[i][0]] = i 
               
            top_context_words = [] 
            for k in sorted(output,reverse=True): 
                top_context_words.append(self.words[output[k]]) 
                if(len(top_context_words)>=number_of_predictions): 
                    break
       
            return top_context_words 
        else: 
            print("Word not found in dicitonary")   

In [30]:
#Data cleaning and preprocessing
def preprocessing(corpus): #input : "The earth revolves around the sun. The moon revolves around the earth"
    stop_words = set(stopwords.words('english'))#eliminate stop words   
    training_data = [] 
    sentences = corpus.split(".")#split whole input sentence based on "."
    for i in range(len(sentences)): 
        sentences[i] = sentences[i].strip()#remove unwanted spaces("", "\n") 
        sentence = sentences[i].split()#split each sentence to words 
        x = [word.strip(string.punctuation) for word in sentence 
                                     if word not in stop_words]#remove any punctuation in word x
        x = [word.lower() for word in x]#convert all characters in word x to lowercase 
        training_data.append(x)#append refined sentence list x to traning_data list 
    return training_data#return refined traing_data (each element is refined word list) 
    #output: [['the', 'earth', 'revolves', 'around', 'sun'], ['the', 'moon', 'revolves', 'around', 'earth']]


def prepare_data_for_training(sentences,w2v):#receive (prepocessed data, word2vec object) 
    data = {}#data dictionary(key = each word(vertex), value = number of the word in data) 
    for sentence in sentences:# count the number of each word(vertex) in input data and save it to data dictionary 
        for word in sentence: 
            if word not in data: 
                data[word] = 1
            else: 
                data[word] += 1
    V = len(data)#number of types of words(vertices) 
    data = sorted(list(data.keys()))#sort words in the data list

    vocab = {}#vocabulary dictionary(key = word, value = the word's index in data list)
    for i in range(len(data)): 
        vocab[data[i]] = i 
    # vocab : {'around': 0, 'earth': 1, 'moon': 2, 'revolves': 3, 'sun': 4, 'the': 5}
    
    #for i in range(len(words)): 
    for sentence in sentences: #convert each word to one-hot vector
        for i in range(len(sentence)): 
            center_word = [0 for x in range(V)]#[0,0,0,0,0,0] e.g., list of 6 number of zeros
            center_word[vocab[sentence[i]]] = 1

            context = [0 for x in range(V)]#[0,0,0,0,0,0] e.g., list of 6 number of zeros 

            #slide window over random walk(sentence)
            for j in range(i-w2v.window_size,i+w2v.window_size): #window of length 2w+1
                if i!=j and j>=0 and j<len(sentence): 
                    context[vocab[sentence[j]]] += 1 # words that appear within window length
                    
            #assign data for training      
            w2v.X_train.append(center_word) 
            w2v.y_train.append(context) 
    
    print("X_train:{}".format(w2v.X_train))
    print(np.array(w2v.X_train).shape)
    print("\ny_train{}".format(w2v.y_train))
    print(np.array(w2v.y_train).shape)
    #assign number of words in sentences:6
    #list of words in sentence:['around', 'earth', 'moon', 'revolves', 'sun', 'the']
    w2v.initialize(V,data)

    return w2v.X_train,w2v.y_train 

In [31]:
#call functions
corpus = "" 
corpus += "The earth revolves around the sun. The moon revolves around the earth"
epochs = 1000
  
training_data = preprocessing(corpus)#refiend data 
w2v = word2vec()#word2vec object 
  
prepare_data_for_training(training_data,w2v) 
w2v.train(epochs)  
  
print(w2v.predict("around",3))  

X_train:[[0, 0, 0, 0, 0, 1], [0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]]
(10, 6)

y_train[[0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 1], [1, 1, 0, 0, 0, 1], [0, 1, 0, 1, 1, 0], [1, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 1], [1, 0, 1, 0, 0, 1], [0, 1, 1, 1, 0, 0], [1, 0, 0, 1, 0, 0]]
(10, 6)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  1  loss =  43.49286432214376
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  2  loss =  43.43419218639898
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  3  loss

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  60  loss =  41.29988179564218
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  61  loss =  41.28249182413809
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  62  loss =  41.26549713550039
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  63  loss =  41.24888643657304
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  64  loss =  41.232648762977675
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  112  loss =  40.74168912094535
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  113  loss =  40.73534122529142
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  114  loss =  40.72909393693164
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  115  loss =  40.722944995585294
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  116  loss =  40.71689220465926
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  168  loss =  40.49581204144434
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  169  loss =  40.49283108495653
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  170  loss =  40.489883881331856
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  171  loss =  40.48696987297557
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  172  loss =  40.48408851423201
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  227  loss =  40.36377551622576
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  228  loss =  40.3621146293896
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  229  loss =  40.360468018024406
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  230  loss =  40.35883550110037
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  231  loss =  40.357216900598154
X_train[j]:(6,)
X_train[j

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  288  loss =  40.28334282913214
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  289  loss =  40.28230469605105
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  290  loss =  40.28127367595258
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  291  loss =  40.28024969653928
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  292  loss =  40.279232686482615
X_train[j]:(6,)
X_train[j

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  347  loss =  40.23228574110302
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  348  loss =  40.2315690776292
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  349  loss =  40.230856512222346
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  350  loss =  40.23014801001781
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  351  loss =  40.229443536543236
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  407  loss =  40.195511804516535
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  408  loss =  40.19499049137769
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  409  loss =  40.194471727869264
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  410  loss =  40.19395549540466
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  411  loss =  40.193441775576765
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  468  loss =  40.16779097975727
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  469  loss =  40.167396676270094
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  470  loss =  40.167004053140666
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  471  loss =  40.16661309968181
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  472  loss =  40.16622380529661
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  525  loss =  40.14771687201504
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  526  loss =  40.147403600955734
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  527  loss =  40.147091521366235
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  528  loss =  40.14678062647804
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  529  loss =  40.14647090957368
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[

X_train[j]:(6,)
X_train[j]:(6,)
epoch  588  loss =  40.13006632223828
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  589  loss =  40.1298166702196
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  590  loss =  40.12956786669691
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  591  loss =  40.129319907359935
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  592  loss =  40.129072787927505
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  645  loss =  40.11707478758737
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  646  loss =  40.11686738458319
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  647  loss =  40.1166606245259
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  648  loss =  40.11645450443511
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  649  loss =  40.11624902134874
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  719  loss =  40.103289438839475
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  720  loss =  40.10312261058033
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  721  loss =  40.10295624647362
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  722  loss =  40.10279034458739
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  723  loss =  40.10262490300043
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  772  loss =  40.09504481829171
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  773  loss =  40.09490015842254
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  774  loss =  40.09475587348306
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  775  loss =  40.094611962019144
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  776  loss =  40.09446842258416
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j

epoch  835  loss =  40.086609891782516
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  836  loss =  40.08648628411039
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  837  loss =  40.08636297269181
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  838  loss =  40.08623995646399
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  839  loss =  40.0861172343692
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  840

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  900  loss =  40.07914837947295
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  901  loss =  40.07904202045307
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  902  loss =  40.0789358979699
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  903  loss =  40.078830011235915
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  904  loss =  40.078724359467024
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j

X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  954  loss =  40.07372498760902
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  955  loss =  40.07363035541367
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  956  loss =  40.07353592177956
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  957  loss =  40.07344168608286
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
epoch  958  loss =  40.07334764770237
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]:(6,)
X_train[j]