In [None]:
import numpy as np

In [None]:
def softmax(x, axis=0):
    """Return the softmax of ``x`` computed along ``axis``.

    Args:
        x: Array-like of scores. For 1-D input the result is a single
            probability vector; for N-D input every slice along ``axis``
            is normalized independently.
        axis: Axis along which the probabilities sum to 1. Defaults to 0.

    Returns:
        np.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    # Shift by the max *along the normalization axis* (not the global max):
    # the largest exponent in every slice is then exactly 0, so no slice can
    # underflow to all zeros and produce 0/0 = nan.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=axis, keepdims=True)

In [None]:
class Doc2Vec:
    """Small NumPy Doc2Vec-style model trained with per-sample SGD.

    For every training sample the model averages the vectors of the context
    words, averages that result with the document's vector, scores every
    vocabulary word against the combined vector, and takes one gradient step
    on the softmax cross-entropy of the target word. The same
    ``word_vectors`` matrix is used both to look up context words and to
    score candidate target words.
    """

    def __init__(self, num_docs, vocab_size, vector_size, learning_rate=0.01, seed=None):
        """Create randomly initialized word and document vectors.

        Args:
            num_docs: Number of documents (rows of ``doc_vectors``).
            vocab_size: Number of words (rows of ``word_vectors``).
            vector_size: Dimensionality of every vector.
            learning_rate: SGD step size used by ``train``.
            seed: Optional seed for reproducible initialization. When None
                the weights are drawn from NumPy's global RNG, so a prior
                ``np.random.seed(...)`` call still controls them.
        """
        self.num_docs = num_docs
        self.vocab_size = vocab_size
        self.vector_size = vector_size
        self.learning_rate = learning_rate

        # Uniform [0, 1) initialization of both weight matrices.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.word_vectors = rng.rand(vocab_size, vector_size)
        self.doc_vectors = rng.rand(num_docs, vector_size)  # One row per document

    def train(self, dataset, epochs=1000):
        """Run per-sample SGD over ``dataset`` for ``epochs`` passes.

        Args:
            dataset: Iterable of ``(doc_id, target, context)`` where
                ``target`` is a word index and ``context`` is a non-empty
                sequence of word indices. It is iterated once per epoch, so
                it must be re-iterable (e.g. a list, not a one-shot
                generator).
            epochs: Number of passes over ``dataset``.

        Raises:
            ValueError: If a sample has an empty ``context`` (its mean would
                be NaN and would corrupt every weight).

        Side effects:
            Updates ``word_vectors`` and ``doc_vectors`` in place and prints
            the loss of samples with ``doc_id == 0`` every 100th epoch.
        """
        for epoch in range(epochs):
            for doc_id, target, context in dataset:
                num_context = len(context)
                if num_context == 0:
                    raise ValueError(
                        f"Sample for document {doc_id} has an empty context"
                    )

                # Forward pass: combined vector -> scores -> probabilities.
                context_vector = np.mean(self.word_vectors[context], axis=0)
                combined_vector = (context_vector + self.doc_vectors[doc_id]) / 2

                scores = np.dot(self.word_vectors, combined_vector)
                probs = softmax(scores)

                # The cross-entropy loss is only needed for the progress printout.
                if epoch % 100 == 0 and doc_id == 0:
                    loss = -np.log(probs[target])
                    print(f"Epoch {epoch}, Document {doc_id}, Loss: {loss}")

                # Backward pass. All gradients are computed from the
                # pre-update weights before any parameter is modified.
                # dL/dscores for softmax + cross-entropy is (probs - one_hot).
                grad_scores = probs.copy()
                grad_scores[target] -= 1

                # Gradient w.r.t. word vectors in their scoring role.
                grad_output = np.outer(grad_scores, combined_vector)
                # Gradient w.r.t. the combined vector.
                grad_combined = np.dot(self.word_vectors.T, grad_scores)
                # combined = (context_mean + doc) / 2, so the chain rule
                # contributes a factor of 1/2 to both branches ...
                grad_doc = 0.5 * grad_combined
                # ... and the mean spreads the context branch evenly over
                # the context occurrences.
                grad_context_word = grad_doc / num_context

                # Apply the SGD step.
                self.word_vectors -= self.learning_rate * grad_output
                # Loop (rather than fancy-index assignment) so a word that
                # appears several times in the context is updated once per
                # occurrence.
                for idx in context:
                    self.word_vectors[idx] -= self.learning_rate * grad_context_word
                self.doc_vectors[doc_id] -= self.learning_rate * grad_doc

    def get_document_vector(self, doc_id):
        """Return the row of ``doc_vectors`` for ``doc_id``.

        With an integer ``doc_id`` NumPy returns a view, so mutating the
        result also mutates the model; copy it first if that is not wanted.
        """
        return self.doc_vectors[doc_id]

In [None]:
# Example usage: train a tiny model on three toy samples and print the
# learned document vectors.

# Fix the seed so Restart & Run All reproduces the same vectors; the model's
# initial weights are drawn from NumPy's global RNG.
SEED = 42
np.random.seed(SEED)

num_docs = 3
vocab_size = 100
vector_size = 50

model = Doc2Vec(num_docs, vocab_size, vector_size)

# Example data: list of tuples (doc_id, target_word_index, context_indices)
dataset = [
    (0, 10, [1, 2, 3]),
    (1, 20, [21, 22, 23]),
    (2, 30, [31, 32, 33])
]

model.train(dataset, epochs=1000)

for i in range(num_docs):
    print(f"Document {i} vector:", model.get_document_vector(i))