In [8]:
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

In [9]:
class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        self.weights_input_hidden = np.random.randn(input_size, hidden_size)
        self.bias_input_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.randn(hidden_size, output_size)
        self.bias_hidden_output = np.zeros((1, output_size))
        self.cache_weights_input_hidden = np.zeros((input_size, hidden_size))
        self.cache_bias_input_hidden = np.zeros((1, hidden_size))
        self.cache_weights_hidden_output = np.zeros((hidden_size, output_size))
        self.cache_bias_hidden_output = np.zeros((1, output_size))
        self.epsilon = 1e-8

    def forward(self, X):
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_input_hidden
        self.hidden_output = self.relu(self.hidden_input)
        output = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_hidden_output
        output = self.softmax(output)
        return output

    def relu(self, X):
        return np.maximum(0, X)

    def relu_derivative(self, X):
        return np.where(X > 0, 1, 0)

    def softmax(self, X):
        exp_values = np.exp(X - np.max(X, axis=1, keepdims=True))
        return exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, X, y, output, learning_rate):
        error = output - y
        d_weights_hidden_output = np.dot(self.hidden_output.T, error)
        d_bias_hidden_output = np.sum(error, axis=0, keepdims=True)
        d_hidden_input = np.dot(error, self.weights_hidden_output.T) * self.relu_derivative(self.hidden_input)
        d_weights_input_hidden = np.dot(X.T, d_hidden_input)
        d_bias_input_hidden = np.sum(d_hidden_input, axis=0, keepdims=True)

        # AdaGrad update
        self.cache_weights_hidden_output += d_weights_hidden_output ** 2
        self.cache_bias_hidden_output += d_bias_hidden_output ** 2
        self.cache_weights_input_hidden += d_weights_input_hidden ** 2
        self.cache_bias_input_hidden += d_bias_input_hidden ** 2

        self.weights_hidden_output -= learning_rate * d_weights_hidden_output / (np.sqrt(self.cache_weights_hidden_output) + self.epsilon)
        self.bias_hidden_output -= learning_rate * d_bias_hidden_output / (np.sqrt(self.cache_bias_hidden_output) + self.epsilon)
        self.weights_input_hidden -= learning_rate * d_weights_input_hidden / (np.sqrt(self.cache_weights_input_hidden) + self.epsilon)
        self.bias_input_hidden -= learning_rate * d_bias_input_hidden / (np.sqrt(self.cache_bias_input_hidden) + self.epsilon)

    def train(self, X, y, num_epochs, learning_rate, batch_size):
        num_samples = X.shape[0]
        for epoch in range(num_epochs):
        # Shuffle the data
            indices = np.arange(num_samples)
            np.random.shuffle(indices)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

        # Minibatch gradient descent
            for i in range(0, num_samples, batch_size):
                X_batch = X_shuffled[i:i+batch_size]
                y_batch = y_shuffled[i:i+batch_size]

            # Forward pass
                output = self.forward(X_batch)

            # Backward pass
                if y_batch.ndim == 1:
                    y_one_hot = np.eye(self.weights_hidden_output.shape[1])[y_batch]
                else:
                    y_one_hot = y_batch
                self.backward(X_batch, y_one_hot, output, learning_rate)



In [10]:
def load_and_preprocess_data(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split('\t')
            if len(parts) == 2:
                text = preprocess_text(parts[1])
                label = parts[0]
                data.append((text, label))
    return data

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return text

In [11]:
# Load and preprocess the data
file_path = "/Users/naveenverma/Desktop/NewStart/Dataset/a1-data/books.txt"  # Provide the path to your data file
data = load_and_preprocess_data(file_path)
X = np.array([entry[0] for entry in data])
y = np.array([entry[1] for entry in data])

# Vectorize textual data
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Convert categorical labels to numeric values
label_mapping = {'Jane Austen': 0, 'Arthur Conan Doyle': 1, 'Fyodor Dostoyevsky': 2}
y_numeric = np.array([label_mapping[label] for label in y])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_vectorized.toarray(), y_numeric, test_size=0.2, random_state=42)

# Initialize and train the MLP model
input_size = X_train.shape[1]
hidden_size = 32
output_size = 3
num_epochs = 100
learning_rate = 0.001
batch_size = 8

mlp = MLP(input_size, hidden_size, output_size)
mlp.train(X_train, y_train, num_epochs, learning_rate, batch_size)

# Forward pass on test data
output_test = mlp.forward(X_test)
predictions_test = np.argmax(output_test, axis=1)

# Calculate accuracy
accuracy = np.mean(predictions_test == y_test)
print("Accuracy:", accuracy)

Accuracy: 0.5744626407369499


In [12]:
import tensorflow as tf
def create_model(input_size, hidden_size, output_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_size, activation='relu', input_shape=(input_size,)),
        tf.keras.layers.Dense(output_size)
    ])
    return model

In [14]:
input_size = X_train.shape[1]
hidden_size = 32
output_size = 3

model = create_model(input_size, hidden_size, output_size)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.001),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Accuracy:", test_accuracy)

Epoch 1/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5556 - loss: 1.0820 - val_accuracy: 0.5557 - val_loss: 1.0543
Epoch 2/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5672 - loss: 1.0456 - val_accuracy: 0.5557 - val_loss: 1.0311
Epoch 3/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5728 - loss: 1.0209 - val_accuracy: 0.5557 - val_loss: 1.0129
Epoch 4/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5686 - loss: 1.0043 - val_accuracy: 0.5557 - val_loss: 0.9975
Epoch 5/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5711 - loss: 0.9878 - val_accuracy: 0.5557 - val_loss: 0.9844
Epoch 6/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5669 - loss: 0.9757 - val_accuracy: 0.5557 - val_loss: 0.9733
Epoch 7/10
[1m391/391[0m 

In [15]:
model = create_model(input_size, hidden_size, output_size)
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train the model
num_epochs = 10
history = model.fit(X_train, y_train, epochs=num_epochs, batch_size=32, validation_split=0.2)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Accuracy:", test_accuracy)


Epoch 1/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.6644 - loss: 0.8107 - val_accuracy: 0.8996 - val_loss: 0.3492
Epoch 2/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9310 - loss: 0.2564 - val_accuracy: 0.9207 - val_loss: 0.2324
Epoch 3/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9626 - loss: 0.1382 - val_accuracy: 0.9229 - val_loss: 0.2083
Epoch 4/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9682 - loss: 0.1016 - val_accuracy: 0.9207 - val_loss: 0.2041
Epoch 5/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9791 - loss: 0.0713 - val_accuracy: 0.9184 - val_loss: 0.2069
Epoch 6/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9807 - loss: 0.0593 - val_accuracy: 0.9178 - val_loss: 0.2115
Epoch 7/10
[1m391/391[0m 

Conclusion-
1 - My custom model for MLP trained on our training data gave me 57% accuracy on the test set using AdaGrad optimizer, I also used tensorflow to train the model and it also gave 57% accuracy.
2 - I used the adam optimizer also to cross-check the accuracy and I am getting 91% accuracy which is higher than our AdaGrad optimizer on the same data.
3 - In this case, we can say that adam optimizer is a better optimization technique for this task than the AdaGrad optimizer.