# **HADOOP MAPREDUCE DISTRIBUTED ARTIFICIAL NEURAL NETWORK**

In [None]:
!pip install mrjob numpy

Collecting mrjob
  Downloading mrjob-0.7.4-py2.py3-none-any.whl.metadata (7.3 kB)
Downloading mrjob-0.7.4-py2.py3-none-any.whl (439 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/439.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/439.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m430.1/439.6 kB[0m [31m7.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.6/439.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mrjob
Successfully installed mrjob-0.7.4


## ANN THROUGH MAP-REDUCE (ON IRIS DATASET)

---



In [None]:
%%writefile neural_network.py
import time
from mrjob.job import MRJob
from mrjob.step import MRStep
import numpy as np
from numpy import array, random, dot

class NeuralNetwork(MRJob):
    """Single-step MapReduce job that trains and scores a sigmoid network.

    Each mapper task builds its own randomly initialised fully connected
    network. For every input row the mapper trains that network on the row
    for ``num_iterations`` passes, then emits the network's prediction for
    the same row, keyed by the row's true class. The reducer aggregates the
    per-class accuracy and the mean output activations.

    The problem shape (four features, three classes, label names) is fixed
    by the class constants below; the hyperparameters come from the
    pass-through command-line arguments declared in ``configure_args``.
    """

    # Problem shape shared by mapper and reducer tasks. These are class
    # attributes because ``mapper_init`` is the only place the instance
    # attributes are set, and the reducer hooks never call it.
    NUM_FEATURES = 4
    NUM_CLASSES = 3
    LABEL_MAP = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

    def configure_args(self):
        """Declare the hyperparameters as pass-through arguments.

        The defaults for ``--hidden_layers`` and ``--l2_regularization`` are
        the values the mapper previously hard-coded, so a run without
        arguments keeps the same hyperparameters.
        """
        super().configure_args()
        self.add_passthru_arg('--learning_rate', default=0.1, type=float,
                              help='learning rate')
        self.add_passthru_arg('--num_iterations', default=100, type=int,
                              help='number of iterations')
        self.add_passthru_arg('--hidden_layers', default='4,3', type=str,
                              help='number of neurons in hidden layers')
        self.add_passthru_arg('--l2_regularization', default=0.01, type=float,
                              help='L2 regularization parameter')

    def initialize_weights(self):
        """Create one weight matrix per pair of consecutive layers.

        Reads ``self.num_features``, ``self.hidden_layers`` (comma-separated
        neuron counts) and ``self.num_classes``; stores the matrices in
        ``self.synaptic_weights``. Entries are drawn uniformly from [-1, 1).
        Blank tokens in ``hidden_layers`` are skipped, so an empty string
        yields a network with no hidden layer.
        """
        hidden = [int(tok) for tok in self.hidden_layers.split(',')
                  if tok.strip()]
        layer_sizes = [self.num_features] + hidden + [self.num_classes]
        self.synaptic_weights = [
            2 * random.random((n_in, n_out)) - 1
            for n_in, n_out in zip(layer_sizes, layer_sizes[1:])
        ]

    def activation_function(self, x):
        """Return the logistic sigmoid of ``x`` (element-wise for arrays)."""
        return 1.0 / (1.0 + np.exp(-x))

    def activation_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``.

        Callers pass activations that already went through
        ``activation_function``, hence ``x * (1 - x)``.
        """
        return x * (1 - x)

    def feedforward(self, x):
        """Propagate ``x`` through the network.

        Returns a list whose first element is ``x`` itself, followed by the
        activation of every layer; the last element is the network output.
        """
        activations = [x]
        for weights in self.synaptic_weights:
            activations.append(
                self.activation_function(dot(activations[-1], weights)))
        return activations

    def backpropagation(self, x, y):
        """Run one gradient step on the single sample ``(x, y)``.

        ``x`` is the feature vector and ``y`` the integer class index, which
        is one-hot encoded as the target. The weights in
        ``self.synaptic_weights`` are updated in place.
        """
        activations = self.feedforward(x)
        target = np.zeros(self.num_classes)
        target[y] = 1

        # Output-layer delta first, then walk back through the hidden layers.
        # All deltas are computed before any weight is modified.
        output = activations[-1]
        deltas = [(target - output) * self.activation_derivative(output)]
        for i in range(len(self.synaptic_weights) - 1, 0, -1):
            back_error = dot(deltas[-1], self.synaptic_weights[i].T)
            deltas.append(
                back_error * self.activation_derivative(activations[i]))
        deltas.reverse()

        for weights, layer_input, delta in zip(self.synaptic_weights,
                                               activations, deltas):
            # The error is (target - output), so adding the outer product
            # moves the weights downhill. The L2 penalty must be subtracted
            # so that it shrinks the weights rather than inflating them.
            weights += self.learning_rate * (
                np.outer(layer_input, delta)
                - self.l2_regularization * weights)

    def mapper_init(self):
        """Load hyperparameters from the command line and build the network."""
        self.learning_rate = self.options.learning_rate
        self.num_iterations = self.options.num_iterations
        self.hidden_layers = self.options.hidden_layers
        self.l2_regularization = self.options.l2_regularization
        self.num_features = self.NUM_FEATURES
        self.num_classes = self.NUM_CLASSES
        self.label_map = self.LABEL_MAP
        self.initialize_weights()
        self.processed_samples = 0

    def mapper(self, _, line):
        """Train on one CSV row and emit the prediction for that row.

        Expected row layout: optional leading columns, then the feature
        columns, then the species label as the last column. Blank lines,
        lines containing ``Id`` or ``Species`` (the header), short rows and
        rows with an unknown label are skipped.

        Yields ``(str(true_class), {'predicted_class', 'probabilities'})``,
        or ``("error", message)`` if the row could not be processed.
        """
        # Skip header or empty lines
        if not line.strip() or 'Id' in line or 'Species' in line:
            return

        # Best-effort per row: a failing row becomes an "error" record that
        # the reducer passes through, instead of aborting the whole task.
        try:
            data = line.strip().split(',')
            if len(data) < self.num_features + 1:
                return

            label = data[-1].strip()
            if label not in self.label_map:
                return
            label_int = self.label_map[label]

            # Take the columns immediately before the label, so rows with and
            # without a leading Id column are both parsed correctly.
            features = array(
                [float(v) for v in data[-1 - self.num_features:-1]])

            # Train the network on this sample
            for _iteration in range(self.num_iterations):
                self.backpropagation(features, label_int)

            # Convert numpy types to native Python types before yielding
            output = self.feedforward(features)[-1]
            predicted_class = int(np.argmax(output))
            output_list = [float(p) for p in output]

            yield str(label_int), {
                'predicted_class': predicted_class,
                'probabilities': output_list
            }

            self.processed_samples += 1

        except Exception as e:
            yield "error", str(e)

    def reducer_init(self):
        """Create an empty tally for every class index."""
        self.results = {
            str(i): {'correct': 0, 'total': 0, 'predictions': []}
            for i in range(self.NUM_CLASSES)
        }

    def reducer(self, key, values):
        """Accumulate the predictions for one true class.

        Records keyed ``"error"`` are re-emitted under the key ``"Error"``;
        everything else is tallied in ``self.results`` and reported by
        ``reducer_final``.
        """
        if key == "error":
            for value in values:
                yield "Error", value
            return

        actual_class = int(key)
        tally = self.results[key]
        for value in values:
            tally['total'] += 1
            if value['predicted_class'] == actual_class:
                tally['correct'] += 1
            tally['predictions'].append(value['probabilities'])

    def reducer_final(self):
        """Emit accuracy and mean output activations per class seen."""
        for class_label, data in self.results.items():
            if data['total'] == 0:
                continue

            # total > 0 guarantees at least one stored prediction.
            avg_probs = np.mean(data['predictions'], axis=0)
            accuracy = float(data['correct']) / float(data['total'])

            # Convert all numpy types to Python native types
            result = {
                'samples_processed': int(data['total']),
                'correct_predictions': int(data['correct']),
                'accuracy': float(accuracy),
                'average_probabilities': [float(p) for p in avg_probs]
            }

            yield f"Class {class_label}", result

    def steps(self):
        """Return the single map/reduce step that makes up this job."""
        return [
            MRStep(mapper_init=self.mapper_init,
                   mapper=self.mapper,
                   reducer_init=self.reducer_init,
                   reducer=self.reducer,
                   reducer_final=self.reducer_final)
        ]

# Start the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    NeuralNetwork.run()


"""
Output:
The job emits one record per Iris class that received at least one sample. The keys are "Class 0", "Class 1" and
"Class 2", which correspond to Iris-setosa, Iris-versicolor and Iris-virginica respectively.

Each record is a dictionary with four fields:
  - samples_processed: the number of input rows whose true class is this class,
  - correct_predictions: how many of those rows the network assigned to the true class,
  - accuracy: correct_predictions divided by samples_processed,
  - average_probabilities: the mean of the three output activations over those rows, in class order 0, 1, 2.

For example, a "Class 2" record with samples_processed = 50, correct_predictions = 35 and accuracy = 0.7 means that
35 of the 50 Iris-virginica rows were classified correctly. The weights are initialised randomly, so the exact
numbers differ from run to run.
"""

Overwriting neural_network.py


In [None]:
# !chmod +x neural_network.py
!chmod +x neural_network.py

In [None]:
!python neural_network.py "/content/drive/MyDrive/ANN MAPREDUCE DATASET/Iris.csv" > output.txt


No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory /tmp/neural_network.root.20241030.192213.418051
Running step 1 of 1...
job output is in /tmp/neural_network.root.20241030.192213.418051/output
Streaming final output from /tmp/neural_network.root.20241030.192213.418051/output...
Removing temp directory /tmp/neural_network.root.20241030.192213.418051...


In [None]:
!cat output.txt

"Class 0"	{"samples_processed": 50, "correct_predictions": 50, "accuracy": 1.0, "average_probabilities": [0.9727167698843242, 0.02622352395388161, 0.030448172976813476]}
"Class 1"	{"samples_processed": 50, "correct_predictions": 22, "accuracy": 0.44, "average_probabilities": [0.5752634958600781, 0.42563008454115914, 0.02351538979056609]}
"Class 2"	{"samples_processed": 50, "correct_predictions": 35, "accuracy": 0.7, "average_probabilities": [0.014422626471982274, 0.3218506785500645, 0.6815229511282436]}
