<a href="https://colab.research.google.com/github/Anilesh05/Anilesh/blob/main/Matrix_Multiplication_Using_Mapreduce.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Download and install Hadoop***

In [None]:
# Install Java 8, which the Hadoop launcher scripts need (see JAVA_HOME below).
# -y answers apt's confirmation prompt so an unattended "Run all" cannot stall.
!apt-get install -y openjdk-8-jdk
# Download the Hadoop 3.3.6 binary release and unpack it into the current directory.
!wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
!tar fx hadoop-3.3.6.tar.gz
import os
# Tell Hadoop where the JDK and its own installation live.
# NOTE(review): HADOOP_HOME assumes the archive was unpacked in /content,
# i.e. that /content is the notebook's working directory - confirm.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["HADOOP_HOME"] = "/content/hadoop-3.3.6"
# Make the launchers in hadoop-3.3.6/bin callable by name by linking them into
# /usr/bin. -f replaces links left by a previous run, so the cell can be
# re-executed without "File exists" errors.
!ln -sf /content/hadoop-3.3.6/bin/* /usr/bin

# ***Create matrix.txt file***

In [None]:
%%writefile matrix.txt

A,0,0,0.0
A,0,1,1.0
A,0,2,2.0
A,0,3,3.0
A,0,4,4.0
A,1,0,5.0
A,1,1,6.0
A,1,2,7.0
A,1,3,8.0
A,1,4,9.0
B,0,0,0.0
B,0,1,1.0
B,0,2,2.0
B,1,0,3.0
B,1,1,4.0
B,1,2,5.0
B,2,0,6.0
B,2,1,7.0
B,2,2,8.0
B,3,0,9.0
B,3,1,10.0
B,3,2,11.0
B,4,0,12.0
B,4,1,13.0
B,4,2,14.0

In [None]:
%%writefile mapper.py
#!/usr/bin/env python
"""Hadoop Streaming mapper for the matrix product C = A x B.

Input lines have the form ``matrix,row,col,value`` where ``matrix`` is ``A``
or ``B``.  Every entry is replicated once for each cell of C it contributes
to, keyed by that cell:

* ``A[i][j]`` is emitted as ``i,k<TAB>A,j,value`` for every column ``k`` of B.
* ``B[j][k]`` is emitted as ``i,k<TAB>B,j,value`` for every row ``i`` of A.

The reducer then receives, under each key ``i,k``, all the A and B values
that share an inner index ``j``.
"""
import sys

# Number of rows in A
m = 2
# Number of columns in B
p = 3


def map_line(line):
    """Return the list of ``key<TAB>value`` records produced by one input line.

    Lines that do not split into exactly four comma-separated fields (blank
    lines, for instance) and lines whose matrix tag is neither ``A`` nor ``B``
    produce no records.  A line with four fields but non-numeric indices or
    value raises ``ValueError``, so corrupt matrix data fails the task rather
    than being dropped silently.
    """
    entry = line.strip().split(",")
    if len(entry) != 4:
        return []

    row = int(entry[1])
    col = int(entry[2])
    value = float(entry[3])

    # The value is written with repr() rather than '{:f}': a fixed six decimal
    # places would truncate small entries (1e-07 -> 0.000000) before the
    # reducer multiplies them, whereas repr() round-trips through float().
    if entry[0] == "A":
        # A[row][col] contributes to C[row][k] for every column k of B.
        return ['{0:d},{1:d}\tA,{2:d},{3!r}'.format(row, k, col, value)
                for k in range(p)]
    if entry[0] == "B":
        # B[row][col] contributes to C[k][col] for every row k of A.
        return ['{0:d},{1:d}\tB,{2:d},{3!r}'.format(k, col, row, value)
                for k in range(m)]
    return []


def main():
    """Map every line read from standard input and print the records."""
    for line in sys.stdin:
        for record in map_line(line):
            print(record)


# Only consume stdin when executed as a script (as Hadoop Streaming does),
# so the module can be imported without side effects.
if __name__ == "__main__":
    main()

In [None]:
%%writefile reducer.py
#!/usr/bin/env python
"""Hadoop Streaming reducer for the matrix product C = A x B.

Each input line has the form ``row,col<TAB>matrix,j,value``: the key names a
cell of C and the value carries one entry of A or B together with its inner
index ``j``.  The lines of one key must arrive consecutively (Hadoop's sort
phase provides this).  For every key the reducer multiplies the two values
that share an inner index, sums those products and prints
``(row,col),result``.
"""

import sys

# Number of columns of A/rows of B
n = 5


def parse_record(line):
    """Parse one input line into ``((row, col), j, value)``.

    Returns ``None`` for a line that does not have the expected shape (blank
    line, missing tab, non-numeric field, too few fields) so the caller can
    skip it instead of crashing the task.
    """
    try:
        key, value = line.strip().split('\t', 1)
        row, col = map(int, key.split(','))
        fields = value.split(',')
        return (row, col), int(fields[1]), float(fields[2])
    except (ValueError, IndexError):
        return None


def cell_value(value_dict):
    """Return the sum of products over the inner indices ``0 .. n-1``.

    ``value_dict`` maps an inner index to the list of values received for it.
    Only indices that received exactly two values (one from each matrix)
    contribute a product.
    """
    total = 0.0
    for j in range(n):
        pair = value_dict.get(j)
        if pair is not None and len(pair) == 2:
            total += pair[0] * pair[1]
    return total


def reduce_lines(lines):
    """Yield one ``(row,col),result`` string per key found in ``lines``."""
    current_key = None
    value_dict = {}

    for line in lines:
        record = parse_record(line)
        if record is None:
            continue
        key, replicate_key, element_value = record

        if key != current_key:
            if current_key is not None:
                # Label the finished cell with the key it was accumulated
                # under, not with the key of the line that was just read.
                yield '({0:d},{1:d}),{2:f}'.format(
                    current_key[0], current_key[1], cell_value(value_dict))
            current_key = key
            value_dict = {}

        value_dict.setdefault(replicate_key, []).append(element_value)

    # Flush the last key, which has no following line to trigger its output.
    if current_key is not None:
        yield '({0:d},{1:d}),{2:f}'.format(
            current_key[0], current_key[1], cell_value(value_dict))


def main():
    """Reduce the records read from standard input and print the results."""
    for result in reduce_lines(sys.stdin):
        print(result)


# Only consume stdin when executed as a script (as Hadoop Streaming does),
# so the module can be imported without side effects.
if __name__ == "__main__":
    main()


In [None]:
# Create the directory the streaming job will read its input from.
# NOTE(review): this notebook never writes any Hadoop site configuration, so
# `hdfs dfs` presumably operates on the local filesystem (standalone mode)
# and "input" is a plain directory under the working directory - confirm.
!hdfs dfs -mkdir input

In [None]:
# Move (not copy) matrix.txt into the input directory; afterwards the file no
# longer exists at its original path.
!hdfs dfs -mv matrix.txt input/

In [None]:
# Print the staged file to check that it is where the job will look for it.
!hdfs dfs -cat input/matrix.txt

In [None]:
# Run the matrix multiplication as a Hadoop Streaming job:
#   -files    ships mapper.py and reducer.py to the tasks
#   -mapper   command run for the map phase
#   -reducer  command run for the reduce phase
#   -input    directory holding matrix.txt
#   -output   directory the results are written to
# NOTE(review): Hadoop is expected to refuse to start when the output
# directory already exists, so remove "output" before re-running - confirm.
!hadoop jar /content/hadoop-3.3.6/share/hadoop/tools/lib/hadoop-streaming-3.3.6.jar \
    -files mapper.py,reducer.py \
    -mapper mapper.py \
    -reducer reducer.py \
    -input input \
    -output output

In [None]:
# Display the job result, read with plain `cat` from the local output directory.
# NOTE(review): part-00000 is presumably the only output file (a single
# reducer); with more reducers there would be further part-* files - confirm.
!cat output/part-00000