In [18]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-5jz6_say
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-5jz6_say
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit aac710a35f52bb78ab34d2e52517237941399eff
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [19]:
!apt-get -y install libomp-dev


Reading package lists... Done
Building dependency tree       
Reading state information... Done
libomp-dev is already the newest version (1:10.0-50~exp1).
0 upgraded, 0 newly installed, 0 to remove and 24 not upgraded.


In [20]:
%load_ext nvcc_plugin

The nvcc_plugin extension is already loaded. To reload it, use:
  %reload_ext nvcc_plugin


In [21]:
!pip install mpi4py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [22]:
%ls

[0m[01;34msample_data[0m/  sample-text.txt  [01;34msrc[0m/


########################################################################

In [23]:
import time
from mpi4py import MPI

# Linear search function
def linear_search(data, target):
    for item in data:
        if item == target:
            return True
    return False

# Parallel search function
def parallel_search(data, target):
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Scatter data to all ranks
    chunk_size = len(data) // size
    remainder = len(data) % size
    sendbuf = [data[i:i+chunk_size] for i in range(0, len(data), chunk_size)]
    sendcounts = [len(sendbuf[i]) for i in range(size)]
    displs = [sum(sendcounts[:i]) for i in range(size)]
    recvbuf = comm.scatter(sendbuf, root=0)

    # Search for target in the received data
    found = False
    for item in recvbuf:
        if item == target:
            found = True
            break

    # Combine the results from all ranks
    global_found = comm.allreduce(found, op=MPI.LOR)

    # Return the final result
    return global_found

# Generate example data
data = ["apple", "banana", "orange", "grape", "pineapple", "watermelon", "kiwi"] * 100000

# Analysis: Linear search
start_time = time.time()
result = linear_search(data, "kiwi")
end_time = time.time()
if result:
    print("Linear search: Target found!")
else:
    print("Linear search: Target not found.")
print("Linear search time: ", end_time - start_time, "\n")

# Analysis: Parallel search
start_time = time.time()
result = parallel_search(data, "kiwi")
end_time = time.time()
if result:
    print("Parallel search: Target found!")
else:
    print("Parallel search: Target not found.")
print("Parallel search time: ", end_time - start_time)


Linear search: Target found!
Linear search time:  5.650520324707031e-05 

Parallel search: Target found!
Parallel search time:  0.03224492073059082


In [24]:
with open('sample-text.txt', 'r') as file:
    lines = file.readlines()

words = []
for line in lines:
    line = line.strip()  # remove leading/trailing whitespaces
    if line:  # skip empty lines
        words_in_line = line.split()
        words.extend(words_in_line)

result = "[" + ", ".join(['"{}"'.format(word) for word in words]) + "]"

print(result)


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Combining the above search techniques with File read operations

In [68]:
import time
from mpi4py import MPI

# Linear search function
def linear_search(data, target):
    for item in data:
        if item == target:
            return True
    return False

# # Parallel search function
# def parallel_search(data, target):
#     comm = MPI.COMM_WORLD
#     rank = comm.Get_rank()
#     size = comm.Get_size()

#     # Scatter data to all ranks
#     chunk_size = len(data) // size
#     remainder = len(data) % size
#     sendbuf = [data[i:i+chunk_size] for i in range(0, len(data), chunk_size)]
#     sendcounts = [len(sendbuf[i]) for i in range(size)]
#     displs = [sum(sendcounts[:i]) for i in range(size)]
#     recvbuf = comm.scatter(sendbuf, root=0)

#     # Search for target in the received data
#     found = False
#     for item in recvbuf:
#         if item == target:
#             found = True
#             break

#     # Combine the results from all ranks
#     global_found = comm.allreduce(found, op=MPI.LOR)

#     # Return the final result
#     return global_found


# Parallel search function
def parallel_search(data, target):
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Measure time taken to scatter data
    t1 = MPI.Wtime()

    # Scatter data to all ranks
    chunk_size = len(data) // size
    remainder = len(data) % size
    sendbuf = [data[i:i+chunk_size] for i in range(0, len(data), chunk_size)]
    sendcounts = [len(sendbuf[i]) for i in range(size)]
    displs = [sum(sendcounts[:i]) for i in range(size)]
    recvbuf = comm.scatter(sendbuf, root=0)

    # Measure time taken to scatter data
    t2 = MPI.Wtime()

    # Search for target in the received data
    found = False
    for item in recvbuf:
        if item == target:
            found = True
            break

    # Combine the results from all ranks
    global_found = comm.allreduce(found, op=MPI.LOR)

    #Measure the time to join threads 
    t3 = MPI.Wtime()

    # Return the final result and time taken to scatter data
    return global_found, t2 - t1, t3 - t2



# Generate example data
with open('sample-text.txt', 'r') as file:
    lines = file.readlines()

words = []
for line in lines:
    line = line.strip()  # remove leading/trailing whitespaces
    if line:  # skip empty lines
        words_in_line = line.split()
        words.extend(words_in_line)


# printing the txt file
# print(words,"\n")


#######################

# Analysis: Linear search
s_start_time = time.time()
result = linear_search(words, "Chirag")
s_end_time = time.time()
if result:
    print("Linear search: Target found!")
else:
    print("Linear search: Target not found.")
print("Linear search time: ", (s_end_time - s_start_time), "\n")

# # Analysis: Parallel search
# p_start_time = time.time()
# result = parallel_search(words, "Chirag")
# p_end_time = time.time()
# if result:
#     print("Parallel search: Target found!")
# else:
#     print("Parallel search: Target not found.")
# print("Parallel search time: ", p_end_time - p_start_time, "\n")


# Analysis: Parallel search
p_start_time = time.time()
result, thread_creation_time, thread_join_time = parallel_search(words, "Chirag")
p_end_time = time.time()
if result:
    print("Parallel search: Target found!")
else:
    print("Parallel search: Target not found.")
print("Parallel search time: ", p_end_time - p_start_time)
print("Thread creation time: ", thread_creation_time)
print("Thread join time: ", thread_join_time)

ptTaken = p_end_time - p_start_time - thread_creation_time - thread_join_time
print("Actuall Parallel search time: ", ptTaken)


Linear search: Target not found.
Linear search time:  0.004189491271972656 

Parallel search: Target not found.
Parallel search time:  0.03345847129821777
Thread creation time:  0.02738219399998343
Thread join time:  0.003839694999896892
Actuall Parallel search time:  0.002236582298337453


#### Complexity Analysis:




In [69]:
p_time = p_end_time - p_start_time - thread_creation_time - thread_join_time
s_time = s_end_time - s_start_time

In [70]:
performance_ratio = s_time / p_time
print("The Parallel Algorithm implemented is better than the Serial Algorithm by ", performance_ratio, "X")

The Parallel Algorithm implemented is better than the Serial Algorithm by  1.8731666056227325 X
