<a href="https://colab.research.google.com/github/Bindulekh/private/blob/main/parallel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import threading

def print_numbers():
    for i in range(10):
        print(f"{i}")

def print_letters():
    for letter in 'abcdefghij':
        print(f"{letter}")

print_numbers()
print_letters()

0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j


The start() method starts each thread, and the join() method ensures that the main program waits for both threads to finish before it continues

In [2]:
import threading

def print_numbers():
    for i in range(10):
        print(f"{i}")

def print_letters():
    for letter in 'abcdefghij':
        print(f"{letter}")

t1 = threading.Thread(target=print_numbers)
t2 = threading.Thread(target=print_letters)

t1.start()
t2.start()

t1.join()
t2.join()

0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j


Please note that due to the nature of threading and the Global Interpreter Lock (GIL) in Python, the multithreaded code might not always be faster than the normal code, especially for CPU-bound tasks like in this example.

In [3]:
import threading
import time

def print_numbers():
    for i in range(10):
        print(f"{i}")

def print_letters():
    for letter in 'abcdefghij':
        print(f"{letter}")

# Multithreaded execution
start_time = time.time()

t1 = threading.Thread(target=print_numbers)
t2 = threading.Thread(target=print_letters)

t1.start()
t2.start()

t1.join()
t2.join()

end_time = time.time()
print(f"\nTime taken by multithreaded code: {end_time - start_time} seconds\n")

# Normal (sequential) execution
start_time = time.time()

print_numbers()
print_letters()

end_time = time.time()
print(f"\nTime taken by normal code: {end_time - start_time} seconds\n")


0a
b
c
d
e
f
g
h
i
j

1
2
3
4
5
6
7
8
9

Time taken by multithreaded code: 0.018474102020263672 seconds

0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j

Time taken by normal code: 0.0004267692565917969 seconds



In [4]:
from multiprocessing import Process

def print_numbers():
    for i in range(10):
        print(f"{i}")

def print_letters():
    for letter in 'abcdefghij':
        print(f"{letter}")

p1 = Process(target=print_numbers)
p2 = Process(target=print_letters)

p1.start()
p2.start()

p1.join()
p2.join()


0
1
a
2
3
b4
c
d
e
f

g5
6

7h

8
i9

j


In [5]:
import time
from multiprocessing import Process

def print_numbers():
    for i in range(10000):
        x = 1
        x += 5
        x *= 4

def print_letters():
    for i in range(10000):
        x = 1
        x += 5
        x *= 4

# Record the start time before running the normal code
start_time_normal = time.time()

print_numbers()
print_letters()

# Record the end time after running the normal code and calculate the elapsed time
end_time_normal = time.time()
elapsed_time_normal = end_time_normal - start_time_normal

# Record the start time before running the multiprocessing code
start_time_multi = time.time()

p1 = Process(target=print_numbers)
p2 = Process(target=print_letters)

p1.start()
p2.start()

p1.join()
p2.join()

# Record the end time after running the multiprocessing code and calculate the elapsed time
end_time_multi = time.time()
elapsed_time_multi = end_time_multi - start_time_multi

print(f"Time taken by normal code: {elapsed_time_normal} seconds")
print(f"Time taken by multiprocessing code: {elapsed_time_multi} seconds")


Time taken by normal code: 0.004586458206176758 seconds
Time taken by multiprocessing code: 0.029171466827392578 seconds


In [10]:
start_time_normal = time.time()


In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
import pandas as pd
import numpy as np
path="/content/drive/MyDrive/netflixtitles.csv"
df=pd.read_csv(path,encoding='latin-1')

In [8]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,...,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,...,,,,,,,,,,
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,...,,,,,,,,,,
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,...,,,,,,,,,,
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,...,,,,,,,,,,
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,...,,,,,,,,,,


In [13]:
end_time_normal = time.time()
elapsed_time_normal = end_time_normal - start_time_normal

In [14]:
print(f"Time taken by normal code: {elapsed_time_normal} seconds")

Time taken by normal code: 15.455750226974487 seconds


In [15]:
start_time_multi = time.time()

p1 = Process(target=pd.read_csv(path,encoding='latin-1'))


p1.start()


p1.join()


# Record the end time after running the multiprocessing code and calculate the elapsed time
end_time_multi = time.time()
elapsed_time_multi = end_time_multi - start_time_multi

print(f"Time taken by multiprocessing code: {elapsed_time_multi} seconds")


Process Process-5:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 107, in run
    if self._target:
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py", line 1466, in __nonzero__
    raise ValueError(
ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().


Time taken by multiprocessing code: 0.13583898544311523 seconds
