<a href="https://colab.research.google.com/github/2303a51060Nirnaya/High_performance_computing-Hcp-/blob/main/Lab_01%26_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Reference: [The Python Profilers (`cProfile` / `profile`)](https://docs.python.org/3/library/profile.html)

In [24]:
import cProfile
import re
# Minimal cProfile example: profile the given statement string (a single
# regex compilation) and print the per-function call/timing table.
cProfile.run('re.compile("foo|bar")')

         6 function calls in 0.000 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 __init__.py:226(compile)
        1    0.000    0.000    0.000    0.000 __init__.py:280(_compile)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




In [2]:
# section_a_vector_dot.py
import time, random, cProfile, pstats, io, tracemalloc, sys

def gen_data(n, seed=42):
  """Return two lists of ``n`` pseudo-random floats each.

  The global ``random`` generator is re-seeded with ``seed`` first, so the
  same arguments always give the same pair. All values of the first list
  are drawn before any value of the second.
  """
  random.seed(seed)
  draw = random.random
  first = [draw() for _ in range(n)]
  second = [draw() for _ in range(n)]
  return first, second

def vector_add(x, y):
  """Element-wise sum of ``x`` and ``y`` as a new list.

  Pairs are formed with ``zip``, so the result is as long as the shorter
  input. The explicit append loop is kept on purpose: it is the pure-Python
  baseline being profiled.
  """
  sums = []
  for left, right in zip(x, y):
    sums.append(left + right)
  return sums

def dot_product(x, y):
  """Sum of pairwise products of ``x`` and ``y``.

  Accumulates left to right starting from ``0.0``; pairs come from ``zip``,
  so extra elements of the longer input are ignored.
  """
  total = 0.0
  for left, right in zip(x, y):
    total = total + left * right
  return total

def main():
  """Time, CPU-profile and memory-profile ``dot_product`` and ``vector_add``.

  CPU profiling (cProfile) and memory tracing (tracemalloc) run in two
  separate passes over the same data so that neither tool is active while
  the other one measures. Results are printed to stdout.
  """
  # Stop tracing left over from an earlier (possibly interrupted) run.
  if tracemalloc.is_tracing():
    tracemalloc.stop()

  # Clear any profile hook installed through sys.setprofile by a previous run.
  # NOTE(review): this may not release a cProfile profiler that is still
  # enabled on newer Python versions - the `with` block below is what
  # guarantees this run cleans up after itself.
  sys.setprofile(None)

  N = 2_000_000 # adjust based on your machine
  x, y = gen_data(N)

  # --- CPU Profiling with cProfile ---
  # The context manager disables the profiler even if the workload raises,
  # so a failed run cannot leave a profiler active for later cells.
  with cProfile.Profile() as pr:
    t0 = time.perf_counter()
    s = dot_product(x, y)
    t1 = time.perf_counter()

    t2 = time.perf_counter()
    z = vector_add(x, y)
    t3 = time.perf_counter()

  # --- Memory Profiling with tracemalloc ---
  # cProfile is disabled at this point; rerun the operations under tracing.
  tracemalloc.start()
  try:
    _ = dot_product(x, y) # result not needed, only its allocations
    _ = vector_add(x, y)  # kept referenced so it counts as "current"
    current, peak = tracemalloc.get_traced_memory()
  finally:
    tracemalloc.stop()

  # Print results
  print(f"Vector length N={N}")
  print(f"Dot product: {s:.6f} | Time: {t1 - t0:.3f} s")
  print(f"Vector add: len(z)={len(z)} | Time: {t3 - t2:.3f} s")
  print(f"Current/Peak memory: {current/1e6:.2f} MB / {peak/1e6:.2f} MB")

  # Profiler output
  s_buf = io.StringIO()
  ps = pstats.Stats(pr, stream=s_buf).sort_stats('cumtime')
  ps.print_stats(15)
  print("\n--- cProfile (Top 15 by cumulative time) ---")
  print(s_buf.getvalue())

# Call main() to execute the profiling when the cell is run.
# This call must stay at module level: indented into main() it would never
# run from the cell and would make main() call itself without end.
if __name__ == "__main__":
  main()


In [3]:
# section_a_vector_dot.py
import time
import random
import cProfile
import pstats
import io
import tracemalloc
import sys


def gen_data(n, seed=42):
    """Build two reproducible lists of ``n`` random floats.

    Re-seeds the module-level ``random`` generator with ``seed``; the first
    returned list consumes the first ``n`` draws, the second the next ``n``.
    """
    random.seed(seed)
    vectors = []
    for _ in range(2):
        vectors.append([random.random() for _ in range(n)])
    return vectors[0], vectors[1]


def vector_add(x, y):
    """Return a new list holding ``x[i] + y[i]`` for each zipped pair.

    Deliberately written as an append loop (the naive baseline under
    profiling); output length equals the shorter input's length.
    """
    result = []
    pairs = zip(x, y)
    for first, second in pairs:
        result.append(first + second)
    return result


def dot_product(x, y):
    """Return the dot product of ``x`` and ``y`` as a float.

    Products are added in input order to a running total that starts at
    ``0.0``; zipping truncates to the shorter input.
    """
    acc = 0.0
    pairs = zip(x, y)
    for first, second in pairs:
        acc += first * second
    return acc


def main():
    """Profile ``dot_product`` and ``vector_add`` and print a report.

    Pass 1 runs both operations under cProfile while timing each with
    ``time.perf_counter``. Pass 2 reruns them under tracemalloc to obtain
    current and peak traced memory. All results go to stdout.
    """
    # ---- Ensure clean profiling environment ----
    if tracemalloc.is_tracing():
        tracemalloc.stop()

    sys.setprofile(None)

    N = 2_000_000  # reduce to 500_000 if system is slow
    x, y = gen_data(N)

    # ---- CPU Profiling ----
    # Context manager: the profiler is disabled even if a workload raises,
    # so an error cannot leave it enabled for subsequent cells.
    with cProfile.Profile() as pr:
        t0 = time.perf_counter()
        s = dot_product(x, y)
        t1 = time.perf_counter()

        t2 = time.perf_counter()
        z = vector_add(x, y)
        t3 = time.perf_counter()

    # ---- Memory Profiling ----
    tracemalloc.start()
    try:
        _ = dot_product(x, y)
        _ = vector_add(x, y)  # still referenced when memory is sampled
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Always stop tracing, otherwise later cells run with its overhead.
        tracemalloc.stop()

    # ---- Results ----
    print(f"Vector length N = {N}")
    print(f"Dot product result: {s:.6f}")
    print(f"Dot product time: {t1 - t0:.3f} s")
    print(f"Vector add time: {t3 - t2:.3f} s")
    print(f"Current memory: {current/1e6:.2f} MB")
    print(f"Peak memory: {peak/1e6:.2f} MB")

    # ---- cProfile Output ----
    s_buf = io.StringIO()
    ps = pstats.Stats(pr, stream=s_buf).sort_stats("cumtime")
    ps.print_stats(10)

    print("\n--- cProfile (Top 10 by cumulative time) ---")
    print(s_buf.getvalue())


# ---- Proper entry point ----
if __name__ == "__main__":
    main()


Vector length N = 2000000
Dot product result: 499712.974120
Dot product time: 0.171 s
Vector add time: 1.921 s
Current memory: 65.13 MB
Peak memory: 65.13 MB

--- cProfile (Top 10 by cumulative time) ---
         2000011 function calls (2000010 primitive calls) in 2.091 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.630    0.630    1.005    1.005 {built-in method time.sleep}
  2000000    0.725    0.000    0.725    0.000 {method 'append' of 'list' objects}
      2/1    0.566    0.283    0.313    0.313 /tmp/ipython-input-1096687492.py:18(vector_add)
        1    0.171    0.171    0.171    0.171 /tmp/ipython-input-1096687492.py:25(dot_product)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        4    0.000    0.000    0.000    0.000 {built-in method time.perf_counter}
        2    0.000    0.000    0.000    0.000 {built-in method posix.getppid}





In [4]:

# section_a_vector_dot.py
import time, random, cProfile, pstats, io, tracemalloc

def gen_data(n, seed=42):
    """Generate the two input vectors for the benchmark.

    Seeds the shared ``random`` generator with ``seed`` and returns a pair
    of lists, each with ``n`` floats, drawn one list after the other.
    """
    random.seed(seed)
    vec_x = []
    for _ in range(n):
        vec_x.append(random.random())
    vec_y = []
    for _ in range(n):
        vec_y.append(random.random())
    return vec_x, vec_y

def vector_add(x, y):
    """Add two sequences element by element and return the sums as a list.

    Iteration stops at the end of the shorter sequence. The append-based
    loop is the intentionally naive implementation being measured.
    """
    summed = []
    for xi, yi in zip(x, y):
        pair_sum = xi + yi
        summed.append(pair_sum)
    return summed

def dot_product(x, y):
    """Compute ``sum(x[i] * y[i])`` with a plain left-to-right loop.

    Starts from ``0.0`` so the result is a float even for empty inputs;
    only as many pairs as the shorter sequence provides are used.
    """
    running = 0.0
    for xi, yi in zip(x, y):
        product = xi * yi
        running += product
    return running

def main():
    """Run both vector kernels under cProfile and tracemalloc together.

    Both tools are active while the kernels are timed with
    ``time.perf_counter``, so the printed wall-clock times include their
    overhead. Prints timings, traced memory and the top cProfile entries.
    """
    N = 2_000_000  # adjust based on your machine
    x, y = gen_data(N)

    tracemalloc.start()
    try:
        # The context manager disables the profiler even when a kernel
        # raises, so a failure cannot leave it enabled for later cells.
        with cProfile.Profile() as pr:
            t0 = time.perf_counter()
            s = dot_product(x, y)
            t1 = time.perf_counter()

            t2 = time.perf_counter()
            z = vector_add(x, y)
            t3 = time.perf_counter()
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Tracing must end on every path; it slows all later allocations.
        tracemalloc.stop()

    # Print results
    print(f"Vector length N={N}")
    print(f"Dot product: {s:.6f} | Time: {t1 - t0:.3f} s")
    print(f"Vector add: len(z)={len(z)} | Time: {t3 - t2:.3f} s")
    print(f"Current/Peak memory: {current/1e6:.2f} MB / {peak/1e6:.2f} MB")

    # Profiler output
    s_buf = io.StringIO()
    ps = pstats.Stats(pr, stream=s_buf).sort_stats('cumtime')
    ps.print_stats(15)
    print("\n--- cProfile (Top 15 by cumulative time) ---")
    print(s_buf.getvalue())

if __name__ == "__main__":
    main()

Vector length N=2000000
Dot product: 499712.974120 | Time: 0.741 s
Vector add: len(z)=2000000 | Time: 7.338 s
Current/Peak memory: 65.13 MB / 65.13 MB

--- cProfile (Top 15 by cumulative time) ---
         2000023 function calls (2000022 primitive calls) in 8.079 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        7    4.675    0.668    7.039    1.006 {built-in method time.sleep}
  2000000    2.465    0.000    2.465    0.000 {method 'append' of 'list' objects}
        1    0.741    0.741    0.741    0.741 /tmp/ipython-input-3909820823.py:16(dot_product)
      2/1    0.198    0.099    0.270    0.270 /tmp/ipython-input-3909820823.py:10(vector_add)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        8    0.000    0.000    0.000    0.000 {built-in method posix.getppid}
        4    0.000    0.000    0.000    0.000 {built-in method time.perf_counter}





In [5]:
# section_a_vector_dot.py
import time, random, cProfile, pstats, io, tracemalloc

def gen_data(n, seed=42):
    """Return ``(x, y)``: two length-``n`` lists of seeded random floats.

    ``random.seed(seed)`` is called on the global generator, then ``x`` is
    filled completely before ``y``.
    """
    random.seed(seed)

    def _draw_vector():
        # One vector = n consecutive draws from the global generator.
        return [random.random() for _ in range(n)]

    x_values = _draw_vector()
    y_values = _draw_vector()
    return x_values, y_values

def vector_add(x, y):
    """Pairwise addition of ``x`` and ``y``; returns a freshly built list.

    Uses ``zip`` (shorter input wins) and one ``append`` per element, which
    is the baseline behaviour the profiler output refers to.
    """
    collected = []
    for elem_x, elem_y in zip(x, y):
        collected.append(elem_x + elem_y)
    return collected

def dot_product(x, y):
    """Float dot product of two sequences, accumulated in input order.

    Unmatched trailing elements of the longer sequence are skipped because
    pairs are produced by ``zip``.
    """
    result = 0.0
    for elem_x, elem_y in zip(x, y):
        result = result + elem_x * elem_y
    return result

def main():
    """Benchmark ``dot_product`` and ``vector_add`` with profiling enabled.

    tracemalloc and cProfile are both active during the timed region, so
    the reported seconds include instrumentation overhead. Output is
    printed: timings, current/peak traced memory, top cProfile rows.
    """
    N = 2_000_000  # adjust based on your machine
    x, y = gen_data(N)

    tracemalloc.start()
    try:
        # `with` guarantees pr.disable() on every exit path, including
        # exceptions, so no profiler leaks into the next cell.
        with cProfile.Profile() as pr:
            t0 = time.perf_counter()
            s = dot_product(x, y)
            t1 = time.perf_counter()

            t2 = time.perf_counter()
            z = vector_add(x, y)
            t3 = time.perf_counter()
        current, peak = tracemalloc.get_traced_memory()
    finally:
        tracemalloc.stop()

    # Print results
    print(f"Vector length N={N}")
    print(f"Dot product: {s:.6f} | Time: {t1 - t0:.3f} s")
    print(f"Vector add: len(z)={len(z)} | Time: {t3 - t2:.3f} s")
    print(f"Current/Peak memory: {current/1e6:.2f} MB / {peak/1e6:.2f} MB")

    # Profiler output
    s_buf = io.StringIO()
    ps = pstats.Stats(pr, stream=s_buf).sort_stats('cumtime')
    ps.print_stats(15)
    print("\n--- cProfile (Top 15 by cumulative time) ---")
    print(s_buf.getvalue())

if __name__ == "__main__":
    main()

Vector length N=2000000
Dot product: 499712.974120 | Time: 0.754 s
Vector add: len(z)=2000000 | Time: 6.111 s
Current/Peak memory: 65.13 MB / 65.13 MB

--- cProfile (Top 15 by cumulative time) ---
         2000020 function calls in 6.757 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        6    3.574    0.596    5.387    0.898 {built-in method time.sleep}
  2000000    2.063    0.000    2.063    0.000 {method 'append' of 'list' objects}
        1    0.473    0.473    0.724    0.724 /tmp/ipython-input-425613865.py:10(vector_add)
        1    0.646    0.646    0.646    0.646 /tmp/ipython-input-425613865.py:16(dot_product)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        7    0.000    0.000    0.000    0.000 {built-in method posix.getppid}
        4    0.000    0.000    0.000    0.000 {built-in method time.perf_counter}





In [6]:

# section_b_matmul.py
import time, random, cProfile, pstats, io, tracemalloc

def gen_matrix(n, seed=123):
    """Return an ``n`` x ``n`` list-of-lists of seeded random floats.

    The global ``random`` generator is re-seeded with ``seed``; rows are
    filled one after another, left to right.
    """
    random.seed(seed)
    matrix = []
    for _ in range(n):
        row = [random.random() for _ in range(n)]
        matrix.append(row)
    return matrix

def matmul_naive(A, B):
    """Multiply two list-of-lists matrices with the naive triple loop.

    Works for any compatible shapes: ``A`` is rows x inner, ``B`` is
    inner x cols, and the result is rows x cols. Square inputs give the
    same result as before; shapes are now taken from the actual operands
    instead of assuming every dimension equals ``len(A)``.

    Args:
        A: left matrix as a list of equal-length rows.
        B: right matrix as a list of equal-length rows.

    Returns:
        A new list of lists of floats holding ``A @ B``.

    Raises:
        ValueError: if a row of ``A`` does not have ``len(B)`` entries or
            the rows of ``B`` differ in length.
    """
    rows = len(A)
    inner = len(B)
    cols = len(B[0]) if inner else 0

    # Plain loops (not generator expressions) keep the profile free of
    # extra helper frames.
    for a_row in A:
        if len(a_row) != inner:
            raise ValueError("matmul_naive: each row of A must have len(B) entries")
    for b_row in B:
        if len(b_row) != cols:
            raise ValueError("matmul_naive: rows of B must all have the same length")

    C = [[0.0]*cols for _ in range(rows)]
    # i-k-j order: the innermost loop walks one row of B and one row of C.
    for i in range(rows):
        for k in range(inner):
            aik = A[i][k]
            for j in range(cols):
                C[i][j] += aik * B[k][j]
    return C

def main():
    """Time and profile one naive matrix multiplication and print a report.

    Two seeded ``n`` x ``n`` matrices are multiplied while tracemalloc and
    cProfile are active; wall time, one result entry, traced memory and the
    top cProfile rows are printed.
    """
    n = 150  # raise/lower based on your machine; O(n^3)
    A = gen_matrix(n, seed=1)
    B = gen_matrix(n, seed=2)

    tracemalloc.start()
    try:
        # Profiler is disabled on every exit path, including exceptions.
        with cProfile.Profile() as pr:
            t0 = time.perf_counter()
            C = matmul_naive(A, B)
            t1 = time.perf_counter()
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Never leave tracing on for later cells.
        tracemalloc.stop()

    print(f"Matrix size n={n} -> {n}x{n}")
    print(f"Time: {t1 - t0:.3f} s | C[0][0]={C[0][0]:.6f}")
    print(f"Current/Peak memory: {current/1e6:.2f} MB / {peak/1e6:.2f} MB")

    s_buf = io.StringIO()
    ps = pstats.Stats(pr, stream=s_buf).sort_stats('cumtime')
    ps.print_stats(10)
    print("\n--- cProfile (Top 10 by cumulative time) ---")
    print(s_buf.getvalue())

if __name__ == "__main__":
    main()


Matrix size n=150 -> 150x150
Time: 2.916 s | C[0][0]=37.256203
Current/Peak memory: 0.75 MB / 0.75 MB

--- cProfile (Top 10 by cumulative time) ---
         368 function calls (364 primitive calls) in 2.916 seconds

   Ordered by: cumulative time
   List reduced from 77 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        3    0.000    0.000    2.870    0.957 /usr/lib/python3.12/asyncio/base_events.py:1922(_run_once)
        3    0.000    0.000    2.869    0.956 /usr/lib/python3.12/selectors.py:451(select)
        2    1.847    0.923    1.847    0.923 {built-in method time.sleep}
        1    0.881    0.881    0.881    0.881 /tmp/ipython-input-2637978816.py:8(matmul_naive)
        2    0.140    0.070    0.140    0.070 {method 'poll' of 'select.epoll' objects}
       13    0.045    0.003    0.045    0.003 /usr/local/lib/python3.12/dist-packages/zmq/sugar/socket.py:632(send)
        1    0.000    0.000    0.007    0.007 /usr/local

In [7]:
# section_c_conv2d.py
import time, cProfile, pstats, io, tracemalloc

def gen_grid(h, w):
    """Build an ``h`` x ``w`` grid with a deterministic pattern.

    Cell ``(i, j)`` holds ``((i * j) % 255) / 255.0``.
    """
    grid = []
    for i in range(h):
        row = []
        for j in range(w):
            row.append((i * j) % 255 / 255.0)
        grid.append(row)
    return grid

def conv2d(grid, kernel):
    """Apply ``kernel`` to ``grid`` with a direct nested-loop sliding window.

    Only positions where the whole window fits inside the grid are
    computed; the border cells of the result stay at ``0.0``. The window
    extends ``len(kernel)//2`` rows and ``len(kernel[0])//2`` columns to
    each side of the centre. Returns a new grid with the input's shape.
    """
    rows, cols = len(grid), len(grid[0])
    half_h = len(kernel) // 2
    half_w = len(kernel[0]) // 2
    result = [[0.0] * cols for _ in range(rows)]
    for i in range(half_h, rows - half_h):
        out_row = result[i]
        for j in range(half_w, cols - half_w):
            total = 0.0
            for di in range(-half_h, half_h + 1):
                # Row lookups are fixed for the whole inner loop.
                src_row = grid[i + di]
                weights = kernel[di + half_h]
                for dj in range(-half_w, half_w + 1):
                    total += src_row[j + dj] * weights[dj + half_w]
            out_row[j] = total
    return result

def make_uniform_kernel(size=5):
    """Return a ``size`` x ``size`` kernel whose entries are all ``1/size**2``.

    Every row is its own list object.
    """
    weight = 1.0 / (size * size)
    kernel = []
    for _ in range(size):
        kernel.append([weight] * size)
    return kernel

def main():
    """Time and profile one 5x5 uniform convolution and print a report.

    Builds a 256x256 pattern grid, runs ``conv2d`` while tracemalloc and
    cProfile are active, then prints wall time, a sample output cell,
    traced memory and the top cProfile rows.
    """
    H, W = 256, 256  # adjust based on machine
    grid = gen_grid(H, W)
    kernel = make_uniform_kernel(5)

    tracemalloc.start()
    try:
        # Profiler is disabled on every exit path, including exceptions.
        with cProfile.Profile() as pr:
            t0 = time.perf_counter()
            out = conv2d(grid, kernel)
            t1 = time.perf_counter()
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Never leave tracing on for later cells.
        tracemalloc.stop()

    print(f"Grid: {H}x{W} | Kernel: 5x5 | Time: {t1 - t0:.3f} s")
    print(f"Sample out[128][128]={out[128][128]:.6f}")
    print(f"Current/Peak memory: {current/1e6:.2f} MB / {peak/1e6:.2f} MB")

    s_buf = io.StringIO()
    ps = pstats.Stats(pr, stream=s_buf).sort_stats('cumtime')
    ps.print_stats(10)
    print("\n--- cProfile (Top 10 by cumulative time) ---")
    print(s_buf.getvalue())

if __name__ == "__main__":
    main()

Grid: 256x256 | Kernel: 5x5 | Time: 4.640 s
Sample out[128][128]=0.490980
Current/Peak memory: 2.07 MB / 2.07 MB

--- cProfile (Top 10 by cumulative time) ---
         38 function calls in 4.468 seconds

   Ordered by: cumulative time
   List reduced from 24 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    4.467    4.467 /usr/lib/python3.12/asyncio/base_events.py:1922(_run_once)
        1    0.000    0.000    4.467    4.467 /usr/lib/python3.12/selectors.py:451(select)
        4    3.916    0.979    3.916    0.979 {built-in method time.sleep}
        1    0.551    0.551    0.551    0.551 /tmp/ipython-input-3230325479.py:7(conv2d)
        1    0.000    0.000    0.000    0.000 /usr/lib/python3.12/asyncio/events.py:86(_run)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 {method 'run' of '_contextvars.Context' obje

In [8]:
# section_d_montecarlo_pi.py
import time, random, cProfile, pstats, io, tracemalloc

def estimate_pi(n_samples, seed=2025):
    """Monte Carlo estimate of pi from ``n_samples`` seeded random points.

    Each sample draws ``x`` then ``y`` from the global ``random`` generator
    (re-seeded with ``seed``) and counts as a hit when ``x*x + y*y <= 1``.
    Returns ``4 * hits / n_samples``.
    """
    random.seed(seed)
    hits = 0
    for _ in range(n_samples):
        px = random.random()
        py = random.random()
        # A bool adds as 0 or 1, so this counts points inside the quarter circle.
        hits += (px * px + py * py <= 1.0)
    return 4.0 * hits / n_samples

def main():
    """Time and profile the Monte Carlo pi estimate and print a report.

    Runs ``estimate_pi`` while tracemalloc and cProfile are active, then
    prints the estimate, wall time, traced memory and top cProfile rows.
    """
    N = 2_000_000  # adjust for your machine
    tracemalloc.start()
    try:
        # Profiler is disabled on every exit path, including exceptions.
        with cProfile.Profile() as pr:
            t0 = time.perf_counter()
            pi_est = estimate_pi(N)
            t1 = time.perf_counter()
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Never leave tracing on for later cells.
        tracemalloc.stop()

    print(f"N={N} -> pi ≈ {pi_est:.6f} | Time: {t1 - t0:.3f} s")
    print(f"Current/Peak memory: {current/1e6:.2f} MB / {peak/1e6:.2f} MB")

    s_buf = io.StringIO()
    ps = pstats.Stats(pr, stream=s_buf).sort_stats('cumtime')
    ps.print_stats(10)
    print("\n--- cProfile (Top 10 by cumulative time) ---")
    print(s_buf.getvalue())

if __name__ == "__main__":
    main()

N=2000000 -> pi ≈ 3.142150 | Time: 4.982 s
Current/Peak memory: 0.03 MB / 0.03 MB

--- cProfile (Top 10 by cumulative time) ---
         4000492 function calls (4000480 primitive calls) in 5.017 seconds

   Ordered by: cumulative time
   List reduced from 113 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        4    3.377    0.844    4.021    1.005 {built-in method time.sleep}
  4000000    0.796    0.000    0.796    0.000 {method 'random' of '_random.Random' objects}
        1    0.444    0.444    0.529    0.529 /tmp/ipython-input-3913647630.py:4(estimate_pi)
       13    0.040    0.003    0.047    0.004 /usr/local/lib/python3.12/dist-packages/zmq/sugar/socket.py:632(send)
        1    0.000    0.000    0.016    0.016 /usr/local/lib/python3.12/dist-packages/ipykernel/iostream.py:219(<lambda>)
        1    0.000    0.000    0.016    0.016 /usr/local/lib/python3.12/dist-packages/ipykernel/iostream.py:221(_really_send)
        1   

In [9]:
# section_e_pairwise.py
import time, random, math, cProfile, pstats, io, tracemalloc

def gen_points(n, seed=7):
    """Return ``n`` seeded random 2-D points as ``(x, y)`` tuples.

    The global ``random`` generator is re-seeded with ``seed``; each point
    draws its x value first, then its y value.
    """
    random.seed(seed)
    points = []
    for _ in range(n):
        px = random.random()
        py = random.random()
        points.append((px, py))
    return points

def pairwise_potential(points, eps=1e-6):
    """Return, for every point, the sum of ``1 / (distance + eps)`` to all others.

    ``points`` is a sequence of ``(x, y)`` pairs. A point is skipped against
    itself (same index); ``eps`` is added to each Euclidean distance before
    inverting. The result list has one float per input point.
    """
    count = len(points)
    potentials = [0.0] * count
    for i, (xi, yi) in enumerate(points):
        total = 0.0
        for j, (xj, yj) in enumerate(points):
            if i != j:
                dx = xi - xj
                dy = yi - yj
                distance = math.sqrt(dx * dx + dy * dy) + eps
                total += 1.0 / distance
        potentials[i] = total
    return potentials

def main():
    """Time and profile the pairwise-potential kernel and print a report.

    Generates ``N`` seeded points, runs ``pairwise_potential`` while
    tracemalloc and cProfile are active, then prints wall time, two sample
    potentials, traced memory and the top cProfile rows.
    """
    N = 800  # adjust based on machine; O(N^2) interactions
    pts = gen_points(N)

    tracemalloc.start()
    try:
        # Profiler is disabled on every exit path, including exceptions.
        with cProfile.Profile() as pr:
            t0 = time.perf_counter()
            pot = pairwise_potential(pts)
            t1 = time.perf_counter()
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Never leave tracing on for later cells.
        tracemalloc.stop()

    print(f"N={N} -> computed potentials | Time: {t1 - t0:.3f} s")
    print(f"Sample pot[0]={pot[0]:.6f}, pot[N//2]={pot[N//2]:.6f}")
    print(f"Current/Peak memory: {current/1e6:.2f} MB / {peak/1e6:.2f} MB")

    s_buf = io.StringIO()
    ps = pstats.Stats(pr, stream=s_buf).sort_stats('cumtime')
    ps.print_stats(10)
    print("\n--- cProfile (Top 10 by cumulative time) ---")
    print(s_buf.getvalue())

if __name__ == "__main__":
    main()

N=800 -> computed potentials | Time: 3.383 s
Sample pot[0]=2390.396335, pot[N//2]=2337.388513
Current/Peak memory: 0.04 MB / 0.05 MB

--- cProfile (Top 10 by cumulative time) ---
         639491 function calls (639479 primitive calls) in 3.422 seconds

   Ordered by: cumulative time
   List reduced from 76 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    1.709    0.855    2.013    1.006 {built-in method time.sleep}
        1    0.772    0.772    0.911    0.911 /tmp/ipython-input-2231473572.py:8(pairwise_potential)
   639200    0.512    0.000    0.512    0.000 {built-in method math.sqrt}
        6    0.004    0.001    0.005    0.001 /usr/local/lib/python3.12/dist-packages/zmq/sugar/socket.py:632(send)
        2    0.000    0.000    0.001    0.000 /usr/lib/python3.12/asyncio/events.py:86(_run)
        2    0.000    0.000    0.000    0.000 {method 'run' of '_contextvars.Context' objects}
        2    0.000    0.000    0.00

# **Expt-01: Accessing HPC Cluster and Job Submission**

In [10]:
# hello_cluster.py
import os
import platform
import sys
# Greeting followed by one "label value" line per fact about the node.
print("Hello from HPC cluster!")
node_facts = (
    ("User:", os.getenv("USER")),  # None when USER is not set
    ("Host:", platform.node()),
    ("Python:", sys.version),
    ("Cores (logical):", os.cpu_count()),
)
for label, value in node_facts:
    print(label, value)

Hello from HPC cluster!
User: None
Host: fc8ada169c0e
Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
Cores (logical): 2


In [11]:
# serial_baseline.py
import time, os, platform, sys
def work(n=500_000): # ~500k iterations
  """Serial CPU-bound baseline: accumulate ``(i % 7) * 0.123456`` for i < n.

  Args:
    n: number of loop iterations (default 500,000 - the same value as
      before, now written with conventional digit grouping).

  Returns:
    The accumulated float sum; ``0.0`` when ``n`` is 0 or negative.
  """
  s = 0.0
  for i in range(n):
    s += (i % 7) * 0.123456
  return s
if __name__ == "__main__":
  # All job output and the timed run belong under the guard, so importing
  # this code as a module does not trigger the workload.
  print("=== Job Info ===")
  print("Host:", platform.node())
  print("User:", os.getenv("USER"))
  print("Python:", sys.version)
  print("=== Work ===")
  t0 = time.perf_counter()
  result = work()
  t1 = time.perf_counter()
  print(f"Result={result:.6f} | Time={(t1 - t0):.3f} s")

=== Job Info ===
Host: fc8ada169c0e
User: None
Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
=== Work ===
Result=185183.259265 | Time=0.075 s


In [12]:
# Interactive-allocation walkthrough: request 1 node / 1 task / 1 CPU for
# 10 minutes on partition "short", then report where the code is running.
!salloc -p short -N 1 -n 1 -c 1 -t 00:10:00 # This command is for HPC clusters and will likely fail in Colab
# Once granted:
!hostname
# The Python code below will now run directly within the Colab environment.
import os, platform
# os.getenv("USER") is None when the USER variable is not set.
print("Interactive on:", platform.node(), "USER:", os.getenv("USER"))
print("Logical CPUs:", os.cpu_count())
# No need for !exit as the cell execution finishes naturally.

/bin/bash: line 1: salloc: command not found
fc8ada169c0e
Interactive on: fc8ada169c0e USER: None
Logical CPUs: 2


In [13]:
%%writefile mc_pi_array.py
# mc_pi_array.py
import os, time, random

def estimate_pi(n_samples, seed=42):
  """Estimate pi by sampling ``n_samples`` seeded points in the unit square.

  The global ``random`` generator is re-seeded with ``seed``. Each sample
  draws x, then y; points with ``x*x + y*y <= 1`` count as hits and the
  function returns ``4 * hits / n_samples``.
  """
  random.seed(seed)
  hits = 0
  for _ in range(n_samples):
    px = random.random()
    py = random.random()
    if px * px + py * py <= 1.0:
      hits += 1
  return 4.0 * hits / n_samples

if __name__ == "__main__":
  # Task index from the scheduler: Slurm's variable first, then PBS, else "0".
  pbs_or_default = os.getenv("PBS_ARRAYID", "0")
  tid = int(os.getenv("SLURM_ARRAY_TASK_ID", pbs_or_default))
  sizes = [50_000, 100_000, 200_000, 400_000, 800_000]
  # Wrap around so every task index selects one of the sample sizes.
  n = sizes[tid % len(sizes)]
  # Per-task seed so array tasks do not repeat the same random stream.
  task_seed = 100 + tid
  t0 = time.perf_counter()
  pi = estimate_pi(n, seed=task_seed)
  t1 = time.perf_counter()
  print(f"TaskID={tid} N={n} Pi\u2248{pi:.6f} Time={(t1 - t0):.3f}s")

Writing mc_pi_array.py


In [14]:
# mc_pi_array.py
import os, time, random

def estimate_pi(n_samples, seed=42):
  """Return a Monte Carlo approximation of pi.

  Re-seeds the global ``random`` generator with ``seed``, draws
  ``n_samples`` (x, y) pairs and returns four times the fraction that
  satisfies ``x*x + y*y <= 1``.
  """
  random.seed(seed)
  inside_count = 0
  for _ in range(n_samples):
    sample_x = random.random()
    sample_y = random.random()
    in_circle = sample_x * sample_x + sample_y * sample_y <= 1.0
    inside_count += in_circle
  return 4.0 * inside_count / n_samples

if __name__ == "__main__":
  # Read the array-task index: SLURM_ARRAY_TASK_ID, else PBS_ARRAYID, else "0".
  raw_tid = os.getenv("SLURM_ARRAY_TASK_ID", os.getenv("PBS_ARRAYID", "0"))
  tid = int(raw_tid)
  sizes = (50_000, 100_000, 200_000, 400_000, 800_000)
  # Cycle through the size table by task index.
  size_index = tid % len(sizes)
  n = sizes[size_index]
  t0 = time.perf_counter()
  pi = estimate_pi(n, seed=100 + tid)
  t1 = time.perf_counter()
  print(f"TaskID={tid} N={n} Pi\u2248{pi:.6f} Time={(t1 - t0):.3f}s")

TaskID=0 N=50000 Pi≈3.141360 Time=0.026s


In [15]:
%%bash
#!/bin/bash
#SBATCH -J mc_array
#SBATCH -p short
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH -t 00:10:00
#SBATCH --array=0-4
#SBATCH -o mc_%A_%a.out
#SBATCH -e mc_%A_%a.err

# Run the estimator; it reads its task index from SLURM_ARRAY_TASK_ID
# (or PBS_ARRAYID) and defaults to task 0 when neither is set.
python3 mc_pi_array.py
# The following lines are instructional and not part of the script:
# Submit & monitor (Slurm):
# sbatch run_mc_array.slurm
# squeue -u $USER

TaskID=0 N=50000 Pi≈3.141360 Time=0.020s


# **Section E — Modules/Venv, Scratch I/O, and Resource Flags**

In [16]:
# io_to_scratch.py
import os, time, platform

def compute(n=300_000):
  """Small CPU-bound workload: sum ``(i % 13) * 0.314159`` for ``i`` in ``range(n)``.

  Returns the accumulated float (``0.0`` when the range is empty).
  """
  total = 0.0
  for step in range(n):
    total = total + (step % 13) * 0.314159
  return total

if __name__ == "__main__":
    # The whole job belongs under the guard; previously only the scratch
    # lookup was, so the rest ran even on import.
    # Scratch directory: SLURM_TMPDIR if set, else TMPDIR, else /tmp.
    scratch = os.getenv("SLURM_TMPDIR", os.getenv("TMPDIR", "/tmp"))
    result = compute()
    # Timestamped name (second resolution) for the result file.
    ts = time.strftime("%Y%m%d_%H%M%S")
    out_path = os.path.join(scratch, f"result_{ts}.txt")
    with open(out_path, "w") as f:
        f.write(f"Host={platform.node()}\nResult={result:.6f}\n")
    print("Wrote:", out_path)

Wrote: /tmp/result_20260112_083531.txt


In [17]:
%%bash
#!/bin/bash
#SBATCH -J io_scratch
#SBATCH -p short
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH -t 00:05:00
#SBATCH -o io_%j.out
#SBATCH -e io_%j.err

# Option A: modules (example — change to your cluster)
# module purge
# module load python/3.10

# Option B: venv (if allowed)
# python3 -m venv ~/venvs/hpc_py && source ~/venvs/hpc_py/bin/activate
# pip install --upgrade pip

# io_to_scratch.py must already exist in the working directory.
python3 io_to_scratch.py

# Copy back to work dir.
# Use the same fallback order as io_to_scratch.py (SLURM_TMPDIR, then TMPDIR,
# then /tmp); otherwise the copy looks in /tmp while the script wrote to $TMPDIR.
SCRATCH=${SLURM_TMPDIR:-${TMPDIR:-/tmp}}
cp -v "$SCRATCH"/result_*.txt "$PWD"/

'/tmp/result_20260112_083531.txt' -> '/content/result_20260112_083531.txt'


python3: can't open file '/content/io_to_scratch.py': [Errno 2] No such file or directory


In [18]:
%%writefile io_to_scratch.py
# io_to_scratch.py
import os, time, platform

def compute(n=300_000):
  """Return the running sum of ``(i % 13) * 0.314159`` over ``i = 0 .. n-1``.

  Pure-Python loop used as the job's workload; yields ``0.0`` for ``n <= 0``.
  """
  acc = 0.0
  for idx in range(n):
    term = (idx % 13) * 0.314159
    acc += term
  return acc

if __name__ == "__main__":
    # Scratch directory: SLURM_TMPDIR if set, otherwise TMPDIR, otherwise /tmp.
    fallback_dir = os.getenv("TMPDIR", "/tmp")
    scratch = os.getenv("SLURM_TMPDIR", fallback_dir)
    result = compute()
    # Second-resolution timestamp becomes part of the output file name.
    ts = time.strftime("%Y%m%d_%H%M%S")
    out_path = os.path.join(scratch, f"result_{ts}.txt")
    report = f"Host={platform.node()}\nResult={result:.6f}\n"
    with open(out_path, "w") as f:
        f.write(report)
    print("Wrote:", out_path)

Writing io_to_scratch.py
