In [None]:
import numpy as np
import scipy.linalg
import time
import cupy as cp
import ctypes
import os
import csv
from statistics import mean, stdev
import signal
import sys


class GracefulExiter:
    """Turn SIGINT (Ctrl-C) into a flag that long-running loops can poll.

    Creating an instance registers ``change_state`` as the SIGINT handler.
    After the first interrupt ``state`` is ``True`` and stays that way.
    """

    def __init__(self):
        # The flag starts cleared; it is only ever raised by the handler.
        self.state = False
        signal.signal(signal.SIGINT, self.change_state)

    def change_state(self, signum, frame):
        """Signal handler: announce the interrupt and raise the flag.

        ``signum`` and ``frame`` are the standard handler arguments and are
        not used.
        """
        print("\nReceived keyboard interrupt, finishing current iteration...")
        self.state = True

    def exit(self):
        """Return ``True`` once an interrupt has been received."""
        return self.state

# Shared interrupt flag. Constructing it installs the SIGINT handler as a side
# effect; main() polls graceful_exit.exit() between sizes and between runs.
graceful_exit = GracefulExiter()

def compile_c_library():
    """Write the naive C matmul kernel to disk, build it with gcc and load it.

    Side effects: creates ``matmul.c`` and ``libmatmul.so`` in the current
    working directory.

    Returns:
        ctypes.CDLL: handle to the freshly built ``./libmatmul.so``.

    On any failure (file cannot be written, gcc missing, compilation error,
    library cannot be loaded) the error is printed and the process exits via
    ``sys.exit(1)``.
    """
    # Local import so this function does not depend on the file's import cell.
    import subprocess

    c_code = r"""
    #include <stddef.h>

    void matmul_c(const double* A, const double* B, double* C, int N) {
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < N; j++) {
                double sum = 0.0;
                for (int k = 0; k < N; k++) {
                    sum += A[i * N + k] * B[k * N + j];
                }
                C[i * N + j] = sum;
            }
        }
    }
    """
    try:
        with open("matmul.c", "w") as f:
            f.write(c_code)
        # Argument list instead of a shell command line; capturing stderr lets
        # the compiler diagnostics appear in the error message (and therefore
        # in the notebook) instead of only on the kernel's terminal.
        build = subprocess.run(
            ["gcc", "-shared", "-fPIC", "matmul.c", "-o", "libmatmul.so"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        if build.returncode != 0:
            detail = build.stderr.strip()
            raise RuntimeError(
                "Failed to compile C library" + (f":\n{detail}" if detail else "")
            )
        return ctypes.CDLL("./libmatmul.so")
    except Exception as e:
        print(f"Error setting up C library: {e}")
        sys.exit(1)

# Build and load the shared library once, then declare the C signature of
# matmul_c so ctypes validates the arguments of every call.
try:
    lib = compile_c_library()
    # All three matrices are passed as flat, C-contiguous float64 buffers.
    _flat_f64 = np.ctypeslib.ndpointer(dtype=np.float64, ndim=1, flags='C_CONTIGUOUS')
    lib.matmul_c.argtypes = [_flat_f64, _flat_f64, _flat_f64, ctypes.c_int]
    # matmul_c is declared `void`; without this ctypes assumes an int return
    # value and would hand back whatever happens to be in the return register.
    lib.matmul_c.restype = None
except Exception as e:
    print(f"Failed to load C library: {e}")
    sys.exit(1)

def c_matrix_mult(A, B):
    """Multiply two square matrices with the compiled naive C kernel.

    Args:
        A, B: array-likes convertible to float64; both must be 2-D, square
            and of the same size.

    Returns:
        numpy.ndarray: the N x N float64 product ``A @ B``.

    Raises:
        ValueError: if the inputs are not two square matrices of equal size.
            The C kernel derives every index from a single N, so any other
            shape would make it read or write outside the buffers.
    """
    # No copy is made when the inputs are already C-contiguous float64.
    A = np.ascontiguousarray(A, dtype=np.float64)
    B = np.ascontiguousarray(B, dtype=np.float64)
    if A.ndim != 2 or A.shape[0] != A.shape[1] or B.shape != A.shape:
        raise ValueError(
            "c_matrix_mult expects two square matrices of the same size, "
            f"got shapes {A.shape} and {B.shape}"
        )
    n = A.shape[0]
    C = np.zeros((n, n), dtype=np.float64)
    # ravel() of a C-contiguous array is a view, so the kernel writes
    # directly into C's buffer.
    lib.matmul_c(A.ravel(), B.ravel(), C.ravel(), n)
    return C

# ---------- helper functions ----------
def flops_for_n(n):
    """Return the operation count 2 * n**3 used for an n x n matrix product."""
    cubed = n ** 3
    return cubed * 2

def gflops_from_time(n, t_sec):
    """Convert an elapsed time for one n x n product into GFLOP/s.

    Raises ZeroDivisionError when ``t_sec`` is zero.
    """
    flops_per_second = flops_for_n(n) / t_sec
    return flops_per_second / 1e9

# Short alias for the wall-clock timer used around every measured call.
perf = time.perf_counter 

# ---------- experiment settings ----------
# Matrix sizes advance in increments of step_size up to and including max_size.
step_size = 100
max_size = 1500
# Exclusive upper bound on the run index used when repeating each size.
num_trials = 30
# The pure-Python triple loop is only benchmarked for N <= python_max_n.
python_max_n = 1501  

def python_matrix_multiply(A, B):
    """Multiply two matrices given as lists of lists with a plain triple loop.

    The size is taken from ``len(A)`` and used for every dimension. The result
    is a new list of lists of floats.
    """
    size = len(A)
    product = []
    for row in range(size):
        a_row = A[row]
        out_row = []
        for col in range(size):
            # Accumulate left to right, starting from 0.0, one term per k.
            acc = 0.0
            for inner in range(size):
                acc += a_row[inner] * B[inner][col]
            out_row.append(acc)
        product.append(out_row)
    return product

def safe_write_csv(writer, row, max_attempts=3, retry_delay=1):
    """Write one row through ``writer``, retrying on failure.

    Args:
        writer: object with a ``writerow(row)`` method (e.g. ``csv.writer``).
        row: the row to write.
        max_attempts: how many times to try before giving up.
        retry_delay: seconds to wait between attempts.

    Returns:
        bool: ``True`` as soon as a write succeeds, ``False`` if every
        attempt raised. Each failure is printed; nothing is re-raised.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            writer.writerow(row)
            return True
        except Exception as e:
            print(f"Error writing to CSV (attempt {attempt}): {e}")
            # Only pause when another attempt follows; sleeping after the
            # final failure would just delay the caller.
            if attempt < max_attempts:
                time.sleep(retry_delay)
    return False

def _time_and_record(impl, label, N, run_idx, csv_writer, prepare, compute):
    """Time one implementation once and append the measurement to the CSV.

    ``prepare()`` builds the operands outside the timed region and
    ``compute(*operands)`` is the timed call. Returns ``(seconds, gflops)``,
    or ``(nan, nan)`` after printing the error if anything raised.
    """
    try:
        operands = prepare()
        t0 = perf()
        compute(*operands)
        t1 = perf()
        t = t1 - t0
        g = gflops_from_time(N, t)
        safe_write_csv(csv_writer, [N, impl, run_idx, f"{t:.6e}", f"{g:.6e}"])
        return (t, g)
    except Exception as e:
        print(f"{label} failed for N={N}: {e}")
        return (np.nan, np.nan)


def run_benchmark(N, csv_writer, run_idx):
    """Run all benchmarks for a single N and run_idx.

    Two random N x N float64 matrices are generated and multiplied once by
    each implementation. Every successful measurement is appended to
    ``csv_writer`` as ``[N, impl, run_idx, time_s, gflops]``.

    Args:
        N: matrix dimension.
        csv_writer: writer receiving one row per successful measurement.
        run_idx: repetition index recorded in the CSV row.

    Returns:
        dict: ``impl -> (seconds, gflops)``; ``(nan, nan)`` for an
        implementation that failed. The ``"python"`` key is present only when
        ``N <= python_max_n``.
    """
    A = np.random.rand(N, N)
    B = np.random.rand(N, N)

    # --- operand builders (run outside the timed region) ---
    def host_lists():
        return A.tolist(), B.tolist()

    def host_arrays():
        return (
            np.ascontiguousarray(A, dtype=np.float64),
            np.ascontiguousarray(B, dtype=np.float64),
        )

    def device_arrays(dtype):
        def prepare():
            a_dev = cp.asarray(A, dtype=dtype)
            b_dev = cp.asarray(B, dtype=dtype)
            # Make sure the host-to-device copies are finished before timing.
            cp.cuda.Device().synchronize()
            return a_dev, b_dev
        return prepare

    # --- timed kernels ---
    def gpu_dot(a_dev, b_dev):
        cp.dot(a_dev, b_dev)
        # Wait for the GPU to finish so the stop time covers the actual work.
        cp.cuda.Device().synchronize()

    def blas_dgemm(a, b):
        # NOTE(review): the operands are C-ordered; if dgemm converts them to
        # Fortran order internally, that conversion is part of the timing.
        scipy.linalg.blas.dgemm(1.0, a, b)

    plan = []
    if N <= python_max_n:
        plan.append(("python", "Python naive", host_lists, python_matrix_multiply))
    plan.extend([
        ("numpy", "NumPy", host_arrays, np.dot),
        # SciPy builds its own operands, so it no longer depends on locals
        # created inside the NumPy section.
        ("scipy", "SciPy", host_arrays, blas_dgemm),
        ("cupy_fp32", "CuPy FP32", device_arrays(cp.float32), gpu_dot),
        ("cupy_fp64", "CuPy FP64", device_arrays(cp.float64), gpu_dot),
        ("c", "C naive", host_arrays, c_matrix_mult),
    ])

    results = {}
    for impl, label, prepare, compute in plan:
        results[impl] = _time_and_record(
            impl, label, N, run_idx, csv_writer, prepare, compute
        )
    return results

def _mean_and_std(values):
    """Return ``(mean, sample stdev)``; stdev is 0.0 for one value, NaNs for none."""
    if not values:
        return float("nan"), float("nan")
    centre = mean(values)
    spread = stdev(values) if len(values) > 1 else 0.0
    return centre, spread


def main(start_n=1500, start_run=26):
    """Run the benchmark sweep and write detailed and summary results.

    Per-run measurements are appended to ``results/results.csv``; a header is
    written only when that file is empty. Per-size summary statistics are
    printed and written to ``results/results.txt``, which is overwritten.

    Args:
        start_n: first matrix size of the sweep. Sizes advance by
            ``step_size`` up to and including ``max_size``.
        start_run: first run index. Runs go up to ``num_trials - 1``.

    The defaults reproduce the resume point that used to be hard-coded in the
    loops. Pass ``start_n=step_size, start_run=0`` for a complete sweep.
    Summary statistics only cover the runs executed by this call, not rows
    already present in the CSV.

    SIGINT is handled cooperatively through ``graceful_exit``: the run in
    progress finishes, then the loops stop and the summary is still written.
    """
    os.makedirs("results", exist_ok=True)
    csv_path = os.path.join("results", "results.csv")
    txt_path = os.path.join("results", "results.txt")

    csv_file = None
    try:
        csv_file = open(csv_path, "a", newline="")
        csv_writer = csv.writer(csv_file)

        # Append mode keeps rows from earlier sessions; only an empty file
        # needs the header row.
        if os.stat(csv_path).st_size == 0:
            csv_writer.writerow(["N", "impl", "run_index", "time_s", "gflops"])
            csv_file.flush()
    except Exception as e:
        print(f"Failed to initialize CSV file: {e}")
        if csv_file is not None:
            csv_file.close()  # do not leak the handle on the way out
        sys.exit(1)

    summary_results = []
    impl_names = ("python", "numpy", "scipy", "cupy_fp32", "cupy_fp64", "c")

    try:
        # Touch the GPU once before any timed work.
        cp.zeros((2, 2), dtype=cp.float32)
        cp.cuda.Device().synchronize()

        for N in range(start_n, max_size + 1, step_size):
            if graceful_exit.exit():
                print("Early termination requested, finishing current N...")
                break

            print(f"\n=== Testing N={N} ===")
            per_method_runs = {impl: [] for impl in impl_names}

            for run_idx in range(start_run, num_trials):
                if graceful_exit.exit():
                    print("Early termination requested, stopping...")
                    break

                print(f"Run {run_idx + 1}/{num_trials} for N={N}")

                try:
                    run_results = run_benchmark(N, csv_writer, run_idx)
                    for impl in run_results:
                        per_method_runs[impl].append(run_results[impl])
                    # Flush after every run so an abort loses at most one run.
                    csv_file.flush()
                except Exception as e:
                    print(f"Error during N={N} run {run_idx}: {e}")
                    continue

            for impl, runs in per_method_runs.items():
                # Failed runs are recorded as NaN and excluded from the stats.
                t_mean, t_std = _mean_and_std([t for (t, _g) in runs if not np.isnan(t)])
                g_mean, g_std = _mean_and_std([g for (_t, g) in runs if not np.isnan(g)])

                summary_results.append({
                    "N": N,
                    "impl": impl,
                    "mean_time": t_mean,
                    "std_time": t_std,
                    "mean_gflops": g_mean,
                    "std_gflops": g_std
                })
                print(f"N={N} impl={impl:10s} mean_time={t_mean:.6f}s (±{t_std:.6f}) mean_gflops={g_mean:.3f} (±{g_std:.3f})")

        try:
            with open(txt_path, "w") as f:
                f.write("N\timpl\tmean_time\tstd_time\tmean_GFLOPS\tstd_GFLOPS\n")
                for r in summary_results:
                    f.write(f"{r['N']}\t{r['impl']}\t{r['mean_time']:.6e}\t{r['std_time']:.6e}\t{r['mean_gflops']:.6f}\t{r['std_gflops']:.6f}\n")
        except Exception as e:
            print(f"Error writing summary file: {e}")

    except Exception as e:
        print(f"Unexpected error: {e}")
    finally:
        try:
            csv_file.close()
        except OSError as e:
            # A failing close can mean buffered rows were not written; say so
            # instead of hiding it.
            print(f"Error closing CSV file: {e}")
        print("\nResults saved to:")
        print(f" - Detailed data: {csv_path}")
        print(f" - Summary stats: {txt_path}")

# Start the benchmark only when this code is the entry point, not on import.
if __name__ == "__main__":
    main()


=== Testing N=500 ===
Run 1/30 for N=500
Run 2/30 for N=500
Run 3/30 for N=500
Run 4/30 for N=500
Run 5/30 for N=500
Run 6/30 for N=500
Run 7/30 for N=500
Run 8/30 for N=500
Run 9/30 for N=500
Run 10/30 for N=500
Run 11/30 for N=500
Run 12/30 for N=500
Run 13/30 for N=500
Run 14/30 for N=500
Run 15/30 for N=500
Run 16/30 for N=500
Run 17/30 for N=500
Run 18/30 for N=500
Run 19/30 for N=500
Run 20/30 for N=500
Run 21/30 for N=500
Run 22/30 for N=500
Run 23/30 for N=500
Run 24/30 for N=500
Run 25/30 for N=500
Run 26/30 for N=500
Run 27/30 for N=500
Run 28/30 for N=500
Run 29/30 for N=500
Run 30/30 for N=500
N=500 impl=python     mean_time=9.428243s (±0.404483) mean_gflops=0.027 (±0.001)
N=500 impl=numpy      mean_time=0.001962s (±0.000745) mean_gflops=137.830 (±31.110)
N=500 impl=scipy      mean_time=0.019738s (±0.009969) mean_gflops=17.571 (±11.909)
N=500 impl=cupy_fp32  mean_time=0.000336s (±0.000727) mean_gflops=1196.210 (±223.459)
N=500 impl=cupy_fp64  mean_time=0.004701s (±0.002251

: 

: 

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the per-run measurements.
# NOTE(review): this is an absolute, machine-specific path; confirm whether it
# should instead point at the results/results.csv produced by the benchmark.
df = pd.read_csv('/home/adithya/Desktop/SEM3/DEEP_LEARNING(COL_775)/Home_Works/HW3/results/results.csv')

# Header names may carry stray whitespace.
df.columns = df.columns.str.strip()

# Two header spellings are supported; coerce whichever columns exist to
# numbers so malformed cells become NaN.
for col in ['Time', 'GFLOPS', 'time_s', 'gflops']:
    if col not in df.columns:
        continue
    df[col] = pd.to_numeric(df[col], errors='coerce')

time_col = 'time_s' if 'time_s' in df.columns else 'Time'
gflops_col = 'gflops' if 'gflops' in df.columns else 'GFLOPS'

# Drop rows where either measurement is missing or unparsable.
df = df.dropna(subset=[time_col, gflops_col])

# Mean and standard deviation per (implementation, size).
grouped = (
    df.groupby(['impl', 'N'])
    .agg(
        time_mean=(time_col, 'mean'),
        time_std=(time_col, 'std'),
        gflops_mean=(gflops_col, 'mean'),
        gflops_std=(gflops_col, 'std'),
    )
    .reset_index()
)

implementations = grouped['impl'].unique()


def _save_errorbar_plot(x, y, yerr, title, ylabel, out_file):
    """Draw one mean ± std curve against N and save it as an image file."""
    fig, ax = plt.subplots(figsize=(10,5))
    ax.errorbar(x, y, yerr=yerr, fmt='-o', capsize=5)
    ax.set_title(title)
    ax.set_xlabel('N')
    ax.set_ylabel(ylabel)
    ax.grid(True)
    fig.savefig(out_file)
    # Close explicitly so figures do not pile up across the loop.
    plt.close(fig)


# Two figures per implementation: throughput and elapsed time.
for imp in implementations:
    data_impl = grouped[grouped['impl'] == imp]

    _save_errorbar_plot(
        data_impl['N'], data_impl['gflops_mean'], data_impl['gflops_std'],
        f'GFLOPS vs N for {imp}', 'GFLOPS', f'gflops_vs_N_{imp}.png',
    )
    _save_errorbar_plot(
        data_impl['N'], data_impl['time_mean'], data_impl['time_std'],
        f'Time (s) vs N for {imp}', 'Time (s)', f'time_vs_N_{imp}.png',
    )
