Matrix Multiplication (1000 × 1000) Code in All Languages

Python - Matrix Multiplication

In [None]:
import time
import os
import psutil
import random

def matrix_multiply(n=1000):
    """Benchmark a pure-Python n x n matrix multiplication and print a report.

    Two n x n matrices of random floats are generated, multiplied with an
    i-k-j loop order, and the elapsed wall-clock time, process CPU time and
    resident memory are printed to stdout.

    Args:
        n: Dimension of the square matrices (default 1000).

    Returns:
        None. The report is written to stdout.

    Notes:
        The "Peak memory usage" figure is the larger of two RSS samples taken
        immediately before and after the multiplication (the matrices are
        already allocated at the first sample); it is not a true high-water
        mark of the process.
    """
    process = psutil.Process(os.getpid())

    # Generate n x n matrices A and B with random values; C starts at zero.
    A = [[random.random() for _ in range(n)] for _ in range(n)]
    B = [[random.random() for _ in range(n)] for _ in range(n)]
    C = [[0.0] * n for _ in range(n)]

    # Record initial memory (MB)
    memory_before = process.memory_info().rss / (1024 * 1024)

    # Start timing. perf_counter() is monotonic and high-resolution, so the
    # measured interval cannot be skewed by system clock adjustments the way
    # time.time() can.
    wall_start = time.perf_counter()
    cpu_start = time.process_time()

    # Matrix multiplication using i-k-j loop order. Row references are hoisted
    # out of the inner loops so each row is looked up once per iteration of
    # its own loop instead of once per innermost step.
    for i in range(n):
        row_a = A[i]
        row_c = C[i]
        for k in range(n):
            temp = row_a[k]
            row_b = B[k]
            for j in range(n):
                row_c[j] += temp * row_b[j]

    # End timing
    wall_end = time.perf_counter()
    cpu_end = time.process_time()

    # Record final memory (MB)
    memory_after = process.memory_info().rss / (1024 * 1024)
    peak_memory = max(memory_before, memory_after)

    # Output results
    print("\n--- Matrix Multiplication Report ---")
    print(f"Matrix size        : {n} x {n}")
    print(f"Execution time     : {wall_end - wall_start:.3f} seconds")
    print(f"CPU time           : {cpu_end - cpu_start:.3f} seconds")
    print(f"Peak memory usage  : {peak_memory:.2f} MB")

# Run the benchmark with the default matrix size when this code is executed
# directly (as opposed to being imported as a module).
if __name__ == "__main__":
    matrix_multiply()


Java - Matrix Multiplication

In [None]:
import java.util.Random;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadMXBean;

/**
 * Benchmarks a naive (i-j-k) multiplication of two square matrices of random
 * doubles and prints wall-clock time, thread CPU time and heap usage.
 */
public class MatrixMultiply {
    /** Matrix dimension used when no size is given on the command line. */
    private static final int DEFAULT_SIZE = 1000;

    /**
     * Runs the benchmark once and prints the report to standard output.
     *
     * @param args optional; {@code args[0]} is the matrix dimension n
     *             (defaults to 1000 when absent)
     * @throws IllegalArgumentException if the supplied size is negative or is
     *         not a valid integer
     */
    public static void main(String[] args) {
        int n = parseSize(args);
        double[][] A = new double[n][n];
        double[][] B = new double[n][n];
        double[][] C = new double[n][n];

        // Fixed seed so every run multiplies the same matrices.
        Random rand = new Random(42);
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++) {
                A[i][j] = rand.nextDouble();
                B[i][j] = rand.nextDouble();
            }

        // Request a GC first so the "before" sample mostly reflects live data.
        Runtime runtime = Runtime.getRuntime();
        runtime.gc();
        long memBefore = runtime.totalMemory() - runtime.freeMemory();

        // getCurrentThreadCpuTime() throws when the JVM lacks support and
        // returns -1 when measurement is disabled; check both up front so the
        // report never shows a bogus 0.000 s.
        ThreadMXBean bean = ManagementFactory.getThreadMXBean();
        boolean cpuTimeAvailable =
                bean.isCurrentThreadCpuTimeSupported() && bean.isThreadCpuTimeEnabled();
        long cpuStart = cpuTimeAvailable ? bean.getCurrentThreadCpuTime() : -1L;
        long wallStart = System.nanoTime();

        // Naive i-j-k triple loop: C = A * B.
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++)
                for (int k = 0; k < n; k++)
                    C[i][j] += A[i][k] * B[k][j];

        long wallEnd = System.nanoTime();
        long cpuEnd = cpuTimeAvailable ? bean.getCurrentThreadCpuTime() : -1L;
        runtime.gc();
        long memAfter = runtime.totalMemory() - runtime.freeMemory();

        System.out.printf("Execution time: %.3f s\n", (wallEnd - wallStart) / 1e9);
        if (cpuTimeAvailable) {
            System.out.printf("CPU time: %.3f s\n", (cpuEnd - cpuStart) / 1e9);
        } else {
            System.out.printf("CPU time: n/a (thread CPU time not supported or disabled)\n");
        }
        System.out.printf("Memory used: %.2f MB\n", Math.max(memBefore, memAfter) / (1024.0 * 1024.0));
    }

    /** Reads the matrix dimension from the first argument, falling back to the default. */
    private static int parseSize(String[] args) {
        if (args == null || args.length == 0) {
            return DEFAULT_SIZE;
        }
        int n = Integer.parseInt(args[0].trim());
        if (n < 0) {
            throw new IllegalArgumentException("Matrix size must be non-negative: " + n);
        }
        return n;
    }
}


Go - Matrix Multiplication

In [None]:
package main

import (
	"fmt"
	"math/rand"
	"os"
	"runtime"
	"runtime/pprof"
	"time"
)

// matrixMultiply benchmarks a tiled (blocked) multiplication of two n x n
// matrices of random float64 values and prints the elapsed wall-clock time
// and the heap allocation reported by the runtime. CPU time is not measured
// here; the printed hint points at the 'cpu.prof' profile file.
func matrixMultiply() {
	const n = 1000
	const blockSize = 64 // Block size for tiling, adjustable (32, 64, 128)

	// Initialize matrices: A and B get random values, C starts zeroed.
	A := make([][]float64, n)
	B := make([][]float64, n)
	C := make([][]float64, n)
	for i := 0; i < n; i++ {
		A[i] = make([]float64, n)
		B[i] = make([]float64, n)
		C[i] = make([]float64, n)
		for j := 0; j < n; j++ {
			A[i][j] = rand.Float64()
			B[i][j] = rand.Float64()
		}
	}

	// Memory measurement before (after a forced GC so Alloc reflects live heap)
	var memStats runtime.MemStats
	runtime.GC()
	runtime.ReadMemStats(&memStats)
	memBefore := float64(memStats.Alloc) / (1024 * 1024)

	// Start timing
	start := time.Now()

	// Tiled matrix multiplication. Block bounds are computed once per block
	// rather than in every loop condition, and the rows of A and C are looked
	// up once per ii instead of once per innermost step. The accumulation
	// order is unchanged, so results match the straightforward form exactly.
	for i := 0; i < n; i += blockSize {
		iEnd := min(i+blockSize, n)
		for j := 0; j < n; j += blockSize {
			jEnd := min(j+blockSize, n)
			for k := 0; k < n; k += blockSize {
				kEnd := min(k+blockSize, n)
				// Process block
				for ii := i; ii < iEnd; ii++ {
					rowA, rowC := A[ii], C[ii]
					for jj := j; jj < jEnd; jj++ {
						sum := rowC[jj]
						for kk := k; kk < kEnd; kk++ {
							sum += rowA[kk] * B[kk][jj]
						}
						rowC[jj] = sum
					}
				}
			}
		}
	}

	// End timing and memory measurement
	duration := time.Since(start)
	runtime.GC()
	runtime.ReadMemStats(&memStats)
	memAfter := float64(memStats.Alloc) / (1024 * 1024)

	// Output results
	fmt.Printf("Execution time: %.3f s\n", duration.Seconds())
	fmt.Printf("Memory used: %.2f MB\n", max(memBefore, memAfter))
	fmt.Println("CPU time: See 'cpu.prof' file using `go tool pprof`")
}

// min returns the smaller of two ints.
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}

// max returns a when a is strictly greater than b, and b otherwise.
func max(a, b float64) float64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}

// main writes a CPU profile of the benchmark to 'cpu.prof' and runs
// matrixMultiply once. If the profile file cannot be created or profiling
// cannot be started, it prints the error and exits without running the
// benchmark.
func main() {
	// Start CPU profiling
	f, err := os.Create("cpu.prof")
	if err != nil {
		fmt.Println("Could not create CPU profile:", err)
		return
	}
	defer f.Close()
	// StartCPUProfile returns an error (for example when profiling is already
	// active); report it instead of silently producing an empty profile.
	if err := pprof.StartCPUProfile(f); err != nil {
		fmt.Println("Could not start CPU profile:", err)
		return
	}
	// Deferred calls run last-in-first-out, so the profile is stopped and
	// flushed before the file is closed.
	defer pprof.StopCPUProfile()

	matrixMultiply()
}


C++ - Matrix Multiplication

In [None]:
#include <algorithm>
#include <chrono>
#include <ctime>
#include <iostream>
#include <random>
#include <vector>

#include <windows.h>
#include <psapi.h>

// Returns the CPU time (kernel + user) consumed so far by the current
// process, in seconds, or a negative value if the query fails.
static double process_cpu_seconds() {
    FILETIME creationTime, exitTime, kernelTime, userTime;
    if (!GetProcessTimes(GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime))
        return -1.0;

    ULARGE_INTEGER kernel, user;
    kernel.LowPart = kernelTime.dwLowDateTime;
    kernel.HighPart = kernelTime.dwHighDateTime;
    user.LowPart = userTime.dwLowDateTime;
    user.HighPart = userTime.dwHighDateTime;
    // FILETIME values count 100-nanosecond intervals.
    return static_cast<double>(kernel.QuadPart + user.QuadPart) * 1e-7;
}

// Returns the current working-set size of this process in MB, or 0 if the
// query fails (so an uninitialised struct is never read).
static double working_set_mb() {
    PROCESS_MEMORY_COUNTERS memInfo;
    if (!GetProcessMemoryInfo(GetCurrentProcess(), &memInfo, sizeof(memInfo)))
        return 0.0;
    return memInfo.WorkingSetSize / (1024.0 * 1024.0);
}

// Benchmarks a naive (i-j-k) multiplication of two 1000 x 1000 matrices of
// random doubles and prints wall-clock time, process CPU time and the
// working-set size to standard output.
void matrix_multiply() {
    const int n = 1000;
    std::vector<std::vector<double>> A(n, std::vector<double>(n));
    std::vector<std::vector<double>> B(n, std::vector<double>(n));
    std::vector<std::vector<double>> C(n, std::vector<double>(n, 0.0));

    // Fixed seed so every run multiplies the same matrices.
    std::mt19937 gen(42);
    std::uniform_real_distribution<> dis(0.0, 1.0);

    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++) {
            A[i][j] = dis(gen);
            B[i][j] = dis(gen);
        }

    double memBefore = working_set_mb();

    auto start = std::chrono::high_resolution_clock::now();
    // The Microsoft CRT's clock() reports elapsed wall-clock time rather than
    // CPU time, so CPU usage is read from GetProcessTimes instead.
    double cpuStart = process_cpu_seconds();

    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            for (int k = 0; k < n; k++)
                C[i][j] += A[i][k] * B[k][j];

    double cpuEnd = process_cpu_seconds();
    auto end = std::chrono::high_resolution_clock::now();
    double memAfter = working_set_mb();

    std::cout << "Execution time: " << std::chrono::duration<double>(end - start).count() << " s\n";
    if (cpuStart >= 0.0 && cpuEnd >= 0.0)
        std::cout << "CPU time: " << (cpuEnd - cpuStart) << " s\n";
    else
        std::cout << "CPU time: unavailable\n";
    // Parenthesised so the max(a, b) macro that <windows.h> defines (unless
    // NOMINMAX is set) cannot expand here.
    std::cout << "Memory used: " << (std::max)(memBefore, memAfter) << " MB\n";
}

// Entry point: runs the benchmark once. Falling off the end of main() is
// equivalent to returning 0.
int main() {
    matrix_multiply();
}
