In [36]:
pip install pulp cplex scipy

Defaulting to user installation because normal site-packages is not writeable
distutils: /home1/cphillips/.local/lib/python3.9/site-packages
sysconfig: /home1/cphillips/.local/lib64/python3.9/site-packages[0m
user = True
home = None
root = None
prefix = None[0m
Note: you may need to restart the kernel to use updated packages.


In [37]:
import numpy as np
from scipy.sparse import random as sparse_random
import pulp

# This function generates large feasible LP problems to test and saves them in the .mps format
def generate_feasible_lp(num_vars=100, num_ineq=200, num_eq=50, density=0.05, mps_filename="generated_lp.mps"):
    
    # The number of variables in each constraint with nonzero coefficients will be roughly density * num_vars
    
    rng = np.random.default_rng(0)

    # Step 1: Feasible solution
    x_feas = rng.uniform(low=-10, high=10, size=(num_vars, 1))

    # Step 2: Sparse matrices (convert to dense)
    G_sparse = sparse_random(num_ineq, num_vars, density=density, format='csr', random_state=None)
    A_sparse = sparse_random(num_eq, num_vars, density=density, format='csr', random_state=None)

    G = G_sparse.toarray()
    A = A_sparse.toarray()

    # Step 3: RHS vectors
    h = G @ x_feas + rng.uniform(0.1, 5.0, size=(num_ineq, 1))
    b = A @ x_feas

    # Step 4: Bounds
    l = x_feas - rng.uniform(1, 5, size=(num_vars, 1))
    u = x_feas + rng.uniform(1, 5, size=(num_vars, 1))
    l = np.maximum(l, -1e4)
    u = np.minimum(u, 1e4)

    # Step 5: Objective
    c = rng.normal(size=(num_vars, 1))

    # Step 6: Write to MPS using pulp
    prob = pulp.LpProblem("Feasible_LP", pulp.LpMinimize)
    x_vars = [
        pulp.LpVariable(f"x{i}", lowBound=float(l[i]), upBound=float(u[i]))
        for i in range(num_vars)
    ]
    prob += pulp.lpDot(c.flatten(), x_vars)

    # Inequality constraints: Gx <= h
    for i in range(num_ineq):
        prob += pulp.lpDot(G[i], x_vars) <= float(h[i]), f"ineq_{i}"

    # Equality constraints: Ax = b
    for i in range(num_eq):
        prob += pulp.lpDot(A[i], x_vars) == float(b[i]), f"eq_{i}"

    prob.writeMPS(mps_filename)
    print(f"✅ LP written to: {mps_filename}")

In [38]:
import numpy as np
import cplex
from cplex.exceptions import CplexError

# This function extracts the parameters of the LP from the .mps format to the general form
def mps_to_standard_form(mps_file):
    try:
        cpx = cplex.Cplex(mps_file)
        cpx.set_results_stream(None)  # mute output

        # Number of variables and constraints
        num_vars = cpx.variables.get_num()
        num_constraints = cpx.linear_constraints.get_num()

        # Objective vector (c)
        c = np.array(cpx.objective.get_linear())

        # Constraint matrix
        A_full = cpx.linear_constraints.get_rows()
        senses = cpx.linear_constraints.get_senses()
        rhs = np.array(cpx.linear_constraints.get_rhs())

        A = []
        G = []
        b = []
        h = []

        for i, (row, sense, rhs_i) in enumerate(zip(A_full, senses, rhs)):
            row_vec = np.zeros(num_vars)
            for idx, val in zip(row.ind, row.val):
                row_vec[idx] = val
            if sense == 'E':  # Equality constraint
                A.append(row_vec)
                b.append(rhs_i)
            elif sense == 'G':  # Greater than or equal
                G.append(row_vec)
                h.append(rhs_i)
            elif sense == 'L':  # Less than or equal
                # convert to -Gx ≥ -h
                G.append(-row_vec)
                h.append(-rhs_i)

        A = np.array(A) if A else np.zeros((0, num_vars))
        b = np.array(b) if b else np.zeros(0)
        G = np.array(G) if G else np.zeros((0, num_vars))
        h = np.array(h) if h else np.zeros(0)

        # Bounds
        l = np.array(cpx.variables.get_lower_bounds())
        u = np.array(cpx.variables.get_upper_bounds())

        c = c.reshape(-1, 1)
        h = h.reshape(-1, 1)
        b = b.reshape(-1, 1)
        l = l.reshape(-1, 1)
        u = u.reshape(-1, 1)
        
        return c, G, h, A, b, l, u

    except CplexError as e:
        print("CPLEX Error:", e)
        return None

In [39]:
import numpy as np

# This function iterates the primal-dual hybrid gradient algorithm, without any enhancements, for a specified number of iterations
def pdhg(c, G, h, A, b, l, u, max_iter=1000):
    """
    Solves:
        min cᵀx s.t. Gx ≥ h, Ax = b, l ≤ x ≤ u
    using PDHG algorithm.
    """

    # Define Parameters
    eta = 0.9/np.linalg.norm(np.vstack([G,A]), 2)
    omega = 1

    tau = eta/omega
    sigma = eta*omega
    
    m_eq = A.shape[0]
    m_ineq = G.shape[0]
    n = c.shape[0]

    # Initialize Primal and Dual Variables
    x = np.minimum(np.maximum(np.zeros((n, 1)), l), u)
    y = np.zeros((m_eq, 1))       # for Ax = b
    z = np.zeros((m_ineq, 1))     # for Gx ≥ h → dual z ≥ 0

    for k in range(max_iter):
        
        # Primal update
        x_old = x.copy()
        # Project x onto box constraints l ≤ x ≤ u
        x = np.minimum(np.maximum(x - tau * (c - A.T @ y - G.T @ z), l), u)

        # Dual update
        y += sigma * (b - A @ (2*x - x_old))
        z += sigma * (h - G @ (2*x - x_old))
        z = np.maximum(z, 0)  # project onto constraint z ≥ 0

        
    # Returns minimal objective value, and minimizer estimate in list format
    return (c.T @ x)[0][0], x.T[0].tolist()

In [40]:
# Create a feasible LP problem and save to a 
generate_feasible_lp(num_vars=1000, num_ineq=500, num_eq=500, density=0.05, mps_filename="large_example.mps")

  pulp.LpVariable(f"x{i}", lowBound=float(l[i]), upBound=float(u[i]))
  prob += pulp.lpDot(G[i], x_vars) <= float(h[i]), f"ineq_{i}"
  prob += pulp.lpDot(A[i], x_vars) == float(b[i]), f"eq_{i}"


✅ LP written to: large_example.mps


In [41]:
import cplex

# Solve the LP using either primal simplex, dual simplex, or barrier (interior point).
# Only works for LP with no more than 1000 constraints and no more than 1000 variables
cpx = cplex.Cplex("large_example.mps")
cpx.solve()

# Save the minimizer and minimal objective values for comparison
cpx_obj_val = cpx.solution.get_objective_value()
cpx_min = cpx.solution.get_values()
print("Objective value:", cpx_obj_val)
#print("Minimizer: xᵀ =", cpx_min)


Selected objective sense:  MINIMIZE
Selected objective  name:  OBJ
Selected RHS        name:  RHS
Selected bound      name:  BND
Version identifier: 22.1.2.0 | 2024-12-10 | f4cec290b
CPXPARAM_Read_DataCheck                          1
Tried aggregator 1 time.
Linear dependency checker was stopped due to maximum work limit.
No LP presolve or aggregator reductions.
Presolve time = 0.03 sec. (82.34 ticks)

Iteration log . . .
Iteration:     1   Dual objective     =         -2373.017074
Iteration:    62   Dual objective     =         -1998.000634
Iteration:   124   Dual objective     =         -1939.518523
Iteration:   186   Dual objective     =         -1890.210059
Iteration:   248   Dual objective     =         -1828.087799
Iteration:   310   Dual objective     =         -1761.047066
Iteration:   372   Dual objective     =         -1714.663805
Iteration:   438   Dual objective     =         -1659.087035
Iteration:   504   Dual objective     =         -1601.461618
Iteration:   573   Dual 

In [44]:
# Extract LP parameters from generated example
c, G, h, A, b, l, u = mps_to_standard_form("large_example.mps")
pdhg_obj_val, pdhg_min = pdhg(c, G, h, A, b, l, u, max_iter=100000)
print("Objective Value:", pdhg_obj_val)
#print("Minimizer: xᵀ =",pdhg_min)

# The distance between the two minimizer solutions
# Should be small but won't be incredibly small since the vectors are high dimensional
distance = np.linalg.norm(np.array(pdhg_min) - np.array(cpx_min))
print("Distance:", distance)


Selected objective sense:  MINIMIZE
Selected objective  name:  OBJ
Selected RHS        name:  RHS
Selected bound      name:  BND
Objective Value: -974.553989520326
Distance: 0.016839848880211453


In [1]:
import torch
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

N = 10000
k = 20
x = torch.randn(N, N, device=device)
y = torch.randn(N, N, device=device)

# Warm up
for _ in range(5):
    _ = torch.matmul(x, y)

start = time.time()
for _ in range(k):
    _ = torch.matmul(x, y)
torch.cuda.synchronize() if torch.cuda.is_available() else None
end = time.time()

elapsed = end - start
total_flops = 2 * (N ** 3) * k
tflops = total_flops / (elapsed * 1e12)

print(f"Time for {k} matrix multiplications of {N}x{N}: {elapsed:.3f} seconds")
print(f"Average time per multiplication: {elapsed/k:.3f} seconds")
print(f"Approximate performance: {tflops:.3f} TFLOPS")

AttributeError: /opt/rocm-6.3.1/lib/libamd_smi.so: undefined symbol: amdsmi_get_gpu_enumeration_info