In [None]:
# Make the `src` folder importable from this notebook
import sys
from pathlib import Path

# The parent of the current working directory is the folder put on the import path.
module_path = str(Path.cwd().parents[0])

# Append it a single time, so re-running the cell does not duplicate the entry.
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

# ILP Formulation to cover an MSA with Blocks
___

- [ ] Speed up the generation of constraints

In [None]:
import json
import gurobipy as gp
from gurobipy import GRB
from src.blocks import Block
from src.msa import AnalyzerMSA

import numpy as np
from collections import defaultdict

# pangenome graph
from src.graph import (
    nodes_edges_from_blocks, 
    PlotGraph
)
from src.graph.bandage_labels_from_gfa import bandage_labels

In [None]:
# MSA: the file is located from its name, then loaded with AnalyzerMSA
NAME_MSA = "toyexample"
path_msa = f"../msas/{NAME_MSA}.fa"

amsa = AnalyzerMSA()
align, n_seqs, n_cols = amsa.load_msa(path_msa)

# Show the two size values returned by load_msa
(n_seqs, n_cols)

In [None]:
# Build and solve the ILP that selects a minimum number of blocks.
#
# Names this cell expects to exist already (they are not defined in it):
#   blocks        : iterable of (K, i, j) keys; K is the block's rows joined
#                   by "," (a string, so it can index Gurobi variables),
#                   [i, j] is the inclusive column interval of the block
#   msa_positions : iterable of (r, c) positions of the MSA
#   NAME_MSA      : used to name the exported .lp file
model = gp.Model("pangeblocks")

# Decision variables
C = model.addVars(blocks, vtype=GRB.BINARY, name="C")         # one binary variable per block
U = model.addVars(msa_positions, vtype=GRB.BINARY, name="U")  # one binary variable per MSA position

# Index the blocks by row once, so each position only inspects the blocks of
# its own row instead of re-splitting every K string for every position.
blocks_by_row = {}
for K, i, j in blocks:
    for row in set(K.split(",")):
        blocks_by_row.setdefault(row, []).append((K, i, j))

# Coverage constraints
for r, c in msa_positions:

    # variables of the blocks that cover the position [r,c]
    subset_C = [C[K, i, j] for K, i, j in blocks_by_row.get(str(r), ()) if i <= c <= j]

    # NOTE: positions that no block covers get no constraint at all
    if subset_C:
        ## 1. U[r,c] can only be 1 if at least one block covering [r,c] is selected
        model.addConstr(U[r, c] <= gp.quicksum(subset_C), name=f"constraint1({r},{c})")

        ## 2. every position of the MSA must be covered
        model.addConstr(U[r, c] >= 1, name=f"constraint2({r},{c})")


## 3. overlapping blocks cannot be chosen together
##    (combined with 1. and 2., each position ends up covered exactly once)
# sort blocks by the starting column (K,start,end)
blocks = sorted(blocks, key=lambda b: b[1])
block_rows = [frozenset(block[0].split(",")) for block in blocks]

names_constraint3 = []
for pos1, (K1, i1, j1) in enumerate(blocks):
    # compare against the next blocks in the sorted list
    for pos2 in range(pos1 + 1, len(blocks)):
        K2, i2, j2 = blocks[pos2]

        # The list is sorted by start column: once a block starts after block1
        # ends, no later block can share a column with block1 either.
        if i2 > j1:
            break

        # the inclusive column intervals [i1,j1] and [i2,j2] must intersect ...
        if max(i1, i2) > min(j1, j2):
            continue
        # ... and the two blocks must share at least one row
        if block_rows[pos1].isdisjoint(block_rows[pos2]):
            continue

        # the blocks intersect: at most one of them can be selected
        name_constraint = f"constraint3({K1},{i1},{j1})-({K2},{i2},{j2})"
        model.addConstr(C[K1, i1, j1] + C[K2, i2, j2] <= 1, name=name_constraint)
        names_constraint3.append(name_constraint)

# Objective function: minimize the number of selected blocks
model.setObjective(C.sum('*','*','*'), GRB.MINIMIZE)

model.optimize()

# Export the model in LP format
Path("ilp-models").mkdir(exist_ok=True)
model.write(f"ilp-models/{NAME_MSA}.lp")

In [None]:
# Sanity check: was a non-overlap constraint generated for the blocks
# (K="1,2", i=0, j=3) and (K="0,1", i=1, j=5)?
expected_constraint = "constraint3(1,2,0,3)-(0,1,1,5)"
expected_constraint in names_constraint3

In [None]:
# Read the value ("X" attribute) of every variable in the solved model
solution_C = model.getAttr("X", C)
solution_U = model.getAttr("X", U)

# Quick check that both lookups returned something
tuple(len(solution) > 0 for solution in (solution_C, solution_U))

In [None]:
# Rebuild a Block object for every block selected in the solution.
# `strings_` is not defined in this cell; it is indexed with the same
# (K, i, j) keys as the C variables.
used_blocks = []
for (K, i, j), value in solution_C.items():
    # The solver returns binary variables as floats that are integral only up
    # to its tolerance, so threshold at 0.5 instead of testing for > 0.
    if value > 0.5:
        # K holds the rows joined by ",": "1,2" -> (1, 2). Parsed without eval.
        # assumes every row label is an integer — TODO confirm
        rows = tuple(int(row) for row in K.split(","))
        used_blocks.append(Block(rows, i, j, strings_[K, i, j]))

___

In [1]:
# Make the `src` folder importable from this notebook
import sys
from pathlib import Path

# The parent of the current working directory is the folder put on the import path.
module_path = str(Path.cwd().parents[0])

# Append it a single time, so re-running the cell does not duplicate the entry.
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

In [3]:
import json
from src.blocks import Block
from src.ilp.input import InputBlockSet
from src.ilp.optimization import Optimization
from src.ilp.variaton_graph_parser import asGFA
# The MSA name determines both input files and the output GFA file
NAME_MSA = "toyexample"
path_msa = f"../msas/{NAME_MSA}.fa"
# Set of decomposed blocks, stored as JSON
path_blocks = f"../experiment/block_decomposition/{NAME_MSA}.json"

# Each JSON entry holds the positional arguments of one Block
with open(path_blocks) as fp:
    raw_blocks = json.load(fp)
blocks = [Block(*fields) for fields in raw_blocks]

# Build the input set handed to the optimization from the MSA and the blocks
inputset_gen = InputBlockSet()
inputset = inputset_gen(path_msa, blocks)

# Run the optimization on that input set
opt = Optimization(blocks=inputset, path_msa=path_msa)
opt_coverage = opt()

# Hand the optimization result to the GFA parser together with the output path
path_gfa = f"../experiment/gfa/{NAME_MSA}.gfa"
parser = asGFA()
parser(opt_coverage, path_gfa=path_gfa)

[Block(K=(0,), i=0, j=0, label='A'), Block(K=(2,), i=4, j=4, label='G')]
Gurobi Optimizer version 9.5.2 build v9.5.2rc0 (linux64)
Thread count: 32 physical cores, 64 logical processors, using up to 32 threads
Optimize a model with 1026 rows, 43 columns and 2078 nonzeros
Model fingerprint: 0x56c8bd8f
Variable types: 0 continuous, 43 integer (43 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective 16.0000000
Presolve removed 1026 rows and 43 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.01 seconds (0.00 work units)
Thread count was 1 (of 64 available processors)

Solution count 2: 6 16 

Optimal solution found (tolerance 1.00e-04)
Best objective 6.000000000000e+00, best bound 6.000000000000e+00, gap 0.0000%
Not consecutive blocks
Condicion- consecutive blocks
Not consecuti

In [None]:
# constraint = model.getConstrByName("constraint2(1,28)")
# print(f"{model.getRow(constraint)} {constraint.Sense} {constraint.RHS}")