<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [8]</a>'.</span>

In [1]:
import os
import datetime
import gc
import torch.quantization
from ptflops import get_model_complexity_info


def timestamp():
    """Print the current local date and time, e.g. ``Apr 06 2023, 03:04:55``."""
    now = datetime.datetime.now()
    # A datetime format spec inside an f-string is rendered via strftime.
    print(f"{now:%b %d %Y, %H:%M:%S}")

# Prune / Quantize Model

In [2]:
from transformers import pipeline, AutoConfig
from colbert.modeling.colbert import colbert_score
from colbert.modeling.checkpoint import Checkpoint
from colbert.infra import Run, RunConfig, ColBERTConfig
from colbert import Trainer, Indexer, Searcher
from transformers import AutoTokenizer
from colbert.data import Queries
import torch
from tqdm.auto import tqdm
import re


In [3]:
def filter_layers(name, prune_type, ignore_bias=True):
    """Decide whether the parameter called ``name`` should be filtered out.

    Args:
        name: Parameter name (a string such as a ``state_dict`` key).
        prune_type: ``"dense"`` filters out names containing ``"attention"``;
            a value containing ``"attention"`` filters out names that do not
            contain ``"attention"``, and, if it also contains ``"no_dense"``,
            names containing ``"dense"`` as well.  Any other value applies
            only the unconditional rules below.
        ignore_bias: When true, names ending in ``"bias"`` are filtered out.

    Returns:
        ``True`` if the parameter is filtered out, ``False`` otherwise.
        Names starting with ``model.bert.embeddings`` or containing
        ``LayerNorm`` are always filtered out.
    """
    always_skipped = name.startswith('model.bert.embeddings') or 'LayerNorm' in name
    if always_skipped or (ignore_bias and name.endswith('bias')):
        return True

    in_attention = "attention" in name
    if prune_type == "dense":
        return in_attention
    if "attention" in prune_type:
        if not in_attention:
            return True
        return "no_dense" in prune_type and "dense" in name
    return False

In [4]:
def quantization_data_new(config, quant_type, quant_Int):
    use_iter = "v2.0"
    
    use_full_data = False
    nbits = 2
    k = 1000
    maxsteps = 10000

    base_path = fr"experiments/"
    maxsteps_str=f"10.000"

    base_path = fr"experiments/"
    maxsteps_str=f"{maxsteps:,}".replace(',','.')
    experiment = f"msmarco_{maxsteps_str}"
    if use_full_data:
        experiment += f".data=full"




    #use_iter_str=f"{use_iter:,}".replace(',','.')
    index_name = f"msmarco_{maxsteps_str}{'.data=full' if use_full_data else ''}.nbits={nbits}"


    checkpoint = fr"experiments/model_dump/colbert{use_iter}" 
    retrieval_name = f"{index_name}.ranking={k}.tsv"




    if not os.path.exists(checkpoint):
        #anil checkpoint = fr"{base_path}/checkpoints/colbert"
        print(f"Couldn't find checkpoint. Using default checkpoint: {checkpoint}")
        checkpoint = fr"experiments/model_dump/colbertv2.0"

    config = ColBERTConfig(
        bsize = 64,
        root=base_path,
        experiment=experiment,


        #anil triples=r"../data/triples.train.small.id.json",
        #anil collection= r"../data/collection.tsv",
        triples=r"../kngo/data/triples.train.small.id.json",
        collection= r"../kngo/data/collection.tsv",

        checkpoint=checkpoint,
        nbits=nbits,
        overwrite='resume',
        index_name=index_name,
        index_path=fr"/home/ubuntu/capstone/colbert/experiments/indexes/msmarco_10.000.nbits=2",

        rank = 0,
        nranks = 1,
        amp = True,
        gpus = 1,
    )

    print("index_name=",index_name)
        
    for q_type in quant_type:
        print(f"pruning model on prune type {q_type} to: {quant_Int}")
        with Run().context(RunConfig(nranks=config.nranks, experiment=config.experiment)):
            model = Checkpoint(config.checkpoint, colbert_config=config)

        #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        #model.to(device)
        model_state_dict = model.state_dict()
        #for key, weight in model_state_dict.items():
        #    print("key1=",key,",weight1 = ",weight)
        quantized_model = torch.quantization.quantize_dynamic(model,q_type , dtype=quant_Int)
        #quantized_model.save("pegasus-quantized-config")
        #model.config.save_pretrained("pegasus-quantized-config")
        #quantized_model.model.save_pretrained("pegasus-quantized-config")
        quantized_state_dict = quantized_model.state_dict()
        #quantized_state_dict1 = {key.replace('model.', ''): quantized_state_dict.pop(key) for key in quantized_state_dict.keys()}
        #torch.save(quantized_state_dict, "pytorch_model.pt")

        #print(model)
        #print(quantized_model)
        #checkpoint = fr"pegasus-quantized-config"
        #quantized_model.save(f"lalal")
        def print_size_of_model(model):
            torch.save(model.state_dict(), "temp.p")
            print('Size (MB):', os.path.getsize("temp.p")/1e6)
            os.remove('temp.p')
            
        #for key, weight in quantized_state_dict.items():
        #    print("key2=",key,",weight2 = ",weight)
       
        print_size_of_model(model)
        print_size_of_model(quantized_model)
        
        if do_retrieval:
            timestamp()
            gc.collect()
            config.set("queries", r"../kngo/data/queries.dev.tsv")
            
  
            with Run().context(RunConfig(nranks=config.nranks, experiment=config.experiment, name='retrieval', overwrite = True)):
                
                config.checkpoint = model
                model.to('cpu')
                searcher = Searcher(index=config.index_name, config=config, checkpoint=model)
                queries = Queries(config.queries)
                count = 0
                while(count !=10):
                    print(f"Base model #", count)
                    ranking = searcher.search_all(queries, k=k)
                    count = count + 1
                #ranking.save(f"msmarco.{use_iter}.nbits={config.nbits}.prune={prune_amount}.prune_type={prune_type}.ranking={k}.tsv")
                #ranking.save(retrieval_name)
            timestamp()

            del searcher, queries, ranking
            gc.collect()
            
            with Run().context(RunConfig(nranks=config.nranks, experiment=config.experiment, name='retrieval', overwrite = True)):
                
                config.checkpoint = quantized_model
                quantized_model.to('cpu')
                searcher = Searcher(index=config.index_name, config=config, checkpoint=quantized_model)
                queries = Queries(config.queries)
                count = 0
                while(count !=10):
                    print(f"Quantized model #", count)
                    ranking = searcher.search_all(queries, k=k)
                    count = count + 1
                #ranking.save(f"msmarco.{use_iter}.nbits={config.nbits}.prune={prune_amount}.prune_type={prune_type}.ranking={k}.tsv")
                #ranking.save(retrieval_name)
            timestamp()

            del searcher, queries, ranking
            gc.collect()
             

        if do_eval:
            #!python -m utility.evaluate.msmarco_passages \
            #     --ranking "experiments/msmarco_{maxsteps_str}/retrieval/msmarco.{use_iter}.nbits={config.nbits}.prune={prune_amount}.prune_type={prune_type}.ranking={k}.tsv" \
            #     --qrels "../data/qrels.dev.tsv" > "experiments/msmarco_{maxsteps_str}/retrieval/msmarco.{use_iter}.nbits={config.nbits}.prune={prune_amount}.prune_type={prune_type}.ranking={k}.tsv.log"
            !python -m utility.evaluate.msmarco_passages \
                --ranking "experiments/{experiment}/none/retrieval/{retrieval_name}" \
                --qrels "../kngo/data/qrels.dev.tsv" #> "experiments/{experiment}/retrieval/{retrieval_name}.log"
        del model,quantized_model
        gc.collect()

In [5]:
def quantization_data(config, quant_type, quant_Int):
    """Quantize the checkpoint in ``config.checkpoint`` and save each variant.

    Args:
        config: ColBERT configuration; ``nranks``, ``experiment`` and
            ``checkpoint`` are read from it.
        quant_type: Iterable whose items are each passed as the second
            argument of ``torch.quantization.quantize_dynamic``.
        quant_Int: Passed as ``dtype`` to ``quantize_dynamic``.

    Returns:
        None.  Each quantized model is saved next to the source checkpoint
        under ``<checkpoint>.quant=<quant_Int>.quant_type=<q_type>``.
    """
    for q_type in quant_type:
        print(f"quantizing modules {q_type} to: {quant_Int}")
        with Run().context(RunConfig(nranks=config.nranks, experiment=config.experiment)):
            model = Checkpoint(config.checkpoint, colbert_config=config)

        # Keep the model on CPU for dynamic quantization, as
        # quantization_data_new does, instead of moving it to CUDA first.
        model.to('cpu')
        quantized_model = torch.quantization.quantize_dynamic(model, q_type, dtype=quant_Int)
        # Build the output path from config.checkpoint; the previous code used
        # a free name `checkpoint` that is neither a parameter nor a local.
        quantized_model.save(f"{config.checkpoint}.quant={quant_Int}.quant_type={q_type}")
        del model, quantized_model
        gc.collect()

In [6]:

# Sweep settings for dynamic quantization.  The driver cell loops over
# quantization_Int (target dtypes) and hands quantization_Type (a list of
# module-type sets) to quantization_data_new on every pass.
quantization_Int = [torch.qint8]
quantization_Type = [{torch.nn.Linear}]



In [None]:

# Driver cell: build the run configuration and launch the quantization sweep.
base_path = "experiments/"
checkpoint = "experiments/model_dump/colbertv2.0"

# Notebook-level switches; quantization_data_new reads them as globals.
do_retrieval = True
do_eval = True

# NOTE(review): quantization_data_new rebinds its `config` argument to a
# config it builds itself, so these settings do not appear to reach it — confirm.
config = ColBERTConfig(
    bsize=64,
    root=base_path,
    triples=r"../kngo/data/triples.train.small.id.json",
    collection=r"../kngo/data/collection.tsv",
    checkpoint=checkpoint,
    overwrite='resume',
    ncells=10,
    rank=0,
    nranks=1,
    amp=True,
    gpus=1,
)

for q_Int in quantization_Int:
    quantization_data_new(config, quantization_Type, q_Int)
print("quantization complete")



index_name= msmarco_10.000.nbits=2
pruning model on prune type {<class 'torch.nn.modules.linear.Linear'>} to: torch.qint8
[Apr 06, 03:04:51] [0] 		 Option1

model type = ['HF_ColBERT']
creating a HF_ColBERT model
We are loading the model1
We are loading the model2
Size (MB): 438.393806
Size (MB): 181.584042
Apr 06 2023, 03:04:55
[Apr 06, 03:04:55] [0] 		 Option1

/home/ubuntu/capstone/colbert/experiments/msmarco_10.000/indexes/ msmarco_10.000.nbits=2
[Apr 06, 03:04:56] #> Loading collection...
0M 1M 2M 3M 4M 5M 6M 7M 8M 
[Apr 06, 03:05:10] #> Loading codec...
[Apr 06, 03:05:10] Loading decompress_residuals_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...
[Apr 06, 03:05:11] Loading packbits_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...
[Apr 06, 03:05:13] #> Loading IVF...
[Apr 06, 03:05:14] #> Loading doclens...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 354/354 [00:00<00:00, 1213.98it/s]


[Apr 06, 03:05:16] #> Loading codes and residuals...


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 354/354 [00:14<00:00, 24.53it/s]


[Apr 06, 03:05:30] #> Loading the queries from ../kngo/data/queries.dev.tsv ...
[Apr 06, 03:05:30] #> Got 101093 queries. All QIDs are unique.

Base model # 0
Encoding Start
Apr 06 2023, 03:05:30


<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [None]:
# NOTE(review): papermill reports that execution stopped with an exception in
# this cell.  prune_amount, prune_type and prune_experiment are not defined in
# any cell shown in this notebook — confirm where they are meant to come from.
for p_amount in prune_amount:
    gc.collect()
    prune_experiment(
        prune_type,
        p_amount,
        maxsteps=10000,
        k=1000,
        do_train=False,
        do_index=False,
        do_retrieval=False,
        do_eval=True,
        nbits=2,
        use_full_data=False,
    )
print("!!!!all done!!!!!")

In [None]:
#input_ids = ids_tensor([8, 128], 2)
#token_type_ids = ids_tensor([8, 128], 2)
#attention_mask = ids_tensor([8, 128], vocab_size=2)
#dummy_input = (input_ids, attention_mask, token_type_ids)
#traced_model = torch.jit.trace(quantized_model, dummy_input)
#torch.jit.save(traced_model, "bert_traced_eager_quant.pt")
#print("Saved the model")

# save config
#quantized_model.config.save_pretrained("pegasus-quantized-config")
# save state dict
#quantized_state_dict = quantized_model.state_dict()
#torch.save(quantized_state_dict, "pegasus-quantized.pt")


# load config and dummy model
#config = AutoConfig.from_pretrained("pegasus-quantized-config")
#dummy_model = PegasusForConditionalGeneration(config)
#4. quantize dummy model and load state dict
#reconstructed_quantized_model = torch.quantization.quantize_dynamic(
#    dummy_model, {torch.nn.Linear}, dtype=torch.qint8
#)
#reconstructed_quantized_model.load_state_dict(quantized_state_dict)



config.set("queries", r"../kngo/data/queries.dev_clean.tsv" 
           if os.path.exists(r"../kngo/data/queries.dev_clean.tsv") 
           else r"../kngo/data/queries.small.dev.tsv")
with Run().context(RunConfig(nranks=config.nranks, experiment=config.experiment, name='retrieval', overwrite = True)):
    print("lala=",config.index_name, quantized_model, retrieval_name, experiment)

    searcher = Searcher(index=config.index_name, config=config, checkpoint=quantized_model)
    queries = Queries(config.queries)
    ranking = searcher.search_all(queries, k=k)
    #ranking.save(f"msmarco.{use_iter}.nbits={config.nbits}.prune={prune_amount}.prune_type={prune_type}.ranking={k}.tsv")
    ranking.save(retrieval_name)
timestamp()

del searcher, queries, ranking
gc.collect()

!python -m utility.evaluate.msmarco_passages \
    --ranking "experiments/{experiment}/none/retrieval/{retrieval_name}" \
    --qrels "../kngo/data/qrels.dev.tsv" #> "experiments/{experiment}/retrieval/{retrieval_name}.log"



In [None]:
import torch
from transformers import AutoConfig
#experimentsmsmarco_10.000indexesmsmarco_10.000.nbits=2

# Experiment switches and sizes for this cell's standalone quantization run.
use_full_data = False
do_train = False
do_index = False
do_retrieval = False
do_eval = True
nbits = 2
use_iter = "v2.0"
k = 1000

# Names derived from the settings above.
base_path = fr"experiments/"
maxsteps_str=f"10.000"
experiment = f"msmarco_{maxsteps_str}"
if use_full_data:
    experiment += f".data=full"

index_name = f"msmarco_{maxsteps_str}{'.data=full' if use_full_data else ''}.nbits={nbits}"

checkpoint = fr"experiments/model_dump/colbert{use_iter}"
retrieval_name = f"{index_name}.ranking={k}.tsv"

if not os.path.exists(checkpoint):
    # Report the missing path and the fallback separately; the previous
    # message printed the missing path as if it were the default.
    print(f"Couldn't find checkpoint: {checkpoint}")
    checkpoint = fr"experiments/model_dump/colbertv2.0"
    print(f"Using default checkpoint: {checkpoint}")

config = ColBERTConfig(
    bsize = 64,
    root=base_path,
    experiment=experiment,
    name=experiment,

    triples=r"../kngo/data/triples.train.small.id.json",
    collection= r"../kngo/data/collection.tsv",

    checkpoint=checkpoint,
    nbits=nbits,
    overwrite='resume',
    index_name=index_name,
    index_path=fr"{base_path}indexes/{index_name}",

    rank = 0,
    nranks = 1,
    amp = True,
    gpus = 1,
)
    


# Load the float ColBERT checkpoint inside a Run context.
with Run().context(RunConfig(nranks=config.nranks, experiment=config.experiment)):
    model = Checkpoint(config.checkpoint, colbert_config=config)

# Keep the model on CPU for dynamic quantization, as quantization_data_new
# does, instead of moving it to CUDA first.  `device` stays defined for any
# later cell that reads it.
device = torch.device('cpu')
model.to(device)

# Quantize every torch.nn.Linear module to int8; returns a converted copy.
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

def print_size_of_model(model):
    """Print the on-disk size, in MB, of ``model.state_dict()``.

    The state dict is serialized with ``torch.save`` into a private temporary
    directory that is removed afterwards, even if saving or measuring fails.
    An existing ``temp.p`` in the working directory is therefore never
    overwritten or deleted.

    Args:
        model: Any object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).

    Returns:
        None; the size is printed as ``Size (MB): <value>``.
    """
    import tempfile  # function-scope import keeps the cell self-contained

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Same file name as before, only the directory changes.
        scratch_path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), scratch_path)
        print('Size (MB):', os.path.getsize(scratch_path)/1e6)

# Compare the serialized state-dict sizes of the float and quantized models.
print_size_of_model(model)
print_size_of_model(quantized_model)

#quantized_model.model.save_pretrained("tmp-t5-small-quantized-config")  # save config
# Persist the quantized weights so they can be reloaded below.
# NOTE(review): the file name mentions "t5-small" although the model loaded in
# this cell is a ColBERT Checkpoint; the name is kept unchanged here.
quantized_state_dict = quantized_model.state_dict()
#for key, weight in quantized_state_dict.items():
#    print(key,weight)
#torch.jit.save(quantized_state_dict, "tmp-t5-small-quantized-state-dict.pt")
torch.save(quantized_state_dict, "tmp-t5-small-quantized-state-dict.pt")

# Round-trip check: quantize the float model again to get a fresh quantized
# module structure (this rebinds quantized_model) ...
quantized_model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
# ... then load the weights that were just written to disk into it.
quantized_model.load_state_dict(torch.load("tmp-t5-small-quantized-state-dict.pt"))

print('Load quantized model')
#quantized_config = AutoConfig.from_pretrained("tmp-t5-small-quantized-config")
#dummy_model = ColBERT(quantized_config)

#reconstructed_quantized_model = torch.quantization.quantize_dynamic(
#    dummy_model, {torch.nn.Linear}, dtype=torch.qint8
#)
#reconstructed_quantized_model.load_state_dict(torch.load("tmp-t5-small-quantized-state-dict.pt"))




In [None]:
from __future__ import absolute_import, division, print_function

import logging
import numpy as np
import os
import random
import sys
import time
import torch

from argparse import Namespace
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)
from tqdm import tqdm
from transformers import (BertConfig, BertForSequenceClassification, BertTokenizer,)
from transformers import glue_compute_metrics as compute_metrics
from transformers import glue_output_modes as output_modes
from transformers import glue_processors as processors
from transformers import glue_convert_examples_to_features as convert_examples_to_features
from torch.quantization import per_channel_dynamic_qconfig
from torch.quantization import quantize_dynamic_jit

# Module-wide random source used whenever a caller does not supply one.
global_rng = random.Random()

def ids_tensor(shape, vocab_size, rng=None, name=None):
    """Build a CPU ``torch.long`` tensor of random ids with the given shape.

    Args:
        shape: Sequence of dimension sizes for the result.
        vocab_size: Ids are drawn uniformly from ``0 .. vocab_size - 1``.
        rng: Optional ``random.Random``-like object providing ``randint``;
            ``global_rng`` is used when it is ``None``.
        name: Unused.

    Returns:
        A contiguous ``torch.long`` tensor on the CPU, viewed as ``shape``.
    """
    source = global_rng if rng is None else rng

    # Number of elements is the product of all dimension sizes.
    element_count = 1
    for extent in shape:
        element_count = element_count * extent

    drawn = [source.randint(0, vocab_size - 1) for _ in range(element_count)]
    flat = torch.tensor(data=drawn, dtype=torch.long, device='cpu')
    return flat.view(shape).contiguous()

# Logging: warnings and above, timestamped, for this notebook and the root logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    level=logging.WARNING,
)
logger = logging.getLogger(__name__)

# Keep transformers' model-loading messages at warning level too.
logging.getLogger("transformers.modeling_utils").setLevel(logging.WARNING)

print(torch.__version__)

# Restrict PyTorch to a single intra-op thread, then show the parallel settings.
torch.set_num_threads(1)
print(torch.__config__.parallel_info())