In [None]:
import time

import torch
import torch.nn as nn
from datautils import *
from gptq import *
from modelutils import *
from quant import *



In [None]:
import argparse
from datautils import *

# Command-line interface for the run: two required positionals (model and
# calibration dataset) followed by optional flags. Boolean switches use
# `store_true`, so they default to False when omitted.
parser = argparse.ArgumentParser()

# --- Required positionals -------------------------------------------------
parser.add_argument(
    'model', type=str,
    help='OPT model to load; pass `facebook/opt-X`.'
)
parser.add_argument(
    'dataset', type=str, choices=['wikitext2', 'ptb', 'c4'],
    help='Where to extract calibration data from.'
)

# --- Calibration data -----------------------------------------------------
parser.add_argument(
    '--seed', type=int, default=0,
    help='Seed for sampling the calibration data.'
)
parser.add_argument(
    '--nsamples', type=int, default=128,
    help='Number of calibration data samples.'
)

# --- Quantization settings ------------------------------------------------
parser.add_argument(
    '--percdamp', type=float, default=.01,
    help='Percent of the average Hessian diagonal to use for dampening.'
)
parser.add_argument(
    '--nearest', action='store_true',
    help='Whether to run the RTN baseline.'
)
parser.add_argument(
    '--wbits', type=int, default=16, choices=[2, 3, 4, 16],
    help='#bits to use for quantization; use 16 for evaluating base model.'
)
parser.add_argument(
    '--trits', action='store_true',
    help='Whether to use trits for quantization.'
)
parser.add_argument(
    '--groupsize', type=int, default=-1,
    help='Groupsize to use for quantization; default uses full row.'
)
parser.add_argument(
    '--sym', action='store_true',
    help='Whether to perform symmetric quantization.'
)

# --- Checkpoint I/O (empty string means "not requested") ------------------
parser.add_argument(
    '--save', type=str, default='',
    help='Save quantized checkpoint under this name.'
)
parser.add_argument(
    '--load', type=str, default='',
    help='Load quantized model.'
)

# --- Benchmarking / evaluation --------------------------------------------
parser.add_argument(
    '--benchmark', type=int, default=0,
    help='Number of tokens to use for benchmarking.'
)
parser.add_argument(
    '--check', action='store_true',
    help='Whether to compute perplexity during benchmarking for verification.'
)
parser.add_argument(
    '--new-eval', action='store_true',
    help='Whether to use the new PTB and C4 eval.'
)
parser.add_argument(
    '--faster-kernel', action='store_true',
    help='Whether to use the new faster kernel for benchmarking.'
)

# --- GPTQ heuristics ------------------------------------------------------
parser.add_argument(
    '--act-order', action='store_true',
    help='Whether to apply the activation order GPTQ heuristic'
)
# The help text names the flag exactly as registered above (`--act-order`);
# it previously pointed users at a non-existent `--actorder` spelling.
parser.add_argument(
    '--static-groups', action='store_true',
    help='Whether to use static groups; recommended when using `--act-order` for more efficient inference.'
)




In [None]:

# Driver cell: parse the CLI arguments, build data loaders, then optionally
# save the model.
#
# With no explicit list, argparse parses sys.argv[1:].
# NOTE(review): inside a Jupyter kernel sys.argv carries the kernel's own
# launch arguments, so this call will most likely fail and raise SystemExit —
# confirm how this cell is meant to be run (e.g. parse_args([...])).
args = parser.parse_args()


# Loaders for the dataset selected on the command line.
# NOTE(review): `model` is not assigned in this cell or in any cell above it;
# the only visible assignment is in a later cell, so on a fresh kernel this
# raises NameError. Whether that object exposes `.seqlen` is not visible
# here either — verify.
dataloader, testloader = get_loaders(
    args.dataset, nsamples=args.nsamples, seed=args.seed, model=args.model, seqlen=model.seqlen
)
# When --load is given, stop here; nothing below runs.
# NOTE(review): in a notebook, exit() acts on the whole kernel session rather
# than just this cell — confirm that is intended.
if args.load:
    exit()
# Dataset names to iterate over; --new-eval swaps in 'ptb-new' / 'c4-new'.
datasets = ['wikitext2', 'ptb', 'c4'] 
if args.new_eval:
    datasets = ['wikitext2', 'ptb-new', 'c4-new']
# Rebuild the loaders for each dataset (nsamples is not passed here) and print
# the dataset name. `dataloader` / `testloader` are rebound on every iteration,
# replacing the pair built above, and are not otherwise used in this cell.
for dataset in datasets: 
    dataloader, testloader = get_loaders(
        dataset, seed=args.seed, model=args.model, seqlen=model.seqlen
    )
    print(dataset)
# When --save is given, call opt_pack3 on the model and write its state_dict
# to that path.
# NOTE(review): `quantizers` is never assigned anywhere in the visible
# notebook, so this branch raises NameError unless it comes from elsewhere;
# `opt_pack3` is presumably provided by one of the star-imports — verify.
if args.save:
    opt_pack3(model, quantizers)
    torch.save(model.state_dict(), args.save) 
print(args)

In [None]:
from transformers import BloomModel

# Load the pretrained BLOOM checkpoint `bigscience/bloom-7b1`.
model = BloomModel.from_pretrained("bigscience/bloom-7b1")

# Directory that receives the local copy of the model.
directory_on_my_computer = "./"

# Pass the variable itself. The previous call passed its *name* as a string
# literal, which wrote the model into a folder literally called
# "directory_on_my_computer" and left the variable above unused.
model.save_pretrained(directory_on_my_computer)