In [1]:
# Print a fixed greeting to standard output.
print("Hello, World!")

Hello, World!


In [2]:
import torch
import intel_extension_for_pytorch as ipex
from intel_extension_for_pytorch.quantization import prepare
import transformers
from torch.utils.data import DataLoader, Dataset
import torch.profiler as profiler

# Dummy calibration dataset (replace with your actual dataset)
class DummyDataset(Dataset):
    """Synthetic dataset of random token-ID sequences for calibration.

    Each lookup draws a fresh random sample, so the same index does not
    return the same tensor twice. Replace this class with a real dataset
    to collect meaningful calibration statistics.

    Args:
        num_samples: Number of samples reported by ``len()``.
        vocab_size: Exclusive upper bound of the generated token IDs.
            The default 50257 is presumably the GPT-2 vocabulary size;
            confirm against the tokenizer actually used.
        seq_len: Number of token IDs in each sample.
    """

    def __init__(
        self,
        num_samples: int = 10,
        vocab_size: int = 50257,
        seq_len: int = 128,
    ) -> None:
        self.num_samples = num_samples
        self.vocab_size = vocab_size
        self.seq_len = seq_len

    def __len__(self) -> int:
        """Return the number of samples in the dataset."""
        return self.num_samples

    def __getitem__(self, idx: int) -> torch.Tensor:
        """Return a 1-D ``torch.long`` tensor of ``seq_len`` random token IDs.

        Raises:
            IndexError: If ``idx`` lies outside
                ``[-num_samples, num_samples)``.
        """
        # The class defines no __iter__, so plain iteration falls back to
        # calling __getitem__ with 0, 1, 2, ... and only stops on
        # IndexError. Without this check `for x in dataset` never ends.
        if not -self.num_samples <= idx < self.num_samples:
            raise IndexError(
                f"index {idx} is out of range for dataset of size "
                f"{self.num_samples}"
            )
        return torch.randint(
            0, self.vocab_size, (self.seq_len,), dtype=torch.long
        )

# Load the model in evaluation mode (replace "gpt2" with your model name or path).
model = transformers.AutoModelForCausalLM.from_pretrained("gpt2").eval()

# Quantization with IPEX (using dummy calibration data)
qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping()

# Stage 1: Calibration
# DataLoader's default batch size of 1 turns each (128,) sample into (1, 128).
calib_dataset = DataLoader(DummyDataset())
# Random batch with the same shape as a calibration batch; it is only used as
# the example input for prepare(). Replace with a real sample when available.
example_inputs = torch.randint(0, 50257, (1, 128), dtype=torch.long)
calibration_model = ipex.llm.optimize(
    model.eval(),
    quantization_config=qconfig,
)
prepared_model = prepare(
    calibration_model.eval(), qconfig, example_inputs=example_inputs, inplace=True
)

# File that carries the calibration result from stage 1 to stage 2.
qconf_summary_file_path = "qconf_summary.json"

# Run the calibration batches through the prepared model without gradients,
# then write the collected quantization configuration to disk.
with torch.no_grad():
    for calib_samples in calib_dataset:
        prepared_model(calib_samples)
prepared_model.save_qconf_summary(qconf_summary=qconf_summary_file_path)

# Stage 2: Quantization
# The keyword for the calibration summary is `qconfig_summary_file`; the
# previous `qconfig_summary` is not a parameter of ipex.llm.optimize.
# NOTE(review): the IPEX docs list float32/bfloat16/float16 for `dtype`;
# confirm that torch.int8 is accepted here, otherwise use torch.float.
model = ipex.llm.optimize(
    model.eval(),
    quantization_config=qconfig,
    qconfig_summary_file=qconf_summary_file_path,
    dtype=torch.int8,
    inplace=True,
)

# Profiled generation loop.
# Random token IDs stand in for a tokenized prompt: one sequence of 128 tokens.
input_ids = torch.randint(0, 50257, (1, 128), dtype=torch.long)

# Skip the first step, warm up on the second, record the next three; one cycle.
trace_schedule = profiler.schedule(wait=1, warmup=1, active=3, repeat=1)
# Each finished trace is written under ./log/gpt2 in TensorBoard format.
trace_handler = profiler.tensorboard_trace_handler('./log/gpt2')

# CPU-only profiling; inference mode is entered inside the profiler context
# and left before the profiler shuts down.
with profiler.profile(
    activities=[profiler.ProfilerActivity.CPU],
    schedule=trace_schedule,
    on_trace_ready=trace_handler,
    record_shapes=True,
    profile_memory=True,
    with_stack=True,
) as prof, torch.inference_mode():
    # Five steps cover the wait + warmup + active phases of the schedule.
    for _step in range(5):
        model.generate(input_ids, max_length=158, num_beams=4)
        prof.step()

print("Profiling complete. Results saved to ./log/gpt2")

ModuleNotFoundError: No module named 'torch'