In [None]:
!pip install openvino-dev optimum-intel transformers torch
!pip install optimum[openvino,nncf] torchvision evaluate

In [None]:
import time

In [None]:
import os
from openvino import Core  # top-level namespace; `openvino.runtime` is deprecated

# Check CPU capabilities and AMX support
ie = Core()
cpu_name = ie.get_property("CPU", "FULL_DEVICE_NAME")

# Read the feature-flag line printed by `lscpu`. The context manager closes the
# pipe (and reaps the child process) instead of leaving it to garbage collection.
# If `lscpu` is unavailable the output is empty and AMX is reported as unsupported.
with os.popen("lscpu | grep Flags") as lscpu_pipe:
    flags = lscpu_pipe.read()

# Match whole whitespace-separated tokens rather than substrings of the raw text,
# so a flag is only counted when it appears as its own entry.
required_amx_flags = {"amx_bf16", "amx_tile", "amx_int8"}
amx_supported = required_amx_flags.issubset(flags.split())

# Output results
print(f"🖥️ CPU Name: {cpu_name}")
print("✅ AMX is supported" if amx_supported else "❌ AMX is not supported")

In [None]:
import time
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()


from optimum.intel import OVModelForSeq2SeqLM
import openvino.properties as props
import openvino.properties.hint as hints
import openvino.properties.streams as streams
from transformers import T5Tokenizer

# Configure optimization settings (passed to the model via `ov_config` below)
ov_config = {
    hints.performance_mode(): hints.PerformanceMode.LATENCY,
    streams.num(): "1",
    props.cache_dir(): "",  # NOTE(review): empty path presumably disables model caching — confirm
    "DYNAMIC_QUANTIZATION_GROUP_SIZE": "32",  # For CPU optimization
    "KV_CACHE_PRECISION": "u8"  # For memory optimization
}

# Load model with optimizations
model_id = "laituan245/molt5-large-smiles2caption"
tokenizer = T5Tokenizer.from_pretrained(model_id)

# Convert and optimize model
model = OVModelForSeq2SeqLM.from_pretrained(
    model_id,
    export=True,
    ov_config=ov_config,
    device="CPU",
    load_in_8bit=True  # Enable int8 quantization
)

# The measured span covers the imports above as well as the tokenizer/model load.
end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time: {execution_time}")

In [None]:
import time
# Monotonic clock for interval timing (unaffected by system clock adjustments).
start_time = time.perf_counter()

# Test inference
# Uses the `tokenizer` and `model` globals created by an earlier cell.
input_text = 'C1=CC2=C(C(=C1)[O-])NC(=CC2=O)C(=O)O'
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=512)
# Decode only the first generated sequence, dropping special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time: {execution_time}")

In [None]:
import time
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()


from optimum.intel import OVModelForSeq2SeqLM
import openvino.properties as props
import openvino.properties.hint as hints
import openvino.properties.streams as streams
from transformers import T5Tokenizer

# Configure optimization settings (passed to the model via `ov_config` below)
ov_config = {
    hints.performance_mode(): hints.PerformanceMode.LATENCY,
    streams.num(): "1",
    props.cache_dir(): "",  # NOTE(review): empty path presumably disables model caching — confirm
    "DYNAMIC_QUANTIZATION_GROUP_SIZE": "32",  # For CPU optimization
    "KV_CACHE_PRECISION": "u8"  # For memory optimization
}

# Load model with optimizations
# Rebinds the `tokenizer` and `model` globals, replacing any previously loaded model.
model_id = "laituan245/molt5-large-caption2smiles"
tokenizer = T5Tokenizer.from_pretrained(model_id)

# Convert and optimize model
model = OVModelForSeq2SeqLM.from_pretrained(
    model_id,
    export=True,
    ov_config=ov_config,
    device="CPU",
    load_in_8bit=True  # Enable int8 quantization
)

# The measured span covers the imports above as well as the tokenizer/model load.
end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time: {execution_time}")

In [None]:
import time
# Monotonic clock for interval timing (unaffected by system clock adjustments).
start_time = time.perf_counter()

# Test inference
# Uses the `tokenizer` and `model` globals created by an earlier cell.
input_text = 'The molecule is a monomethoxybenzene that is 2-methoxyphenol substituted by a hydroxymethyl group at position 4. It has a role as a plant metabolite. It is a member of guaiacols and a member of benzyl alcohols.'
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=512)
# Decode only the first generated sequence, dropping special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time: {execution_time}")

In [None]:
!pip install einops

In [None]:
import time
# Timestamp taken before the imports; the commented-out model-loading cell
# further down subtracts it from a later time.time() reading.
start_time = time.time()

from optimum.intel import OVModelForCausalLM
import openvino.properties as props
import openvino.properties.hint as hints
import openvino.properties.streams as streams
from transformers import AutoTokenizer, GenerationConfig
import torch

# OpenVINO runtime options, assembled key by key in the same order as before.
ov_config = {}
ov_config[hints.performance_mode()] = hints.PerformanceMode.LATENCY
ov_config[streams.num()] = "1"
ov_config[props.cache_dir()] = ""
ov_config["DYNAMIC_QUANTIZATION_GROUP_SIZE"] = "32"  # CPU optimization
ov_config["KV_CACHE_PRECISION"] = "u8"  # memory optimization

In [None]:
# # Load model with optimizations
# model_name_or_id = "AI4Chem/CHEMLLM-2b-1_5"
# tokenizer = AutoTokenizer.from_pretrained(model_name_or_id, trust_remote_code=True)

# # Convert and optimize model
# model = OVModelForCausalLM.from_pretrained(
#     model_name_or_id,
#     export=True,
#     ov_config=ov_config,
#     device="CPU",
#     # load_in_8bit=True,  # Int8 quantization for OpenVINO is applied differently
#     torch_dtype=torch.float16,
#     trust_remote_code=True  # Include if the model uses custom code
# )

# end_time = time.time()
# execution_time = end_time - start_time
# print(f"Model load and optimization time: {execution_time} seconds")

In [None]:
# # Inference
# start_time = time.time()

# prompt = "Predict the aqueous solubility (in mg/mL) and explain the factors affecting solubility for: CC1=CC=C(C=C1)C(=O)CCCN"

# inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)

# generation_config = GenerationConfig(
#     do_sample=True,
#     top_k=10,
#     temperature=0.9,
#     max_new_tokens=500,
#     repetition_penalty=1.2,
#     pad_token_id=tokenizer.eos_token_id
# )

# # Generate output with the optimized model
# outputs = model.generate(
#     **inputs,
#     generation_config=generation_config
# )

# generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
# print(generated_text)

# end_time = time.time()
# execution_time = end_time - start_time
# print(f"Inference time: {execution_time} seconds")