In [1]:
import torch
import intel_extension_for_pytorch as ipex
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    TextStreamer,
    pipeline,
)

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.bfloat16)


generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.do_sample = True

  warn(


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

In [3]:
llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    generation_config=generation_config,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    streamer=streamer,
    device=0,
)

In [4]:
s1 = input("Enter a paragraph that needs to be converted to tabular format:")

Enter a paragraph that needs to be converted to tabular format: Intel® oneAPI DPC++ Compiler 2024.0.2, Intel® oneAPI DPC++ Library 2022.3.0, Intel® DPC++ Compatibility Tool 2024.0.0, Intel® oneAPI Math Kernel Library 2024.0.0,  Intel® Distribution for GDB* 2024.0.0, Intel® VTune™ Profiler 2024.0.0, Intel® Advisor 2024.0.0, Intel® oneAPI Threading Building Blocks 2021.11.0,  Intel® Integrated Performance Primitives 2021.10.0, Intel® Integrated Performance Primitives Cryptography 2021.9.1*, Intel® oneAPI Collective Communications Library 2021.11.2*,  Intel® oneAPI Data Analytics Library 2024.0.1*, Intel® oneAPI Deep Neural Networks Library 2024.0.0


In [5]:
text1 = "<s>[INST] Using the paragraph, Split the paragraph into Component Name and Component Version and create a nested Python list : " + s1 + " [/INST]"

In [6]:
text1

'<s>[INST] Using the paragraph, Split the paragraph into Component Name and Component Version and create a nested Python list : Intel® oneAPI DPC++ Compiler 2024.0.2, Intel® oneAPI DPC++ Library 2022.3.0, Intel® DPC++ Compatibility Tool 2024.0.0, Intel® oneAPI Math Kernel Library 2024.0.0,  Intel® Distribution for GDB* 2024.0.0, Intel® VTune™ Profiler 2024.0.0, Intel® Advisor 2024.0.0, Intel® oneAPI Threading Building Blocks 2021.11.0,  Intel® Integrated Performance Primitives 2021.10.0, Intel® Integrated Performance Primitives Cryptography 2021.9.1*, Intel® oneAPI Collective Communications Library 2021.11.2*,  Intel® oneAPI Data Analytics Library 2024.0.1*, Intel® oneAPI Deep Neural Networks Library 2024.0.0 [/INST]'

In [7]:
%%time
result1 = llm(text1)

Here is the nested Python list:
```
[
   ['Intel® oneAPI DPC++ Compiler', '2024.0.2'],
   ['Intel® oneAPI DPC++ Library', '2022.3.0'],
   ['Intel® DPC++ Compatibility Tool', '2024.0.0'],
   ['Intel® oneAPI Math Kernel Library', '2024.0.0'],
   ['Intel® Distribution for GDB*', '2024.0.0'],
   ['Intel® VTune™ Profiler', '2024.0.0'],
   ['Intel® Advisor', '2024.0.0'],
   ['Intel® oneAPI Threading Building Blocks', '2021.11.0'],
   ['Intel® Integrated Performance Primitives', '2021.10.0'],
   ['Intel® Integrated Performance Primitives Cryptography', '2021.9.1'],
   ['Intel® oneAPI Collective Communications Library', '2021.11.2'],
   ['Intel® oneAPI Data Analytics Library', '2024.0.1'],
   ['Intel® oneAPI Deep Neural Networks Library', '2024.0.0']
]
```
Note: The version number for Intel® Integrated Performance Primitives Cryptography is not specified in the paragraph, so I have left it as an empty string.
CPU times: user 15.4 s, sys: 702 ms, total: 16.1 s
Wall time: 16.1 s


In [8]:
result1

[{'generated_text': " Here is the nested Python list:\n```\n[\n    ['Intel® oneAPI DPC++ Compiler', '2024.0.2'],\n    ['Intel® oneAPI DPC++ Library', '2022.3.0'],\n    ['Intel® DPC++ Compatibility Tool', '2024.0.0'],\n    ['Intel® oneAPI Math Kernel Library', '2024.0.0'],\n    ['Intel® Distribution for GDB*', '2024.0.0'],\n    ['Intel® VTune™ Profiler', '2024.0.0'],\n    ['Intel® Advisor', '2024.0.0'],\n    ['Intel® oneAPI Threading Building Blocks', '2021.11.0'],\n    ['Intel® Integrated Performance Primitives', '2021.10.0'],\n    ['Intel® Integrated Performance Primitives Cryptography', '2021.9.1'],\n    ['Intel® oneAPI Collective Communications Library', '2021.11.2'],\n    ['Intel® oneAPI Data Analytics Library', '2024.0.1'],\n    ['Intel® oneAPI Deep Neural Networks Library', '2024.0.0']\n]\n```\nNote: The version number for Intel® Integrated Performance Primitives Cryptography is not specified in the paragraph, so I have left it as an empty string."}]

In [9]:
# Extract the generated text
generated_text = result1[0]['generated_text']
 
# Extract the content between ``` and ```
start_index = generated_text.find('```') + 3
end_index = generated_text.rfind('```')
content_between_backticks = generated_text[start_index:end_index]
 
# Print the content
print(content_between_backticks)


[
    ['Intel® oneAPI DPC++ Compiler', '2024.0.2'],
    ['Intel® oneAPI DPC++ Library', '2022.3.0'],
    ['Intel® DPC++ Compatibility Tool', '2024.0.0'],
    ['Intel® oneAPI Math Kernel Library', '2024.0.0'],
    ['Intel® Distribution for GDB*', '2024.0.0'],
    ['Intel® VTune™ Profiler', '2024.0.0'],
    ['Intel® Advisor', '2024.0.0'],
    ['Intel® oneAPI Threading Building Blocks', '2021.11.0'],
    ['Intel® Integrated Performance Primitives', '2021.10.0'],
    ['Intel® Integrated Performance Primitives Cryptography', '2021.9.1'],
    ['Intel® oneAPI Collective Communications Library', '2021.11.2'],
    ['Intel® oneAPI Data Analytics Library', '2024.0.1'],
    ['Intel® oneAPI Deep Neural Networks Library', '2024.0.0']
]



In [10]:
out = eval(content_between_backticks)
print(out)


[['Intel® oneAPI DPC++ Compiler', '2024.0.2'], ['Intel® oneAPI DPC++ Library', '2022.3.0'], ['Intel® DPC++ Compatibility Tool', '2024.0.0'], ['Intel® oneAPI Math Kernel Library', '2024.0.0'], ['Intel® Distribution for GDB*', '2024.0.0'], ['Intel® VTune™ Profiler', '2024.0.0'], ['Intel® Advisor', '2024.0.0'], ['Intel® oneAPI Threading Building Blocks', '2021.11.0'], ['Intel® Integrated Performance Primitives', '2021.10.0'], ['Intel® Integrated Performance Primitives Cryptography', '2021.9.1'], ['Intel® oneAPI Collective Communications Library', '2021.11.2'], ['Intel® oneAPI Data Analytics Library', '2024.0.1'], ['Intel® oneAPI Deep Neural Networks Library', '2024.0.0']]


In [11]:
type(out)

list

In [56]:
new_out = out
new_out.insert(0, ['Component Name', 'Version'])
print(new_out)

[['Component Name', 'Version'], ['Intel® oneAPI DPC++ Compiler', '2024.0.2'], ['Intel® oneAPI DPC++ Library', '2022.3.0'], ['Intel® DPC++ Compatibility Tool', '2024.0.0'], ['Intel® oneAPI Math Kernel Library', '2024.0.0'], ['Intel® Distribution for GDB*', '2024.0.0'], ['Intel® VTune™ Profiler', '2024.0.0'], ['Intel® Advisor', '2024.0.0'], ['Intel® oneAPI Threading Building Blocks', '2021.11.0'], ['Intel® Integrated Performance Primitives', '2021.10.0'], ['Intel® Integrated Performance Primitives Cryptography', '2021.9.1'], ['Intel® oneAPI Collective Communications Library', '2021.11.2'], ['Intel® oneAPI Data Analytics Library', '2024.0.1'], ['Intel® oneAPI Deep Neural Networks Library', '2024.0.0']]


In [28]:
from tabulate import tabulate
table_form = tabulate(out, headers=["Component Name", "Version"], tablefmt='outline')
print(table_form)

+-------------------------------------------------------+-----------+
| Component Name                                        | Version   |
| Intel® oneAPI DPC++ Compiler                          | 2024.0.2  |
| Intel® oneAPI DPC++ Library                           | 2022.3.0  |
| Intel® DPC++ Compatibility Tool                       | 2024.0.0  |
| Intel® oneAPI Math Kernel Library                     | 2024.0.0  |
| Intel® Distribution for GDB*                          | 2024.0.0  |
| Intel® VTune™ Profiler                                | 2024.0.0  |
| Intel® Advisor                                        | 2024.0.0  |
| Intel® oneAPI Threading Building Blocks               | 2021.11.0 |
| Intel® Integrated Performance Primitives              | 2021.10.0 |
| Intel® Integrated Performance Primitives Cryptography | 2021.9.1  |
| Intel® oneAPI Collective Communications Library       | 2021.11.2 |
| Intel® oneAPI Data Analytics Library                  | 2024.0.1  |
| Intel® oneAPI Deep

In [13]:
s2 = input("Enter a paragraph that needs to be converted to Release Notes:")

Enter a paragraph that needs to be converted to Release Notes: Intel oneAPI Base Toolkit 2024.0.1 Release Notes: The release patch releases for different component has been included in this release. These patches are build of top of previous patches as required. DPC++ 2024.0.2 has only minor bug fixes. IPP 2021.10.0 Cryptography has fixed an algorithmic issue with AES-XTS Intel AVX-512 code path. oneCCL 2021.11.2 has a bug fix to main driver compactability for Intel Data Centre GPU Max series. oneDAL has new features and bug fixes.


In [14]:
text = "<s>[INST] Make the paragraph into a releases notes bullet points: " + s2 + " [/INST]"

In [15]:
text

'<s>[INST] Make the paragraph into a releases notes bullet points: Intel oneAPI Base Toolkit 2024.0.1 Release Notes: The release patch releases for different component has been included in this release. These patches are build of top of previous patches as required. DPC++ 2024.0.2 has only minor bug fixes. IPP 2021.10.0 Cryptography has fixed an algorithmic issue with AES-XTS Intel AVX-512 code path. oneCCL 2021.11.2 has a bug fix to main driver compactability for Intel Data Centre GPU Max series. oneDAL has new features and bug fixes. [/INST]'

In [16]:
%%time
result = llm(text)

* Intel oneAPI Base Toolkit 2024.0.1 Release Notes
* Patch releases for different components included in this release
* Build of top of previous patches as required
* DPC++ 2024.0.2 has only minor bug fixes
* IPP 2021.10.0 Cryptography has fixed an algorithmic issue with AES-XTS Intel AVX-512 code path
* oneCCL 2021.11.2 has a bug fix to main driver compactability for Intel Data Centre GPU Max series
* oneDAL has new features and bug fixes
CPU times: user 5.69 s, sys: 166 ms, total: 5.86 s
Wall time: 5.86 s


In [17]:
def format_prompt(prompt, system_prompt=""):
    if system_prompt.strip():
        return f"<s>[INST] {system_prompt} {prompt} [/INST]"
    return f"<s>[INST] {prompt} [/INST]"

In [18]:
SYSTEM_PROMPT = text.strip()

In [19]:
%%time
prompt = """
Can you follow the below template for this:
Product Level Updates where general points of release notes are given.
Component Level Updates where component release notes are given.
Do not write complete sentances, only short points as per below format:

<Product Name> <Major Version>
New in <Product Name>
	* Product Level Updates:
		+ <Point #1>
		+ <Point #2>
    * Component Level Updates:
        + <Component Name> <Version No:>
            + <Point #1>
            + <Point #2>
""".strip()
result = llm(format_prompt(prompt, SYSTEM_PROMPT))

Intel oneAPI Base Toolkit 2024.0.1 Release Notes:

Product Level Updates:

* Intel oneAPI Base Toolkit 2024.0.1
	+ Patch releases for different components included
	+ Build of top of previous patches as required

Component Level Updates:

* DPC++ 2024.0.2
	+ Minor bug fixes
* IPP 2021.10.0 Cryptography
	+ Fixed an algorithmic issue with AES-XTS Intel AVX-512 code path
* oneCCL 2021.11.2
	+ Bug fix to main driver compactability for Intel Data Centre GPU Max series
* oneDAL
	+ New features and bug fixes
CPU times: user 7.13 s, sys: 161 ms, total: 7.29 s
Wall time: 7.29 s


In [25]:
type(result)

list

In [26]:
result

[{'generated_text': ' Intel oneAPI Base Toolkit 2024.0.1 Release Notes:\n\nProduct Level Updates:\n\n* Intel oneAPI Base Toolkit 2024.0.1\n\t+ Patch releases for different components included\n\t+ Build of top of previous patches as required\n\nComponent Level Updates:\n\n* DPC++ 2024.0.2\n\t+ Minor bug fixes\n* IPP 2021.10.0 Cryptography\n\t+ Fixed an algorithmic issue with AES-XTS Intel AVX-512 code path\n* oneCCL 2021.11.2\n\t+ Bug fix to main driver compactability for Intel Data Centre GPU Max series\n* oneDAL\n\t+ New features and bug fixes'}]

In [44]:
# Extract the generated text
generated_text = result[0]['generated_text']
#print(generated_text)

# Extract the content between ``` and ```
start_index = generated_text.find('')
end_index = generated_text.rfind('')
content = generated_text[start_index:end_index]
 
# Print the content
print(content)

#print(type(content))

 Intel oneAPI Base Toolkit 2024.0.1 Release Notes:

Product Level Updates:

* Intel oneAPI Base Toolkit 2024.0.1
	+ Patch releases for different components included
	+ Build of top of previous patches as required

Component Level Updates:

* DPC++ 2024.0.2
	+ Minor bug fixes
* IPP 2021.10.0 Cryptography
	+ Fixed an algorithmic issue with AES-XTS Intel AVX-512 code path
* oneCCL 2021.11.2
	+ Bug fix to main driver compactability for Intel Data Centre GPU Max series
* oneDAL
	+ New features and bug fixes


In [69]:
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def output_to_pdf(text, filename):
    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter 
    c.setFont("Helvetica", 12)   
    lines = text.split('\n')  
    y = height - 50   
    for line in lines:
        c.drawString(50, y, line)
        y -= 15  

    c.save()


text_to_write = content_between_backticks
output_to_pdf(text_to_write, "output1.pdf")


In [64]:
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle

def create_pdf(data, filename='table.pdf'):
    doc = SimpleDocTemplate(filename,pageSize=letter)
    table = Table(data, rowHeights=[9*3 for i in range(len(out))],colWidths=[93*3 for i in range(len(out[0]))])

    # Add style to table
    style = TableStyle([('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                        ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                        ('GRID', (0, 0), (-1, -1), 1, colors.black)])

    for i in range(1, len(data)):
        if i%2==0:
            bg_color = colors.white
        else:
            bg_color = colors.lightgrey
        style.add('BACKGROUND', (0, i), (-1, i), bg_color)
            
    
    table.setStyle(style)

    # Add table to document
    doc.build([table])

# Example usage
create_pdf(new_out)

In [68]:
from pypdf import PdfWriter

pdfs = ['table.pdf', 'output1.pdf']

merger = PdfWriter()

for pdf in pdfs:
    merger.append(pdf)

merger.write("result.pdf")
merger.close()