# GraphReasoning: Scientific Discovery through Knowledge Extraction and Multimodal Graph-based Representation and Reasoning

Markus J. Buehler, MIT, 2024 mbuehler@MIT.EDU

### Example: GraphReasoning: Loading graph and graph analysis

In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# device='cuda:0'

from tqdm.notebook import tqdm
from IPython.display import display, Markdown
# from huggingface_hub import hf_hub_download
# from GraphReasoning import *


In [2]:
verbatim=False

### Load dataset

In [3]:
import glob

# doc_data_dir = '/home/mkychsu/pool/TSMC/dataset_textbook/'
doc_data_dir = './paper/'
doc_list=[f'{doc_data_dir}dry-etching-technology-for-semiconductors_compress.pdf',
          f'{doc_data_dir}plasma-etching-an-introduction_compress.pdf',
          f'{doc_data_dir}handbook-of-silicon-wafer-cleaning-technology-third-edition_compress.pdf',
          f'{doc_data_dir}Ultraclean Surface Processing of Silicon Wafers - PDF Free Download.pdf',
          f'{doc_data_dir}Atomic Layer Processing_semiconductor.pdf'   
]

doc_data_dir = './paper_new/'

# doc_list=[]

doc_list_all=sorted(glob.glob(f'{doc_data_dir}*.pdf'))

from difflib import SequenceMatcher

def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()

for i, doc in enumerate(doc_list_all):
    if doc in doc_list:
        continue
    # try:
    #     temp_doc = doc_list_all[i+1]
    #     sim = similar(temp_doc.lower(), doc.lower())
    #     if sim < 0.9:
    #         doc_list.append(doc)
    #     else:
    #         if abs(os.stat(doc).st_size - os.stat(temp_doc).st_size)/os.stat(doc).st_size < 1e-3:
    #             print(f'{i}:{sim},\n {doc} \n {temp_doc}')
    #         else:
    #             doc_list.append(doc)
    # except:
    #     pass
    doc_list.append(doc)
    


In [4]:
doc_list

['./paper/dry-etching-technology-for-semiconductors_compress.pdf',
 './paper/plasma-etching-an-introduction_compress.pdf',
 './paper/handbook-of-silicon-wafer-cleaning-technology-third-edition_compress.pdf',
 './paper/Ultraclean Surface Processing of Silicon Wafers - PDF Free Download.pdf',
 './paper/Atomic Layer Processing_semiconductor.pdf',
 './paper_new/Dependences of bottom and sidewall etch rates on biasvoltage and source power during the etching of poly-Si andfluorocarbon polymer usingSF6,\u2008C4F8,andO2plasmas.pdf',
 './paper_new/Etch Mechanism Study in Gate Patterning for 14 nm Node and.pdf',
 './paper_new/Model analysis of the feature profile evolution during Si etching in HBr-containing plasmas.pdf',
 './paper_new/Profile evolution during polysilicon gate etching with low-pressure high-density cl2 hbr o2  plasma chemistries.pdf',
 './paper_new/Silicon etching in a pulsed HBr_O2 plasma. II. Pattern transfer..pdf']

In [5]:
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

from tqdm.notebook import tqdm
from IPython.display import display, Markdown

verbatim=False

data_dir='./GRAPHDATA_TSMC'    
data_dir_output='./GRAPHDATA_TSMC_OUTPUT'

tokenizer_model=f'/home/mkychsu/pool/llm/SEMIKONG-8b-GPTQ'
# embedding_tokenizer = AutoTokenizer.from_pretrained(tokenizer_model, use_fast=False)
# embedding_model = AutoModelForCausalLM.from_pretrained(tokenizer_model, device_map='cuda', torch_dtype='auto', output_hidden_states=True)

embedding_tokenizer = AutoTokenizer.from_pretrained(tokenizer_model,use_fast=False)
embedding_model = AutoModelForCausalLM.from_pretrained(tokenizer_model,output_hidden_states=True).to('cuda')



  from .autonotebook import tqdm as notebook_tqdm
  def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq):
  def backward(ctx, grad_output):
  @custom_fwd(cast_inputs=torch.float16)
CUDA extension not installed.
CUDA extension not installed.
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00,  3.59it/s]


In [6]:
import networkx as nx

graph_root="5books_70b"
graph_GraphML= f'{data_dir_output}/{graph_root}.graphml'
G = nx.read_graphml(graph_GraphML)

In [7]:
# edges = list(G.out_edges(data=True))
# nodes = set()
# nodes.add(edges[0][0])
# nodes.add(edges[1][0])

# G=G.subgraph(nodes)

In [8]:
from GraphReasoning import load_embeddings, save_embeddings, generate_node_embeddings
embedding_file='TSMC_KG_70b.pkl'
generate_new_embeddings=True
import torch

if os.path.exists(f'{data_dir}/{embedding_file}'):
    generate_new_embeddings=False
    
with torch.no_grad():
    if generate_new_embeddings:

        # try:
        node_embeddings = generate_node_embeddings(G, embedding_tokenizer, embedding_model, )
        # except:
        #     node_embeddings = generate_node_embeddings(nx.DiGraph(), embedding_tokenizer, embedding_model, )

        save_embeddings(node_embeddings, f'{data_dir}/{embedding_file}')

    else:
        filename = f"{data_dir}/{embedding_file}"
        # file_path = hf_hub_download(repo_id=repository_id, filename=filename, local_dir='./')
        # print(f"File downloaded at: {file_path}")
        node_embeddings = load_embeddings(f'{data_dir}/{embedding_file}')

### Set up LLM client:

In [9]:
import autogen, openai
config_list = [
    {
        "model":"Llama3.1",
        "base_url": "http://localhost:8080/v1",
        "api_key":"NULL",
        "max_tokens": 10000
    },
]

2025-02-13 01:04:15,413	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-02-13 01:04:17,061	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.


In [10]:
from openai import OpenAI
class llm:
    def __init__(self, llm_config):
        self.client = OpenAI(api_key=llm_config["api_key"],
                             base_url=llm_config["base_url"],
                             )
        self.model = llm_config["model"]
        self.max_tokens = llm_config["max_tokens"]
        
    def generate_cli(self, system_prompt="You are an expert in this field. Try your best to give a clear and concise answer.", 
                           prompt="Hello world! I am", temperature=0,
                           ):     
        try:
            if system_prompt==None:
                messages=[
                    {"role": "user", "content": prompt},

                ]

            else:
                messages=[
                    {"role": "system",  "content": system_prompt},
                    {"role": "user", "content": prompt},

                ]
            result=self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    temperature=temperature,
                    max_tokens=self.max_tokens,
                )

            return result.choices[0].message.content
        except:
            return ''
        

In [11]:
llm=llm(config_list[0])

In [12]:
generate = llm.generate_cli

In [13]:
# import time
# q="What are the recent methods to do dry etching?"
# start_time = time.time()
# res=generate( system_prompt='You are an expert in semiconductor fields. Try to find the clear relation in the provided information. Skip the authorship information if it is not relevant', 
#          prompt=q, temperature=0.3,  )

# deltat=time.time() - start_time
# print("--- %s seconds ---" % deltat)
# print(res)

In [14]:
# from GraphReasoning import make_graph_from_text
# graph_HTML, graph_GraphML, G, net, output_pdf = make_graph_from_text(res, generate,
#                                                                      chunk_size=1000,chunk_overlap=200,
#                                                                      do_distill=True, data_dir='temp', verbatim=True,
#                                                                      repeat_refine=0)

In [15]:
# doc = doc_list[4]
# print(doc)
# title = doc.split('/')[-1].split('.pdf')[0]
# doc = doc.split('/')
# doc[-2]+=f'_txt'
# doc[-1]=title+f'/{title}.md'
# doc='/'.join(doc)
# graph_root = f'{title}'

# graph_GraphML= f'{data_dir}/{graph_root}_graph.graphml'
    

# G_test = nx.read_graphml(graph_GraphML)

In [16]:
import networkx as nx

from GraphReasoning import make_graph_from_text, add_new_subgraph_from_text, save_embeddings
with torch.no_grad():
    for i, doc in enumerate(doc_list):

        title = doc.split('/')[-1].split('.pdf')[0]
        doc = doc.split('/')
        doc[-2]+=f'_txt'
        doc[-1]=title+f'/{title}.md'
        doc='/'.join(doc)

        graph_root = f'{title}'

        _graph_GraphML= f'{data_dir_output}/{graph_root}_augmented_graphML_integrated.graphml'
        txt=''
        print(f'{doc}')
        if os.path.exists(_graph_GraphML):
            G = nx.read_graphml(_graph_GraphML)
            print(f'Main KG loaded: {_graph_GraphML}, {G}')
            continue


        if os.path.exists(f'{title}_err.txt'):
            print(f'No. {i}: {title} got something wrong.')
            continue

        elif os.path.exists(f'{data_dir}/{graph_root}_graph.graphml'):
            print(f'Found a graph fragment to merge: {graph_root}: {doc}.')
            graph_GraphML = f'{data_dir}/{graph_root}_graph.graphml'

            print(f'Merging graph No. {i}: {doc} to the main one')
            # try:
            _, G, _, node_embeddings, res = add_new_subgraph_from_text('', generate,
                               node_embeddings, embedding_tokenizer, embedding_model,
                               original_graph=G, data_dir_output=data_dir_output, graph_root=graph_root,
                               chunk_size=2000,chunk_overlap=200,
                               do_simplify_graph=True,size_threshold=10,
                               repeat_refine=0,similarity_threshold=0.95,
                               do_Louvain_on_new_graph=True, include_contextual_proximity=False,
                               #whether or not to simplify, uses similiraty_threshold defined above
                               return_only_giant_component=False,
                               save_common_graph=False,G_to_add=None,graph_GraphML_to_add=graph_GraphML,
                               verbatim=True,)

            save_embeddings(node_embeddings, f'{data_dir}/{embedding_file}')
            # except:
                # print(f'No. {i}: {doc} fail to add')

        else:
            # continue

            print(f'Generating a knowledge graph from {doc}')
            with open(doc, "r") as f:
                txt = " ".join(f.read().splitlines())  # separate lines with a single space

            try:
                _, graph_GraphML, _, _, _ = make_graph_from_text(txt,generate,
                                      include_contextual_proximity=False,
                                      graph_root=graph_root,
                                      chunk_size=1000,chunk_overlap=100,
                                      repeat_refine=0,verbatim=False,
                                      data_dir=data_dir,
                                      save_PDF=False,#TO DO
                                     )
            except Exception as e:
                print(f'Something is wrong with No. {i}: {doc}.')
                f = open(f'{title}_err.txt', 'w')
                f.write(f'{e}\n{txt}')
                f.close()          
                pass


./paper_txt/dry-etching-technology-for-semiconductors_compress/dry-etching-technology-for-semiconductors_compress.md
Main KG loaded: ./GRAPHDATA_TSMC_OUTPUT/dry-etching-technology-for-semiconductors_compress_augmented_graphML_integrated.graphml, DiGraph with 724 nodes and 931 edges
./paper_txt/plasma-etching-an-introduction_compress/plasma-etching-an-introduction_compress.md
Main KG loaded: ./GRAPHDATA_TSMC_OUTPUT/plasma-etching-an-introduction_compress_augmented_graphML_integrated.graphml, DiGraph with 3517 nodes and 5168 edges
./paper_txt/handbook-of-silicon-wafer-cleaning-technology-third-edition_compress/handbook-of-silicon-wafer-cleaning-technology-third-edition_compress.md
Main KG loaded: ./GRAPHDATA_TSMC_OUTPUT/handbook-of-silicon-wafer-cleaning-technology-third-edition_compress_augmented_graphML_integrated.graphml, DiGraph with 8315 nodes and 14026 edges
./paper_txt/Ultraclean Surface Processing of Silicon Wafers - PDF Free Download/Ultraclean Surface Processing of Silicon Wafe

...

Now create or load new graph...
Loading or using provided graph... Any txt data provided will be ignored...: None ./GRAPHDATA_TSMC/Silicon etching in a pulsed HBr_O2 plasma. II. Pattern transfer._graph.graphml
--- 0.010283946990966797 seconds ---
Now grow the existing graph...
Now update node embeddings


100%|██████████| 13689/13689 [55:13<00:00,  4.13it/s] 


Now simplify graph.
Start...


100%|██████████| 14283/14283 [00:00<00:00, 2302796.23it/s]


Node to keep and merge: plasma etching <-- plasma etching patterns
Node to keep and merge: plasma etching <-- plasma etching process
Node to keep and merge: plasma etching <-- plasma etching processes
Node to keep and merge: plasma etching <-- plasma etch process
Node to keep and merge: reactive ion etching (rie) <-- reactive ion etch (rie)
Node to keep and merge: microelectronic devices <-- microelectronic fabrication
Node to keep and merge: etch rate <-- etch rates
Node to keep and merge: etch rate <-- etching rate
Node to keep and merge: reactive species <-- reactive radicals
Node to keep and merge: chemical sputtering <-- chemical sputtering rates
Node to keep and merge: etching process <-- etch process
Node to keep and merge: etching process <-- etching
Node to keep and merge: etching process <-- etching processes
Node to keep and merge: etching process <-- etching time
Node to keep and merge: etching process <-- etching profiles
Node to keep and merge: etching process <-- etching

100%|██████████| 13558/13558 [54:38<00:00,  4.14it/s]


Relcaulated embeddings... 
Now save graph... 
Graph simplified and saved to ./GRAPHDATA_TSMC_OUTPUT/simple_graph_graphML_simplified.graphml
Remove small fragments
using weakly connected components...


100%|██████████| 13528/13528 [54:32<00:00,  4.13it/s]


Removing embedding for node no longer in graph: changed open ratio of the mask
Removing embedding for node no longer in graph: chemical composition of rel
Removing embedding for node no longer in graph: comparative analysis
Removing embedding for node no longer in graph: concentration-dependent variability
Removing embedding for node no longer in graph: duty cycle (dc)
Removing embedding for node no longer in graph: experimental setup
Removing embedding for node no longer in graph: hf bath
Removing embedding for node no longer in graph: high frequency and low duty cycle conditions
Removing embedding for node no longer in graph: h€ubner model
Removing embedding for node no longer in graph: intensity levels
Removing embedding for node no longer in graph: ion-enhanced chemical sputtering
Removing embedding for node no longer in graph: mask optimization
Removing embedding for node no longer in graph: pulse frequency
Removing embedding for node no longer in graph: reduced thickness of mask 

In [27]:
save_embeddings(node_embeddings, f'{data_dir}/{embedding_file}')

In [17]:
import numpy as np

while doc_list != []:
    doc = np.random.choice(doc_list)   
    i = doc_list.index(doc)
    
    title = doc.split('/')[-1].split('.pdf')[0]
    doc = doc.split('/')
    doc[-2]+=f'_txt'
    doc[-1]=title+f'/{title}.md'
    doc='/'.join(doc)
    
    title = doc.split('/')[-1].split('.md')[0]
    graph_root = f'{title}'
    print(f'{doc}')
    if os.path.exists(f'{title}.txt'):
        print(f'No. {i}: {title} has been read')
        doc_list.pop(i)
        continue
    
    if os.path.exists(f'{title}_err.txt'):
        print(f'No. {i}: {title} got something wrong.')
        doc_list.pop(i)
        continue
    with open(doc, "r") as f:
        txt = " ".join(f.read().splitlines())  # separate lines with a single space

    try:
        _, graph_GraphML, _, _, _ = make_graph_from_text(txt,generate,
                              include_contextual_proximity=False,
                              graph_root=graph_root,
                              chunk_size=2000,chunk_overlap=200,
                              repeat_refine=0,verbatim=False,
                              data_dir=data_dir,
                              save_PDF=False,#TO DO
                             )
    except Exception as e:
        print(f'Something is wrong with No. {i}: {title}.')
        f = open(f'{title}_err.txt', 'w')
        f.write(f'{e}\n{txt}')
        f.close()          
        continue



In [18]:
# doc = doc_list[0]
# title = doc.split('/')[-1].split('.pdf')[0]
# graph_root = f'{title}'
import networkx as nx

G = nx.read_graphml(f'{data_dir_output}/5books_70b.graphml')
# G = nx.read_graphml(f'{data_dir_output}/4books_integrated.graphml')
print(f'KG loaded: {G}')
# node_embeddings = generate_node_embeddings(G, embedding_tokenizer, embedding_model, )



KG loaded: DiGraph with 14365 nodes and 26743 edges


In [19]:
from GraphReasoning import load_embeddings
embedding_file='TSMC_KG_70b.pkl'
generate_new_embeddings=True

if os.path.exists(f'{data_dir}/{embedding_file}'):
    generate_new_embeddings=False

if generate_new_embeddings:
    try:
        node_embeddings = generate_node_embeddings(G, embedding_tokenizer, embedding_model, )
    except:
        node_embeddings = generate_node_embeddings(nx.DiGraph(), embedding_tokenizer, embedding_model, )
        
    save_embeddings(node_embeddings, f'{data_dir}/{embedding_file}')

else:
    filename = f"{data_dir}/{embedding_file}"
    # file_path = hf_hub_download(repo_id=repository_id, filename=filename, local_dir='./')
    # print(f"File downloaded at: {file_path}")
    node_embeddings = load_embeddings(f'{data_dir}/{embedding_file}')

In [20]:
node_sorted=sorted(list(G.nodes), key= lambda x: -len(x.split()))
node_sorted[0]

'number of adhered particles, particle concentration, particle diffusion coefficient, thickness of the diffusion layer, potential of van der waals force and electrical double layers, boltzmann constant, and absolute temperature'

In [21]:
G.out_edges(list(G.nodes)[1234])

OutEdgeDataView([('free diffusion', 'electron cyclotron frequency')])

In [22]:
list(G.nodes)[1234]

'free diffusion'

In [23]:
len(node_sorted)

14365

In [24]:
from GraphReasoning import node_report

node_report(G)

                                                    Node  In_Degree  \
0      number of adhered particles, particle concentr...          1   
1      argon (ar), carbon tetrafluoride (cf4), helium...          1   
2      operating plasma etching processes within opti...          1   
3      scratches, haze, particles, missing and broken...          1   
4      front end of line (feol), mid end of line (meo...          1   
...                                                  ...        ...   
14360                                   transconductance          3   
14361                                                ron          2   
14362                                              hemts          1   
14363                                            misfets          1   
14364                                            mosfets          1   

       Out_Degree  Degree  Length  
0               0       1      29  
1               0       1      19  
2               0       1      18  
3  

(['number of adhered particles, particle concentration, particle diffusion coefficient, thickness of the diffusion layer, potential of van der waals force and electrical double layers, boltzmann constant, and absolute temperature',
  'argon (ar), carbon tetrafluoride (cf4), helium (he), hydrogen (h2), nitrogen (n2), oxygen (o2), silane (sih4), and silicon tetrafluoride (sif4)',
  'operating plasma etching processes within optimal parameters, frequent cleaning of hardware, and using point-of-use filters in gas lines',
  'scratches, haze, particles, missing and broken lines, cops, stacking faults, air pockets, voids, area defects and clusters',
  'front end of line (feol), mid end of line (meol), and back end of line (beol) modules',
  'absolute temperature, kinematic viscosity, boltzmann constant, fluid density, particle density, particle diameter, and cunningham correction factor',
  'photolithographic process, physical cleaning, physical vapor deposition (pvd), piranha cleaning, resis

In [25]:
graph=G

In [26]:
commnity_file='community_data_70b.pkl'
import pickle
try:
    community_data = pickle.load( open( f"{data_dir_output}/{commnity_file}", "rb" ) )
    communities = community_data['communities']
    community_summaries = community_data['community_summaries']
    
except:
    communities = detect_communities(graph.to_undirected())
    community_summaries = summarize_communities(graph, communities, generate)
    dict_community = {'communities': communities, 'community_summaries': community_summaries} 
    pickle.dump( dict_community, open( f"{data_dir_output}/{commnity_file}", "wb" ))
    

NameError: name 'detect_communities' is not defined

In [None]:
from GraphReasoning import local_search

In [None]:
Q=[]
Q.append('How to make a silicon (si) radical etch with aspect ratio = 15 and cd = 2.5 nm, at 1 atm, 300K?')
Q.append('What are the knobs that can change the uniformity in radical si etching process?')
Q.append('How to increase the selectivity ratio (gas/power/pressure) of si to oxide in ICP (Inductively Coupled Plasma) etching?')
Q.append('How to reduce the particle in the dechuck step?')
Q.append('How to improve the cleaning or etching ability of Al particles?')



In [None]:
response = local_search(Q[0], generate, graph, node_embeddings, embedding_tokenizer, embedding_model, N_samples=3, similarity_threshold=0.95)
print(response)

In [None]:
question = 'What is cvd uniformity and etching uniformity?'

response = local_search(question, generate, graph, node_embeddings, embedding_tokenizer, embedding_model, N_samples=3, similarity_threshold=0.95)
print(response)

In [None]:
A1_local=[]
for q in Q:
    response_local = local_search(q, generate, graph, node_embeddings, embedding_tokenizer, embedding_model, N_samples=3, similarity_threshold=0.9)
    
    A1_local.append(response_local)
    

In [None]:
A1_global=[]
for q in Q:
    response_global = global_search(q, generate, graph, communities, community_summaries, node_embeddings, embedding_tokenizer, embedding_model, N_samples=3, similarity_threshold=0.9)
    
    A1_global.append(response_global)
    

In [None]:
A2=[]
for q in Q:
    final_response = generate(system_prompt= "Answer the query detailedly.",
                                     prompt=f"Query: {q}.")
    A2.append(final_response)

In [None]:
for q,a in zip(Q,A1_local):
    print(q)
    print(a)
    print('----------------------')

In [None]:
for q,a in zip(Q,A1_global):
    print(q)
    print(a)
    print('----------------------')

In [None]:
for q,a in zip(Q,A2):
    print(q)
    print(a)
    print('----------------------')

In [None]:
visualize_embeddings_2d_pretty_and_sample(node_embeddings, n_clusters=10, n_samples=10, data_dir=data_dir_output, alpha=.7)


In [None]:
# describe_communities_with_plots_complex(G, N=6, data_dir=data_dir_output)


In [None]:
# graph_statistics_and_plots_for_large_graphs(G, data_dir=data_dir_output,include_centrality=False,
                                               # make_graph_plot=False,)

In [None]:
is_scale_free (G, data_dir=data_dir_output)

In [None]:
# find_best_fitting_node_list("semiconductor", node_embeddings, embedding_tokenizer, embedding_model, 5)

In [None]:
# find_best_fitting_node_list("better manufactoring process for semiconductor", node_embeddings , embedding_tokenizer, embedding_model, 5)

In [None]:
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llama3VisionAlphaChatHandler
chat_handler = Llama3VisionAlphaChatHandler(clip_model_path="/home/mkychsu/pool/llm/llama-3-vision-alpha")
llm = Llama(
  model_path=" ~/pool/llm/llama-3-vision-alpha-mmproj-f16.gguf",
  chat_handler=chat_handler,
  n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
)
llm.create_chat_completion(
    messages = [
        {"role": "system", "content": "You are an assistant who perfectly describes images."},
        {
            "role": "user",
            "content": [
                {"type" : "text", "text": "What's in this image?"},
                {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" } }
            ]
        }
    ]
)

In [None]:
model_id = "/home/mkychsu/pool/llm/Cephalo-Phi-3-vision-128k-4b-alpha"

In [None]:
from PIL import Image 
import requests 
from transformers import AutoModelForCausalLM 
from transformers import AutoProcessor 

# model_id = "lamm-mit/Cephalo-Phi-3-vision-128k-4b-beta" 

model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto")

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) 


In [None]:
import glob
papers = glob.glob(f'./paper_new/*[!pdf]')
paper = papers[2]

pngs = glob.glob(f'{paper}/*png')
pngs

In [None]:

caption = """
FIG. 6. Cross-sectional SEM micrographs of poly-Si line-and-space features

![4_image_0.png](4_image_0.png)

etched in an HBr/O2 plasma ~a! before and ~b! after the removal of deposited films by a standard wet clean with ammonia peroxide mixtures.
lations and compared the experimental and numerical results
"""
question = f"Explain all details of what you see in this image with the corresponding caption: {caption}."

messages = [ 
    {"role": "user", "content": f"<|image_1|>\n{question}"}, 
    ] 

# url = "https://www.quantamagazine.org/wp-content/uploads/2018/02/Ants_Lede1300.jpg" 

# image = Image.open(requests.get(url, stream=True).raw) 

# image = Image.open('./paper_new/Profile evolution during polysilicon gate etching with low-pressure high-density cl2 hbr o2  plasma chemistries/4_image_0.png')

prompt = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = processor(prompt, [image], return_tensors="pt").to("cuda:0") 

generation_args = { 
                    "max_new_tokens": 512, 
                    "temperature": 0.1, 
                    "do_sample": True, 
                    "stop_strings": ['<|end|>',
                                     '<|endoftext|>'],
                    "tokenizer": processor.tokenizer,
                  } 

generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args) 

# remove input tokens 
generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] 

print(response)
image