In [2]:
from llama_cpp import Llama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
# from generate_mindmap import generate_mindmap_svg

In [2]:
llm = Llama(
    model_path="Llama-3.2-1B-Instruct-Q8_0.gguf",
    n_gpu_layers = -1,
    n_ctx=100000,
    n_batch=4096,
    # main_gpu=0
)

llama_model_loader: loaded meta data with 35 key-value pairs and 147 tensors from Llama-3.2-1B-Instruct-Q8_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.2 1B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Llama-3.2
llama_model_loader: - kv   5:                         general.size_label str              = 1B
llama_model_loader: - kv   6:                            general.license str              = llama3.2
llama_model_loader: - kv   7:                       

In [3]:
loader = PyPDFLoader("cr1c00107.pdf")
pages = loader.load_and_split()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
texts = text_splitter.split_documents(pages)
final_text = ""
for text in texts:
    if text.page_content.startswith("REFERENCES"):
        break
    else:
        final_text = final_text + text.page_content
research_paper = ""
for text in final_text:
    if text.startswith(("REFERENCES", "REFERENCESREFERENCES", "REFERENCESREFERENCESREFERENCES")):
        break
    else:
        research_paper = research_paper + text

In [12]:
research_paper = research_paper[:100000]
print("Research Paper: ", research_paper)
print("Research Paper Length: ", len(research_paper))

Research Paper:  Combining Machine Learning and Computational Chemistry for
Predictive Insights Into Chemical Systems
John A. Keith,* Valentin Vassilev-Galindo, Bingqing Cheng, Stefan Chmiela, Michael Gastegger,
Klaus-Robert Müller,* and Alexandre Tkatchenko*Klaus-Robert Müller,* and Alexandre Tkatchenko*
Cite This: Chem. Rev. 2021, 121, 9816−9872 Read Online
ACCESS Metrics & More Article Recommendations
ABSTRACT: Machine learning models are poised to make a transformative impact onchemical sciences by dramatically accelerating computational algorithms and amplifying
insights available from computational chemistry methods. However, achieving this requires aconﬂuence and coaction of expertise in computer science and physical sciences. This Review
is written for new and experienced researchers working at the intersection of bothﬁelds. Weﬁrst provide concise tutorials of computational chemistry and machine learning methods,
showing how insights involving both can be achieved. We follow 

In [13]:
prompt = f'''
You have been provided with a research paper in text format. Your task is to generate a mindmap structure in markdown format that summarizes the research paper.
Your output should use the language \"en\" 0.3 times the length of the original research paper. Do not include anything in the response, that is not the part of mindmap and use the following template (any node in the mindmap should not exceed 10-12 words, also generate additional headings that aren't present in document if required for elaborative explaination):
    # {{Title}} (should be the title of the research paper)
    ## {{Subtitle01}} (as required and as many as required in markdown format)
    - {{Emoji01}} Bulletpoint01 (as required and as many as required in markdown format)
        - {{Emoji01.1}} Bulletpoint01.1 (as required and as many as sub levels required in markdown format)
            - {{Emoji01.1.1}} Bulletpoint01.1.1 (as required and as many as sub levels required in markdown format)
            - {{Emoji01.1.2}} Bulletpoint01.1.2 (as required and as many as sub levels required in markdown format)
        - {{Emoji01.2}} Bulletpoint01.2 (as required and as many as sub levels required in markdown format)
    - {{Emoji02}} Bulletpoint02 (as required and as many as required in markdown format)
        - {{Emoji02.1}} Bulletpoint02.1 (as required and as many as sub levels required in markdown format)
        - {{Emoji02.2}} Bulletpoint02.2 (as required and as many as sub levels required in markdown format)
            - {{Emoji02.2.1}} Bulletpoint02.2.1 (as required and as many as sub levels required in markdown format)
            - {{Emoji02.2.2}} Bulletpoint02.2.2 (as required and as many as sub levels required in markdown format)
            - {{Emoji02.2.3}} Bulletpoint02.2.3 (as required and as many as sub levels required in markdown format)
            - {{Emoji02.2.4}} Bulletpoint02.2.4 (as required and as many as sub levels required in markdown format)
    ## {{Subtitle02}} (as required and as many as required in markdown format)
    - {{Emoji03}} Bulletpoint03 (as required and as many as required in markdown format)
        - {{Emoji03.1}} Bulletpoint03.1 (as required and as many as sub levels required in markdown format)
        - {{Emoji03.2}} Bulletpoint03.2 (as required and as many as sub levels required in markdown format)
            - {{Emoji03.2.1}} Bulletpoint03.2.1 (as required and as many as sub levels required in markdown format)
            - {{Emoji03.2.2}} Bulletpoint03.2.2 (as required and as many as sub levels required in markdown format)
    - {{Emoji04}} Bulletpoint04 (as required and as many as required in markdown format)
        - {{Emoji04.1}} Bulletpoint04.1 (as required and as many as sub levels required in markdown format)
            - {{Emoji04.1.1}} Bulletpoint04.1.1 (as required and as many as sub levels required in markdown format)
            - {{Emoji04.1.2}} Bulletpoint04.1.2 (as required and as many as sub levels required in markdown format)
        - {{Emoji04.2}} Bulletpoint04.2 (as required and as many as sub levels required in markdown format)
            - {{Emoji04.2.1}} Bulletpoint04.2.1 (as required and as many as sub levels required in markdown format)
            - {{Emoji04.2.2}} Bulletpoint04.2.2 (as required and as many as sub levels required in markdown format)
    Summarize the text \"{research_paper}\" to generate a elaborated hierarchical mindmap structure (any node in the mindmap should not exceed 10-12 words, also generate additional headings that aren't present in document if required for elaborative explaination) markdown using the \"en\" language 0.3 times the length of the original research paper. Do not include anything in the response, that is not the part of mindmap
'''

In [4]:
prompt = f'''
As a text script expert, please help me to write a short text script with the topic \\"{research_paper}\\".Your output should only and very strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles, any node in the mindmap should not exceed 10-12 words) structure markdown.\\n Do not include anything in the response, that is not the part of mindmap.\\n  Importantly your output must use language \\"English\\""
'''

In [6]:
print(prompt.replace('\n', '\\n'))


\nAs a text script expert, please help me to write a short text script with the topic \"Combining Machine Learning and Computational Chemistry for\nPredictive Insights Into Chemical Systems\nJohn A. Keith,* Valentin Vassilev-Galindo, Bingqing Cheng, Stefan Chmiela, Michael Gastegger,\nKlaus-Robert Müller,* and Alexandre Tkatchenko*Klaus-Robert Müller,* and Alexandre Tkatchenko*\nCite This: Chem. Rev. 2021, 121, 9816−9872 Read Online\nACCESS Metrics & More Article Recommendations\nABSTRACT: Machine learning models are poised to make a transformative impact onchemical sciences by dramatically accelerating computational algorithms and amplifying\ninsights available from computational chemistry methods. However, achieving this requires aconﬂuence and coaction of expertise in computer science and physical sciences. This Review\nis written for new and experienced researchers working at the intersection of bothﬁelds. Weﬁrst provide concise tutorials of computational chemistry and machine le

In [14]:
response = llm.create_chat_completion(
    messages = [
        {'role':'system',
         'content': 'You are a helpful research assistant for generating mindmaps in MarkDown format from scientific research papers.'},
        {'role':'user',
        'content': prompt}
    ],
    temperature=0.8,
    top_k=500,
    top_p=3.0,
    presence_penalty = 1.0,
    frequency_penalty = 1.1,
    repeat_penalty=5.0,
)
mindmap_data = response['choices'][0]['message']['content']
print(mindmap_data)

Llama.generate: 49 prefix-match hit, remaining 13717 prompt tokens to eval
llama_perf_context_print:        load time =   41052.59 ms
llama_perf_context_print: prompt eval time =       0.00 ms / 13717 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   333 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   57494.41 ms / 14050 tokens


## Mindmap Structure

# Introduction
- Background: Computational chemistry and machine learning (ML) are two fields that have been growing rapidly in recent years.
  - Advantages of combining them:
    * Improved accuracy for chemical systems modeling.

### CompChem Methods Overview


#### Models & Levels Of Abstraction 
* Extract information from data using simple models, e.g., ideal gas equation
- Ideal Gas Equation: PV = nRT

## Wavefunction Theory (WF) and CorrelatedWavefunctions 

| **Method** |  Description |
|:-----------:|---------------|
1. Hartree-Fock   || HF method for electronic structure calculations.
2.Hartmann Fokker    ||
3.Coulombic Interaction|| CI methods to account electron-electron interactions.

## Hierarchies of Methods

### Wavefunction Theory (WF) Hierarchy
| **Method** |  Description |
|:-----------:|---------------|
1. Hartree-Fock   || HF method for electronic structure calculations.
2.Hartmann Fokker    ||
3.Coulombic Interaction|| CI methods to account el

In [15]:
svg = generate_mindmap_svg(mindmap_data)
try:
    print(svg)
except:
    svg

Introduction_mindmap.svg
