In [10]:
import os

In [12]:
from langchain.schema import Document
from langchain.utilities import ApifyWrapper
from langchain.indexes import VectorstoreIndexCreator

# Crawl the Mermaid documentation site with the Apify "website-content-crawler"
# actor and expose the crawl results as `loader` for the indexing cell.
# NOTE(review): ApifyWrapper presumably needs an Apify API token from the
# environment — confirm it is set before running this cell.
apify = ApifyWrapper()

# Start URL handed to the crawler.
url = 'https://mermaid.js.org/'

loader = apify.call_actor(
    actor_id='apify/website-content-crawler',
    run_input={'startUrls': [{'url': url}]},
    # Convert each crawled dataset item into a Document. `or ''` substitutes an
    # empty string when the item's 'text' is falsy (e.g. None), and the page URL
    # is kept as the 'source' metadata used later for source attribution.
    dataset_mapping_function=lambda item: Document(
        page_content=item['text'] or '', 
        metadata={'source': item['url']}
    ),
)

In [19]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import FAISS

# Create the embedding model (a HuggingFace sentence-transformers checkpoint).
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Create the index: a FAISS-backed vector store built from the Apify `loader`
# defined in the crawl cell, using the embedding model above.
index = VectorstoreIndexCreator(
    vectorstore_cls=FAISS,
    embedding=embeddings
).from_loaders([loader])

  from tqdm.autonotebook import tqdm, trange


In [22]:
from langchain_openai import OpenAI
from langchain.indexes import VectorstoreIndexCreator

# Create the LLM (temperature=0 requests the least random completions).
llm = OpenAI(temperature=0)
# Sanity-check the index with a question about the crawled documentation.
query = 'What is the syntax for flowcharts?'
result = index.query_with_sources(query, llm=llm)
# Bare expression: the notebook displays the question/answer/sources dict.
result

{'question': 'What is the syntax for flowcharts?',
 'answer': ' The syntax for flowcharts includes using double asterisks for bold text and single asterisks for italics. The auto wrapping feature can be disabled by using a specific code. Chaining of links and multiple node links can also be declared in the same line. There are also new types of arrows supported, such as circle edge and cross edge. \n',
 'sources': 'https://mermaid.js.org/news/blog.html, https://mermaid.js.org/syntax/flowchart.html?id=flowcharts-basic-syntax, https://mermaid.js.org/syntax/flowchart.html?id=special-characters-that-break-syntax, https://mermaid.js.org/syntax/flowchart.html'}

In [34]:
# Expose the index's vector store as a retriever for the QA chain.
retriever = index.vectorstore.as_retriever()
# Change the number of documents the retriever returns per query to 10.
retriever.search_kwargs['k'] = 10

In [35]:
# Sample passage (an encyclopedia-style description of machine learning) that
# the QA chain is later asked to turn into a Mermaid diagram.
text = """
Machine learning (ML) is a field devoted to understanding and building methods that let machines "learn" – that is, methods that leverage data to improve computer performance on some set of tasks.[1] Machine learning algorithms build a model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to do so.[2] Machine learning algorithms are used in a wide variety of applications, such as in medicine, email filtering, speech recognition, agriculture, and computer vision, where it is difficult or unfeasible to develop conventional algorithms to perform the needed tasks.[3][4] A subset of machine learning is closely related to computational statistics, which focuses on making predictions using computers, but not all machine learning is statistical learning. The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning. Data mining is a related field of study, focusing on exploratory data analysis through unsupervised learning.[6][7] Some implementations of machine learning use data and neural networks in a way that mimics the working of a biological brain.[8][9] In its application across business problems, machine learning is also referred to as predictive analytics.
"""

In [75]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Chat model used for diagram generation. This rebinds `llm`, which an earlier
# cell assigned to a completion-style OpenAI model.
llm = ChatOpenAI(model_name="gpt-4o-mini")

# Retrieval-augmented QA chain: the retriever supplies Mermaid documentation
# chunks as context for the model.
mermaid_qa = RetrievalQA.from_chain_type(
    llm=llm, 
    retriever=retriever,
)

# Prompt template; `{text}` is filled in with the sample passage via str.format.
query = """
Your job is to write the code to generate a colorful mermaid diagram describing the logic of following text, you need to decide the topic.
Return only the code and make sure it has multiple colors, and remove the unnecessary punctuation.

TEXT: {text}
"""
# NOTE(review): newer LangChain releases reportedly deprecate `Chain.run` (in
# favor of `invoke`) and `langchain.chat_models` — confirm against the
# installed version before upgrading.
result = mermaid_qa.run(query.format(text=text))

In [76]:
# Display the raw chain response; the recorded output shows the Mermaid code
# wrapped in a ```mermaid fence.
result

'```mermaid\ngraph TD\n    A[Machine Learning] -->|Uses| B[Data]\n    A -->|Builds| C[Models]\n    B --> D[Training Data]\n    C --> E[Predictions]\n    C --> F[Decisions]\n    E -->|Applications| G[Medicine]\n    E -->|Applications| H[Email Filtering]\n    E -->|Applications| I[Speech Recognition]\n    E -->|Applications| J[Agriculture]\n    E -->|Applications| K[Computer Vision]\n    A --> L[Statistical Learning]\n    A --> M[Mathematical Optimization]\n    A --> N[Data Mining]\n    M --> O[Methods and Theory]\n    N --> P[Exploratory Data Analysis]\n    A --> Q[Predictive Analytics]\n    \n    style A fill:#ffcc00,stroke:#333,stroke-width:2px;\n    style B fill:#ff6699,stroke:#333,stroke-width:2px;\n    style C fill:#66ccff,stroke:#333,stroke-width:2px;\n    style D fill:#ffcc66,stroke:#333,stroke-width:2px;\n    style E fill:#99ff99,stroke:#333,stroke-width:2px;\n    style F fill:#ff9966,stroke:#333,stroke-width:2px;\n    style G fill:#ffccff,stroke:#333,stroke-width:2px;\n    styl

In [77]:
def extract_mermaid_code(text):
    """
    Extract Mermaid diagram code from an LLM response.

    Lookup order:
      1. A fence tagged exactly ```mermaid — the LAST such block is used.
      2. Otherwise the first generic ``` fence; a leading info-string line
         equal to "mermaid" in any letter case (e.g. ```Mermaid) is dropped.
      3. Otherwise the whole text is treated as Mermaid code.

    Args:
        text (str): Text that contains (or consists of) Mermaid code.

    Returns:
        str: The Mermaid code with surrounding whitespace removed and literal
        backslash-n sequences converted to real newlines.
    """
    fence = "```"
    tagged_fence = fence + "mermaid"

    if tagged_fence in text:
        # Take what follows the last ```mermaid marker, up to the closing fence.
        code = text.split(tagged_fence)[-1]
        code = code.split(fence)[0]
    elif fence in text:
        # Untagged (or differently-cased) fence: without this branch the fence
        # markers would be returned as part of the diagram and break rendering.
        code = text.split(fence)[1]
        first_line, newline, remainder = code.partition("\n")
        if newline and first_line.strip().lower() == "mermaid":
            code = remainder
    else:
        code = text

    # Clean up the code.
    code = code.strip()  # remove leading/trailing whitespace
    code = code.replace("\\n", "\n")  # turn literal "\n" sequences into newlines

    return code

# Strip the Markdown fence from the chain response so only diagram code remains.
mermaid_code = extract_mermaid_code(result)

In [78]:
import base64
from IPython.display import Image, display
import requests
import matplotlib.pyplot as plt

def mm(graph, save_path=None, timeout=30):
    """Render a Mermaid diagram through mermaid.ink and optionally save it.

    The diagram source is URL-safe base64 encoded and appended to the
    mermaid.ink ``/img/`` endpoint; the resulting URL is displayed inline.

    Args:
        graph (str): Mermaid diagram source.
        save_path (str | None): If given, the rendered image is downloaded
            and written to this path.
        timeout (float): Seconds to wait for the download request, so an
            unresponsive service cannot hang the kernel indefinitely.

    Returns:
        None. A status message is printed when ``save_path`` is given.

    Raises:
        requests.RequestException: If the download request itself fails
            (connection error, timeout). Non-200 responses only print a
            failure message.
    """
    graphbytes = graph.encode("utf8")
    base64_string = base64.urlsafe_b64encode(graphbytes).decode("ascii")
    url = "https://mermaid.ink/img/" + base64_string

    # Show the image inline (the notebook front end fetches the URL itself).
    display(Image(url=url))

    # Download and save the image only when a target path was given.
    if save_path:
        # mermaid.ink's /img/ endpoint is documented to serve JPEG unless a
        # `type` is requested; ask for the format matching the file extension
        # so e.g. "diagram.png" really contains PNG data.
        extension = str(save_path).rsplit(".", 1)[-1].lower()
        params = {"type": extension} if extension in ("png", "webp") else None
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code == 200:
            with open(save_path, 'wb') as f:
                f.write(response.content)
            print(f"Image saved to {save_path}")
        else:
            print("Failed to download image")

# Usage example: render the generated diagram and also save it to disk.
# NOTE(review): mermaid.ink's /img/ endpoint reportedly returns JPEG by
# default — confirm the saved bytes match the .png extension.
mm(mermaid_code, save_path="ml_diagram.png")

Image saved to ml_diagram.png


In [61]:
import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt

def mm(graph):
    """Display a Mermaid diagram inline using the mermaid.ink image service.

    Args:
        graph (str): Mermaid diagram source; it is UTF-8 encoded and then
            URL-safe base64 encoded to form the image URL.

    NOTE(review): this notebook defines `mm` in more than one cell; whichever
    cell ran last determines the active definition.
    """
    encoded = base64.urlsafe_b64encode(graph.encode("utf8")).decode("ascii")
    image = Image(url=f"https://mermaid.ink/img/{encoded}")
    display(image)

# Render a hand-written flowchart of the machine-learning passage. Node labels
# here contain no parentheses or double quotes, and nodes are colored by
# assigning them to the four classDef groups declared at the end.
mm("""
graph TD;
    A[Machine Learning ML] -->|devoted to| B[Understanding and Building Methods]
    A -->|methods that| C[Let Machines Learn]
    C -->|leverage| D[Data]
    D -->|improve| E[Computer Performance]
    B -->|build| F[Model]
    F -->|based on| G[Sample Data]
    G -->|known as| H[Training Data]
    F -->|make| I[Predictions or Decisions]
    I -->|without| J[Explicit Programming]
    A -->|used in| K[Wide Variety of Applications]
    K -->|such as| L[Medicine]
    K -->|such as| M[Email Filtering]
    K -->|such as| N[Speech Recognition]
    K -->|such as| O[Agriculture]
    K -->|such as| P[Computer Vision]

    classDef ml fill:#ffcc00,stroke:#000,stroke-width:2px;
    classDef methods fill:#00ccff,stroke:#000,stroke-width:2px;
    classDef data fill:#ff6699,stroke:#000,stroke-width:2px;
    classDef applications fill:#66ff66,stroke:#000,stroke-width:2px;

    class A ml;
    class B,C,D,E methods;
    class F,G,H,I,J data;
    class K,L,M,N,O,P applications;
""")

In [55]:
import base64
from IPython.display import Image, display

def mm(graph):
    """Show a Mermaid diagram inline by pointing an image at mermaid.ink.

    Args:
        graph (str): Mermaid diagram source text.

    The source is UTF-8 encoded, URL-safe base64 encoded, and appended to the
    service's /img/ path; the resulting URL is handed to IPython for display.
    """
    service_prefix = "https://mermaid.ink/img/"
    token = base64.urlsafe_b64encode(graph.encode("utf8"))
    display(Image(url=service_prefix + token.decode("ascii")))

# Complete diagram source.
# Labels containing parentheses or double quotes must be wrapped in double
# quotes (with inner quotes written as the #quot; entity); left unquoted they
# break Mermaid's flowchart parser and the diagram fails to render.
ml_diagram = """
graph TD;
    A["Machine Learning (ML)"] -->|devoted to| B[Understanding and Building Methods]
    A -->|methods that| C["Let Machines #quot;Learn#quot;"]
    C -->|leverage| D[Data]
    D -->|improve| E[Computer Performance]
    B -->|build| F[Model]
    F -->|based on| G[Sample Data]
    G -->|known as| H[Training Data]
    F -->|make| I[Predictions or Decisions]
    I -->|without| J[Explicit Programming]
    A -->|used in| K[Wide Variety of Applications]
    K -->|such as| L[Medicine]
    K -->|such as| M[Email Filtering]
    K -->|such as| N[Speech Recognition]
    K -->|such as| O[Agriculture]
    K -->|such as| P[Computer Vision]

    classDef ml fill:#ffcc00,stroke:#000,stroke-width:2px;
    classDef methods fill:#00ccff,stroke:#000,stroke-width:2px;
    classDef data fill:#ff6699,stroke:#000,stroke-width:2px;
    classDef applications fill:#66ff66,stroke:#000,stroke-width:2px;

    class A ml;
    class B,C,D,E methods;
    class F,G,H,I,J data;
    class K,L,M,N,O,P applications;
"""

# Display the diagram.
mm(ml_diagram)

In [57]:
# Creating a simple flowchart diagram
from python_mermaid.diagram import (
    MermaidDiagram,
    Node,
    Link
)

# Family members: the four March sisters, then their father.
meg, jo, beth, amy = (Node(name) for name in ("Meg", "Jo", "Beth", "Amy"))
robert = Node("Robert March")

the_march_family = [meg, jo, beth, amy, robert]

# Create links: one from the father to each daughter, in sister order.
family_links = [Link(robert, daughter) for daughter in (meg, jo, beth, amy)]

# Assemble the titled flowchart from the nodes and links above.
chart = MermaidDiagram(
    title="Little Women",
    nodes=the_march_family,
    links=family_links,
)

# Printing the diagram emits its Mermaid source text.
print(chart)

---
title: Little Women
---
graph 
meg["Meg"]
jo["Jo"]
beth["Beth"]
amy["Amy"]
robert_march["Robert March"]
robert_march ---> meg
robert_march ---> jo
robert_march ---> beth
robert_march ---> amy
