### 加载 .env 文件

In [1]:
from dotenv import load_dotenv

import os

load = load_dotenv('./../.env')

print(os.getenv("LANGSMITH_API_KEY"))

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


### 创建LLM模型

In [7]:
from langchain_ollama import ChatOllama

llm = ChatOllama(
    base_url="http://localhost:11434",
    model="deepseek-r1:8b",
    temperature=0.5,
    max_tokens=250
)

llm2 = ChatOllama(
    base_url="http://localhost:11434",
    model="deepseek-r1:8b",
    temperature=0.5,
    max_tokens=250
)

### 理解 Chanining & Runnable

In [3]:
from langchain_core.prompts import (
    ChatPromptTemplate
)

prompt_template = ChatPromptTemplate([
    ("system", "You are an LLM expert"),
    ("user", "What is the advantage of running AI Models in {env}")
])

chain = prompt_template | llm

chain.invoke({"env": "local machine"})

AIMessage(content="<think>\nOkay, so I'm trying to figure out why someone might want to run AI models on their own machine instead of using cloud-based services. Let me think through this step by step.\n\nFirst, data privacy comes to mind. If I have sensitive information that I don't want to store somewhere else, running it locally makes sense. I can control where the data goes and ensure it's not stored online. That seems like a big advantage for certain industries like healthcare or finance where data security is crucial.\n\nThen there's latency and speed. Maybe if you're processing something in real-time, like trading on the stock market, having a model locally could reduce delays caused by sending data to the cloud and waiting for a response. That could be a big win for performance-critical applications.\n\nCost efficiency might also be a factor. If I'm running models frequently, paying for each run on the cloud can add up. Owning the hardware could save money in the long run, espe

### 字符串解析器

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

prompt_template = ChatPromptTemplate([
    ("system", "You are an LLM expert"),
    ("user", "What is the advantage of running AI Models in {env}")
])

chain = prompt_template | llm | StrOutputParser()

response = chain.invoke({"env": "local machine"})

print(response)

<think>
Okay, so I'm trying to figure out why someone might want to run AI models locally on their own machine instead of using the cloud. Hmm, let me think about this step by step.

First, I know that when you run something in the cloud, like on AWS or Google Cloud, it's usually handled by powerful servers with lots of GPUs and TPUs. That means your model can access a lot of computational power, which is great for training large models quickly. But maybe there are situations where you don't want to send all that data off to a third party.

Privacy and data security might be one reason. If the data is sensitive or contains personal information, sending it to the cloud could violate regulations like GDPR or HIPAA. So running it locally would keep the data on-premises, which is safer.

Another thought is about latency and performance. If you're in a remote area with bad internet connectivity, uploading large datasets to the cloud might take forever. Processing them locally could save tim

### Chaining Multiple Chains

In [6]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


prompt_template = ChatPromptTemplate([
    ("system", "You are an LLM expert"),
    ("user", "What is the advantage of running AI Models in {env}")
])

detailedResponseChain = prompt_template | llm | StrOutputParser()

headingInfoTemplate = ChatPromptTemplate.from_template(
    """
Analysis the resposne and get me just the heading from the {response}

Response should be in bullet points 
    """
)

chainWithHeading = { "response": detailedResponseChain } | headingInfoTemplate | llm | StrOutputParser()

chainWithHeading.invoke({"env": "local machine"})

print(response)

<think>
Okay, so I'm trying to figure out why someone might want to run AI models locally on their own machine instead of using the cloud. Hmm, let me think about this step by step.

First, I know that when you run something in the cloud, like on AWS or Google Cloud, it's usually handled by powerful servers with lots of GPUs and TPUs. That means your model can access a lot of computational power, which is great for training large models quickly. But maybe there are situations where you don't want to send all that data off to a third party.

Privacy and data security might be one reason. If the data is sensitive or contains personal information, sending it to the cloud could violate regulations like GDPR or HIPAA. So running it locally would keep the data on-premises, which is safer.

Another thought is about latency and performance. If you're in a remote area with bad internet connectivity, uploading large datasets to the cloud might take forever. Processing them locally could save tim

### Running chains in Parallel

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel

prompt_template = ChatPromptTemplate([
    ("system", "You are an LLM expert"),
    ("user", "What is the advantage of running AI Models in {env}")
])

detailedResponseChain = prompt_template | llm | StrOutputParser()

headingInfoTemplate = ChatPromptTemplate.from_template(
    """
Analysis the resposne and get me just the heading from the {response}

Response should be in bullet points 
    """
)

chainWithHeading = { "response": detailedResponseChain } | headingInfoTemplate | llm2 | StrOutputParser()

parallelRunnables = RunnableParallel(chain1=detailedResponseChain, chain2=chainWithHeading)

response = parallelRunnables.invoke({"env": "local machine"})

print(response['chain1'])
print('\n\n')
print(response['chain2'])

{'chain1': "<think>\nOkay, so I'm trying to figure out why someone would want to run AI models locally on their own machine instead of using a cloud service. I know that cloud services like AWS or Google Cloud offer powerful AI tools, but maybe there are situations where having it on your own machine is better.\n\nFirst, I think about data privacy and control. If you're working with sensitive data, maybe it's better not to send it off to a remote server. You can manage everything on your own machine without worrying about data breaches or who might access the data. That makes sense because not all industries can risk exposing their data like that.\n\nThen there's the issue of latency and speed. If you're processing something in real-time, like video analysis or something time-sensitive, having it on a local machine could be faster than waiting for a cloud service to respond. I remember reading that sometimes cloud services have delays, especially if they're really popular. So for somet

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel


localMachineTemplate = ChatPromptTemplate([
    ("system", "You are an LLM expert"),
    ("user", "What is the advantage of running AI Models in {env}")
])

localMachineChain = localMachineTemplate | llm | StrOutputParser()

cloudMachineTemplate = ChatPromptTemplate.from_template(
    """
Analysis the resposne and get me just the heading from the {machine} 

Response should be in bullet points 
    """
)

cloudMachineChain = cloudMachineTemplate | llm2 | StrOutputParser()

parallelRunnables = RunnableParallel(chain1=localMachineChain, chain2=cloudMachineChain)

response = parallelRunnables.invoke({"env": "local machine", "machine": "cloud machine"})

print(response['chain1'])
print('\n\n')
print(response['chain2'])

### 运行 Lambda

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda

prompt_template = ChatPromptTemplate([
    ("system", "You are an LLM expert"),
    ("user", "What is the advantage of running AI Models in {env}")
])

detailedResponseChain = prompt_template | llm | StrOutputParser()

headingInfoTemplate = ChatPromptTemplate.from_template(
    """
Analysis the resposne and get me just the heading from the {response}

Response should be in bullet points 
    """
)

def choose_llm(response):
    response_text = str(response)
    if len(response_text) < 300:
        return llm2
    return llm

llm_selector = RunnableLambda(choose_llm)

chainWithHeading = { "response": detailedResponseChain } | headingInfoTemplate | llm_selector | StrOutputParser()

parallelRunnables = RunnableParallel(chain1=detailedResponseChain, chain2=chainWithHeading)

response = parallelRunnables.invoke({"env": "local machine"})

print(response['chain1'])
print('\n\n')
print(response['chain2'])

### 使用 @Chain 装饰器

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import chain

prompt_template = ChatPromptTemplate([
    ("system", "You are an LLM expert"),
    ("user", "What is the advantage of running AI Models in {env}")
])

detailedResponseChain = prompt_template | llm | StrOutputParser()

headingInfoTemplate = ChatPromptTemplate.from_template(
    """
Analysis the resposne and get me just the heading from the {response}

Response should be in bullet points 
    """
)

@chain
def choose_llm(response):
    response_text = str(response)
    if len(response_text) < 300:
        return llm2
    return llm

llm_selector = RunnableLambda(choose_llm)

chainWithHeading = { "response": detailedResponseChain } | headingInfoTemplate | llm_selector | StrOutputParser()

parallelRunnables = RunnableParallel(chain1=detailedResponseChain, chain2=chainWithHeading)

response = parallelRunnables.invoke({"env": "local machine"})

print(response['chain1'])
print('\n\n')
print(response['chain2'])