In [None]:
import requests
import json

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

## Set up the LM Studio endpoint

In [5]:

# Chat-completions route of the LM Studio server running on this machine
url = "http://127.0.0.1:1234/v1/chat/completions"

# The prompt that will be sent to the model
question = "What can you tell me about large language models?"

# HTTP headers: the request body is JSON
headers = {"Content-Type": "application/json"}

# Request body: a single user message plus the sampling settings
payload = dict(
    messages=[dict(role="user", content=question)],
    temperature=0.7,
    max_tokens=50,
)


In [6]:

# Send the chat-completion request to the LM Studio endpoint and print the
# model's reply. Reads `url`, `payload` and `headers` defined earlier in the
# notebook; every failure mode is reported with a printed message instead of
# a traceback.
try:
    # (connect, read) timeouts in seconds: fail fast when the server is not
    # running, but leave the model time to generate. Without a timeout,
    # requests waits indefinitely on a stalled server.
    response = requests.post(url, json=payload, headers=headers, timeout=(5, 120))
    response.raise_for_status()  # Raise an exception for HTTP errors

    # Parse the response
    result = response.json()

    # Extract and print the answer. Any deviation from the expected
    # {"choices": [{"message": {"content": ...}}]} shape is reported rather
    # than raising KeyError/IndexError/TypeError.
    try:
        answer = result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        print("Unexpected response format:", result)
    else:
        print("Model response:")
        print(answer)

except json.JSONDecodeError:
    # Must come before RequestException: recent requests versions raise
    # requests.exceptions.JSONDecodeError, which subclasses both, so the
    # generic handler would otherwise shadow this one.
    print(f"Error parsing response as JSON: {response.text}")
except requests.exceptions.RequestException as e:
    print(f"Error making request: {e}")

Model response:
Okay, let's break down Large Language Models (LLMs). They're a *very* hot topic right now and driving a lot of innovation. Here's a comprehensive overview covering what they are, how they work, their capabilities


## Local LLMs with LangChain

In [None]:
# LM Studio exposes an OpenAI-compatible API, so LangChain's ChatOpenAI
# client can talk to it once it is pointed at the local server.
llm = ChatOpenAI(
    base_url="http://127.0.0.1:1234/v1",  # local LM Studio endpoint
    api_key="not-needed",  # placeholder: LM Studio needs no key, but the parameter must be set
    model="gemma-3",  # any string works; LM Studio answers with whichever model is loaded
    streaming=True,
    max_tokens=500,
    temperature=0.7,
)

In [9]:
# Single-turn template: the user message is exactly the {input} placeholder
prompt = ChatPromptTemplate.from_messages([("user", "{input}")])

# Compose template and model so the rendered prompt is fed to the LLM
chain = prompt | llm

# Ask the same question through the chain; any failure is printed, not raised
try:
    result = chain.invoke({"input": question})
    print("Model response:")
    print(result.content)
except Exception as err:
    print(f"Error: {err}")

Model response:
Okay, let's dive into Large Language Models (LLMs). They're a *very* hot topic right now, and for good reason – they're fundamentally changing how we interact with computers. Here's a breakdown covering what they are, how they work, their capabilities, limitations, and current trends.  I'll try to be as clear as possible without getting *too* technical (though some technical explanation is necessary).

**1. What ARE Large Language Models?**

* **Essentially, sophisticated text predictors:** At their core, LLMs are designed to predict the next word in a sequence of words. They do this based on patterns they've learned from massive amounts of text data.
* **"Large" refers to two things:**
    * **Size of Data:**  They're trained on *massive* datasets – think billions or even trillions of words scraped from the internet, books, articles, code repositories, and more.
    * **Number of Parameters:** "Parameters" are essentially the adjustable knobs within the model that allo