In [None]:
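# Run in a terminal first: ollama pull gemma3:4b  (the tag must match the 'gemma3:4b' model name used in the cells below)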

In [1]:
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

### 로컬모델 실행 위한 GPU 사용 환경 확인하기

In [None]:
!nvidia-smi

In [None]:
import torch
# Displayed as the cell's result: whether PyTorch reports CUDA as available.
torch.cuda.is_available()

### Ollama로 Gemma3 4B 추론하기

In [None]:
%pip install -U ollama

In [None]:
from ollama import chat
from ollama import ChatResponse

# Conversation to send: a single user turn.
greeting_messages = [
    {'role': 'user', 'content': '안녕하세요, 당신은 누구인가요?'},
]

# Generation options forwarded with the request.
generation_options = {
    'temperature': 0.7,          # creativity control (0.0 ~ 1.0)
    'top_p': 0.9,                # sampling probability threshold
    'top_k': 40,                 # number of top candidate tokens considered for the next token
    'num_predict': 100,          # maximum number of tokens to generate
    'repeat_penalty': 1.1,       # repetition penalty
    'presence_penalty': 0,       # adjusts how readily new topics appear
    'frequency_penalty': 0,      # adjusts reduction of word repetition
    'stop': ['\n', 'User:'],     # sequences that end generation
}

response: ChatResponse = chat(
    model='gemma3:4b',
    messages=greeting_messages,
    options=generation_options,
)

print(response.message.content)

In [None]:
from ollama import chat

# Request a streamed reply so the text can be shown while it is generated.
stream = chat(
    model='gemma3:4b',
    messages=[{'role': 'user', 'content': '안녕하세요, 당신은 누구인가요?'}],
    stream=True,
)

# Print each partial message as it arrives, without inserting line breaks.
for piece in stream:
    print(piece.message.content, end='', flush=True)

In [6]:
# 전체 사용 예시
import ollama
import base64
from PIL import Image
import matplotlib.pyplot as plt
import io
import asyncio
from ollama import AsyncClient

# Image conversion helpers
def image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode; the base64 bytes are decoded as UTF-8.
    """
    with open(image_path, "rb") as src:
        raw_bytes = src.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def pil_to_base64(image):
    """Serialize ``image`` as PNG into memory and return it as base64 text.

    ``image`` only needs a ``save(fp, format=...)`` method; it is called with
    an in-memory buffer and ``format="PNG"``.
    """
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode('utf-8')

def display_image(image_path):
    """Open the image at ``image_path``, show it, and return the opened image.

    The image is drawn on a new 10x10 figure with the axes hidden.
    """
    picture = Image.open(image_path)

    plt.figure(figsize=(10, 10))
    plt.imshow(picture)
    plt.axis('off')
    plt.show()

    return picture

# Basic image inference
def analyze_image(image_path, prompt="이 이미지에 대해 설명해주세요.", model_name="gemma3:4b"):
    """Show an image, ask an Ollama model about it, and return the answer.

    Args:
        image_path: Path of the image file to display and send to the model.
        prompt: Text sent together with the image as the user message.
        model_name: Name of the Ollama model to query. Defaults to
            "gemma3:4b", the value that used to be hard-coded here, so
            existing two-argument calls behave exactly as before.

    Returns:
        The text content of the model's reply message.
    """
    # Render the image so the reader can compare it with the answer.
    display_image(image_path)

    # Send a single user turn carrying the prompt and the base64-encoded image.
    response = ollama.chat(
        model=model_name,
        messages=[
            {
                "role": "user",
                "content": prompt,
                "images": [image_to_base64(image_path)]
            }
        ]
    )

    # Return (rather than print) the reply text; the caller decides how to show it.
    return response['message']['content']

In [None]:
# Usage example
# 1. Analyze a single image
result = analyze_image("./data/docling_processing.png", "이 이미지에서 무엇이 보이나요?")
print(result)

### Huggingface로 Gemma3 4B 추론하기

In [None]:
%pip install -U transformers

In [None]:
from transformers import pipeline
import torch

# Build an "image-text-to-text" pipeline for google/gemma-3-4b-it, targeting
# the "cuda" device with torch_dtype set to bfloat16.
pipe = pipeline(
    "image-text-to-text",
    model="google/gemma-3-4b-it",
    device="cuda",
    torch_dtype=torch.bfloat16
)

In [None]:
# Chat-format input: a system instruction followed by a user turn that
# combines an image reference with a text question.
messages = [
    {
        "role": "system",
        "content": [{"type": "text", "text": "You are a helpful assistant."}],
    },
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "./data/docling_processing.png"},
            {"type": "text", "text": "이 이미지를 설명해주세요"},
        ],
    },
]

output = pipe(text=messages, max_new_tokens=200)

# Take the last entry of the returned conversation (presumably the model's
# reply) and print its content.
final_turn = output[0]["generated_text"][-1]
print(final_turn["content"])

### Together AI API로 Llama 4 추론하기

Together AI API Playground URL

https://api.together.ai/playground/v2/chat/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8

In [None]:
# %pip install --upgrade together

In [None]:
from dotenv import load_dotenv
# Load settings from a .env file. Presumably this supplies the API keys that
# the provider clients created below read from the environment — verify
# against your own setup.
load_dotenv()

In [None]:
from together import Together

# Client created with default arguments (credentials presumably come from the
# environment).
client = Together()

user_turn = {"role": "user", "content": "안녕하세요, 당신은 누구인가요?"}
response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[user_turn],
)

first_choice = response.choices[0]
print(first_choice.message.content)

In [None]:
from together import Together
import base64

# Path of the image used in this example.
imagePath = "./data/docling_processing.png"


def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, "rb") as image_file:
        payload = image_file.read()
    return base64.b64encode(payload).decode('utf-8')

base64_image = encode_image(imagePath)

# Send the prompt and the image in one user turn and stream the reply.
# The sample file is a PNG, so the data URL declares image/png; keep the
# media type in sync with the real format of the file being sent.
stream = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "이 이미지를 한글로 설명해주세요"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}",
                    },
                },
            ],
        }
    ],
    stream=True,
)

# Print each streamed fragment as it arrives. A chunk with no choices, or a
# delta whose content is empty/None, prints as an empty string.
for chunk in stream:
    text = chunk.choices[0].delta.content if chunk.choices else None
    print(text or "", end="", flush=True)

### Groq AI로 Llama 4 추론하기

Groq Cloud API Playground

https://console.groq.com/playground

In [None]:
# %pip install groq

In [None]:
import os

from groq import Groq

client = Groq()

# Conversation: a system instruction followed by the user's question.
conversation = [
    {"role": "system", "content": "you are a helpful assistant."},
    {"role": "user", "content": "첫번째로 개발된 LLM은 무엇인가요?"},
]

chat_completion = client.chat.completions.create(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    messages=conversation,
)

print(chat_completion.choices[0].message.content)

In [None]:
# Imported here because encode_image() below needs it; every other cell in
# this notebook that uses base64 imports it locally as well, so this cell no
# longer depends on an earlier cell having been run.
import base64

# Path of the image used in this example.
imagePath = "./data/docling_processing.png"


def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to a UTF-8 string.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

base64_image = encode_image(imagePath)

# Send a text question and the image in one user turn (non-streaming).
# The sample file is a PNG, so the data URL declares image/png; keep the
# media type in sync with the real format of the file being sent.
completion = client.chat.completions.create(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What's in this image?"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}",
                    }
                }
            ]
        }
    ],
    temperature=1,
    max_completion_tokens=1024,
    top_p=1,
    stream=False,
    stop=None,
)

print(completion.choices[0].message.content)

### Cerebras로 Llama 4 추론하기

Cerebras Cloud API Playground

https://cloud.cerebras.ai/

In [None]:
# %pip install cerebras_cloud_sdk

In [None]:
from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Single user question sent to the model.
question_turn = {
    "role": "user",
    "content": "첫번째로 개발된 LLM은 무엇인가요?",
}

chat_completion = client.chat.completions.create(
    model="llama-4-scout-17b-16e-instruct",
    messages=[question_turn],
)

print(chat_completion.choices[0].message.content)

### Langchain으로 여러 오픈소스 플랫폼의 LLM 활용하기

In [None]:
from langchain_huggingface.llms import HuggingFacePipeline
import torch

# Create a LangChain HuggingFacePipeline for google/gemma-3-1b-it using the
# "text-generation" task.
# pipeline_kwargs requests at most 10 new tokens and bfloat16 as torch_dtype;
# device=0 selects device index 0 (presumably the first GPU — confirm for
# your machine).
hf = HuggingFacePipeline.from_model_id(
    model_id="google/gemma-3-1b-it",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 10, "torch_dtype": torch.bfloat16},
    device=0,
)

In [None]:
# (role, text) pairs: a system instruction and the user's greeting.
messages = [("system", "You are a helpful assistant."), ("human", "안녕하세요, 당신은 누구인가요?")]

generated_text = hf.invoke(messages)
print(generated_text)

In [None]:
from langchain_ollama import ChatOllama

# Chat model backed by the local gemma3:4b model; further parameters can be
# passed to the constructor as needed.
llm = ChatOllama(model="gemma3:4b", temperature=0, repeat_penalty=1.1)

# (role, text) pairs: a system instruction and the user's greeting.
messages = [
    ("system", "You are a helpful assistant"),
    ("human", "안녕하세요, 당신은 누구인가요?"),
]

ai_msg = llm.invoke(messages)
print(ai_msg.content)

In [None]:
# Stream the reply for `messages`, printing each chunk's content as it
# arrives without adding line breaks.
for chunk in llm.stream(messages):
    print(chunk.content, end="", flush=True)

In [None]:
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, SystemMessage
import base64
from PIL import Image
import io

# Helper that encodes image data as base64
def encode_image_to_base64(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, "rb") as image_file:
        file_bytes = image_file.read()
    b64_bytes = base64.b64encode(file_bytes)
    return b64_bytes.decode('utf-8')

# Example of a message that combines an image with text.
image_path = "./data/docling_processing.png"  # change to your actual image path
image_b64 = encode_image_to_base64(image_path)

# Ollama chat model that will receive the image. gemma3:4b is used here (this
# notebook already sends images to it through the ollama client); other
# vision-capable Ollama models such as llava, bakllava or llava-llama3 can be
# substituted.
llm = ChatOllama(
    model="gemma3:4b",  # or another multimodal-capable model
    temperature=0,
    repeat_penalty=1.1,
)

# Build the multimodal message. The sample file is a PNG, so the data URL
# declares image/png; keep the media type in sync with the real file format.
multimodal_message = HumanMessage(
    content=[
        {
            "type": "image_url",
            "image_url": f"data:image/png;base64,{image_b64}",
        },
        {
            "type": "text", 
            "text": "이 이미지에 대해 설명해주세요."
        }
    ]
)

# Invoke the model with a system message followed by the multimodal message.
multimodal_response = llm.invoke([SystemMessage(content="You are a helpful assistant."), multimodal_message])
print(multimodal_response.content)

In [None]:
%pip install -qU langchain-groq

In [None]:
from langchain_groq import ChatGroq
import base64

llm = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct", temperature=1)

# (role, text) pairs: a system instruction and the user's greeting.
messages = [
    ("system", "You are a helpful assistant."),
    ("human", "안녕하세요, 당신은 누구인가요?"),
]

# Stream the text reply, printing each chunk as it arrives.
reply_chunks = llm.stream(messages)
for chunk in reply_chunks:
    print(chunk.content, end="", flush=True)

In [None]:
from langchain_groq import ChatGroq
import base64

# Chat model for the Groq-hosted model named below, with temperature set to 1.
llm = ChatGroq(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    temperature=1,
)

# Image path
imagePath = "./data/docling_processing.png"


# Image encoding helper
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, "rb") as image_file:
        contents = image_file.read()
    return base64.b64encode(contents).decode('utf-8')

# Encode the image
base64_image = encode_image(imagePath)

# Message list that sends the image together with a text question.
# The sample file is a PNG, so the data URL declares image/png; keep the
# media type in sync with the real format of the file being sent.
multimodal_messages = [
    ("system", "You are a helpful assistant."),
    ("human", [
        {
            "type": "text",
            "text": "What's in this image?"
        },
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/png;base64,{base64_image}",
            }
        }
    ])
]

# Stream the reply, printing each chunk's content as it arrives.
for chunk in llm.stream(multimodal_messages):
    print(chunk.content, end="", flush=True)

### init_chat_model로 동일한 형식으로 모델 불러오기

In [None]:
from langchain.chat_models import init_chat_model

# Create one chat model per provider through the same init_chat_model() call.
#
# NOTE: the models are deliberately NOT bound to the names `ollama` / `groq`.
# This notebook imports the `ollama` package under that name and
# analyze_image() calls ollama.chat(); rebinding `ollama` to a chat model
# would make that function fail when it is run after this cell.
ollama_llm = init_chat_model(
    "gemma3:4b", model_provider="ollama", temperature=0
)

groq_llm = init_chat_model(
    "meta-llama/llama-4-scout-17b-16e-instruct", model_provider="groq", temperature=0
)

# Display name and model object for each provider to compare.
llm_list = [
    {"name": "Ollama (Gemma 3 4B)", "model": ollama_llm},
    {"name": "Groq (Llama-4-Scout-17B)", "model": groq_llm},
]

# Send the same prompt to every model and print each reply under its name.
for llm_info in llm_list:
    print(f"모델: {llm_info['name']}")
    print("="*50)
    response = llm_info["model"].invoke("안녕하세요, 당신은 누구인가요?")
    print(response.content)
    print("-"*50)
    print("\n")