In [15]:
# 安装必要的依赖包
# 如果您还没有安装这些包，请运行以下命令：
# !pip install ollama requests

import ollama
import requests
import json

In [16]:
# Check whether the local Ollama service is reachable and list its models.
try:
    # The timeout keeps this cell from blocking indefinitely when the port
    # accepts the connection but the server never sends a response.
    response = requests.get('http://localhost:11434/api/tags', timeout=5)
    if response.status_code == 200:
        print("✅ Ollama服务正在运行")
        available_models = response.json()
        print(f"可用模型数量: {len(available_models.get('models', []))}")
        for model in available_models.get('models', []):
            print(f"- {model['name']}")
    else:
        # Any non-200 status is reported as "service not responding".
        print("❌ Ollama服务未响应")
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    # Both "nothing listening" and "no answer in time" get the same hint.
    print("❌ 无法连接到Ollama服务，请确保Ollama已启动")
    print("请在终端运行: ollama serve")

✅ Ollama服务正在运行
可用模型数量: 1
- gpt-oss:20b


In [22]:
# 流式推理示例（实时显示生成的文本）
def streaming_chat(prompt, model_name="gpt-oss:20b"):
    """
    Stream a single-turn chat reply from Ollama, echoing text as it arrives.

    Args:
        prompt: Text sent as the only user message of the conversation.
        model_name: Name of the Ollama model to query.

    Returns:
        The concatenation of every streamed content piece. If the request or
        the stream raises, the string "错误: <exception>" is returned instead
        (and also printed, so the failure is visible even when the caller
        discards the return value).
    """
    print("AI助手正在思考中...")
    print("-" * 50)

    try:
        stream = ollama.chat(
            model=model_name,
            messages=[{'role': 'user', 'content': prompt}],
            stream=True,
        )

        pieces = []
        for chunk in stream:
            # Treat a None/empty content as "no text" so that accumulating the
            # reply can never raise and throw away what was already streamed.
            content = chunk['message']['content'] or ''
            print(content, end='', flush=True)
            pieces.append(content)

        print("\n" + "-" * 50)
        return "".join(pieces)

    except Exception as e:
        error_message = f"错误: {e}"
        # Leading newline: the failure may happen in the middle of a streamed line.
        print("\n" + error_message)
        return error_message

# Try the streaming helper with a sample prompt. The reply is echoed live by
# streaming_chat itself; the returned text is kept in response2.
test_prompt2 = "请写一首关于秋天的短诗"
print(f"用户: {test_prompt2}")
response2 = streaming_chat(test_prompt2)

用户: 请写一首关于秋天的短诗
AI助手正在思考中...
--------------------------------------------------
秋秋风风起起，，黄黄叶叶漫漫天天  
  
金金桂桂飘飘香香，，古古道道低低回回  
  
夕夕阳阳西西下下，，映映照照残残云云  
  
暮暮色色沉沉沉沉，，秋秋意意浓浓郁郁  
  
归归心心似似鹿鹿，，轻轻踏踏落落叶叶。。
--------------------------------------------------

--------------------------------------------------


In [None]:
# 高级配置和参数调整
def advanced_chat(prompt, model_name="gpt-oss:20b", **options):
    """
    Run a single-turn, non-streaming chat request with custom options.

    All extra keyword arguments are collected into a dict and forwarded
    unchanged as the ``options`` argument of ``ollama.chat``.

    Commonly used options (ranges as noted by the original author):
    - temperature: controls the randomness of the output (0.0-1.0)
    - top_p: nucleus-sampling parameter (0.0-1.0)
    - top_k: limits the number of tokens considered
    - num_predict: maximum number of tokens to generate
    - repeat_penalty: repetition penalty (1.0 means no penalty)

    Returns:
        The reply text, or the string "错误: <exception>" if anything in the
        request or in reading the reply raises.
    """
    conversation = [{'role': 'user', 'content': prompt}]
    try:
        reply = ollama.chat(model=model_name, messages=conversation, options=options)
        # Extracting the text stays inside the try so a malformed reply is
        # reported the same way as a failed request.
        outcome = reply['message']['content']
    except Exception as e:
        outcome = f"错误: {e}"
    return outcome

# Compare two sampling configurations on the same prompt
test_prompt3 = "请创造性地描述一个未来城市"

# High-temperature / wide-nucleus run
print("🔥 高创造性设置 (temperature=0.9):", "-" * 50, sep="\n")
creative_response = advanced_chat(test_prompt3, temperature=0.9, top_p=0.9, num_predict=200)
print(creative_response)

# Blank line followed by a heavy rule between the two runs
print()
print("=" * 60)

# Low-temperature / narrow-nucleus run
print("🧊 保守设置 (temperature=0.1):", "-" * 50, sep="\n")
conservative_response = advanced_chat(test_prompt3, temperature=0.1, top_p=0.5, num_predict=200)
print(conservative_response)

In [None]:
# 多轮对话示例
class OllamaChat:
    """Minimal multi-turn chat helper that keeps the conversation in memory."""

    def __init__(self, model_name="gpt-oss:20b"):
        """Remember the model to use and start with an empty conversation."""
        self.model_name = model_name
        # Messages as {'role': ..., 'content': ...} dicts, oldest first.
        self.conversation_history = []

    def chat(self, user_message, **options):
        """
        Send a message together with the stored history and return the reply.

        Extra keyword arguments are forwarded as the ``options`` argument of
        ``ollama.chat``.

        The user message and the reply are added to the history only when the
        request succeeds. On failure the history is left exactly as it was, so
        a retry does not send a dangling, unanswered user turn.

        Returns:
            The reply text, or the string "错误: <exception>" on failure.
        """
        user_turn = {'role': 'user', 'content': user_message}

        try:
            # Send the full history plus the new turn, without mutating the
            # stored history yet.
            response = ollama.chat(
                model=self.model_name,
                messages=self.conversation_history + [user_turn],
                options=options
            )
            ai_message = response['message']['content']
        except Exception as e:
            return f"错误: {e}"

        # Commit both sides of the exchange together.
        self.conversation_history.append(user_turn)
        self.conversation_history.append({
            'role': 'assistant',
            'content': ai_message
        })
        return ai_message

    def clear_history(self):
        """Drop every stored message."""
        self.conversation_history = []

    def get_history(self):
        """Return the stored message list (the live list, not a copy)."""
        return self.conversation_history

# Instantiate the chat helper
chatbot = OllamaChat()

# Run a three-turn conversation
print("开始多轮对话测试:", "=" * 60, sep="\n")

# Turn 1
user_msg1 = "你好，我想学习Python编程"
response1 = chatbot.chat(user_msg1, temperature=0.7)
print(f"用户: {user_msg1}", f"AI: {response1}", "-" * 60, sep="\n")

# Turn 2 (relies on the context of turn 1)
user_msg2 = "请推荐一些适合初学者的Python书籍"
response2 = chatbot.chat(user_msg2, temperature=0.7)
print(f"用户: {user_msg2}", f"AI: {response2}", "-" * 60, sep="\n")

# Turn 3 (continues the same context)
user_msg3 = "那么应该从哪本书开始呢？"
response3 = chatbot.chat(user_msg3, temperature=0.7)
print(f"用户: {user_msg3}", f"AI: {response3}", sep="\n")

print(f"\n对话历史长度: {len(chatbot.get_history())} 条消息")

In [None]:
# 实用工具函数
import time

def get_model_info(model_name="gpt-oss:20b"):
    """
    Print and return the first locally installed model whose name starts with
    ``model_name``.

    Args:
        model_name: Prefix to match against the installed model identifiers.

    Returns:
        The matching entry from ``ollama.list()['models']``, or None when no
        model matches or when the lookup raises (the error is printed).
    """
    try:
        models = ollama.list()
        for model in models['models']:
            # The identifier key differs between ollama-python versions:
            # typed responses expose it as 'model', dict responses as 'name'.
            identifier = model.get('model') or model.get('name') or ''
            if identifier.startswith(model_name):
                print(f"模型名称: {identifier}")
                print(f"大小: {model.get('size', 'Unknown')}")
                print(f"修改时间: {model.get('modified_at', 'Unknown')}")
                return model
        print(f"未找到模型: {model_name}")
        return None
    except Exception as e:
        print(f"错误: {e}")
        return None

def benchmark_model(model_name="gpt-oss:20b", prompt="Hello, how are you?"):
    """
    Time one non-streaming chat request and print simple throughput figures.

    Args:
        model_name: Name of the Ollama model to query.
        prompt: Text sent as the only user message.

    Returns:
        A dict with 'response_time' (seconds), 'response_length' (characters)
        and 'response_text', or None if the request fails (the error is
        printed).
    """
    print(f"对模型 {model_name} 进行性能测试...")
    print(f"测试提示: {prompt}")
    print("-" * 50)

    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring intervals; wall-clock time can jump or be too coarse.
    start_time = time.perf_counter()

    try:
        response = ollama.chat(
            model=model_name,
            messages=[{'role': 'user', 'content': prompt}]
        )

        response_time = time.perf_counter() - start_time

        response_text = response['message']['content']
        response_length = len(response_text)

        # Guard the division: a zero elapsed time must not be reported as a
        # failed benchmark through the except branch below.
        if response_time > 0:
            chars_per_second = response_length / response_time
        else:
            chars_per_second = float('inf')

        print(f"响应时间: {response_time:.2f} 秒")
        print(f"响应长度: {response_length} 字符")
        print(f"生成速度: {chars_per_second:.2f} 字符/秒")
        print(f"响应内容: {response_text[:100]}...")

        return {
            'response_time': response_time,
            'response_length': response_length,
            'response_text': response_text
        }

    except Exception as e:
        print(f"测试失败: {e}")
        return None

# Show metadata for the default model
print("📊 模型信息:")
get_model_info()

# Blank line followed by a heavy rule between the two sections
print()
print("=" * 60)

# Run the benchmark with a short prompt and keep the measurements
print("🚀 性能测试:")
benchmark_result = benchmark_model(prompt="用一句话解释量子计算")