In [None]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies OPENAI_API_KEY for ChatOpenAI — confirm.
load_dotenv()

In [2]:
import time
from typing import Any

from typing_extensions import TypedDict

# LangGraph & LLM imports
from langgraph.graph import StateGraph, START, END
from langchain_core.runnables.config import RunnableConfig
from langchain_core.messages import HumanMessage

# Models
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama

class AgentState(TypedDict):
    """State dictionary that flows through the graph."""
    # The user question that is sent to the chat model.
    question: str
    # The model's reply; filled in by the `call_model` node.
    answer: str

def call_model(state: AgentState, config: RunnableConfig) -> AgentState:
    """Answer ``state["question"]`` with the chat model selected in ``config``.

    The backend is chosen via ``config["configurable"]["model_type"]``:
    ``"ollama"`` uses ``ChatOllama`` with ``deepseek-r1:7b``; any other value,
    or a missing entry, uses ``ChatOpenAI`` with ``gpt-4o-mini``.

    Args:
        state: Current graph state; only ``question`` is read.
        config: Runnable config of the current invocation. A missing or empty
            ``"configurable"`` section is treated as "no preference".

    Returns:
        A new state dict: a copy of ``state`` with ``answer`` set to the
        content of the model response. The input ``state`` is not modified.
    """
    # Tolerate configs without a "configurable" section instead of raising KeyError.
    configurable = config.get("configurable") or {}
    model_type = configurable.get("model_type", "openai")
    if model_type == "ollama":
        print("Nutze Ollama (deepseek-r1:7b).")
        llm = ChatOllama(model="deepseek-r1:7b", temperature=0)
    else:
        print("Nutze OpenAI (gpt-4o-mini).")
        llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

    response = llm.invoke([HumanMessage(content=state["question"])])
    # Build a fresh dict rather than mutating the state object handed to the node.
    return {**state, "answer": response.content}

# Single-node graph: START -> "agent" -> END.
workflow = StateGraph(AgentState)
# Register the node first, then wire the edges that refer to it by name.
workflow.add_node("agent", call_model)
workflow.add_edge(START, "agent")
workflow.add_edge("agent", END)
graph = workflow.compile()

In [None]:
# Select the Ollama branch of `call_model` through the "configurable" section.
ollama_config = {"configurable": {"model_type": "ollama"}}
graph.invoke(
    {"question": "What's the highest mountain in the world?"},
    ollama_config,
)

In [4]:
import re

class ModelSwitcher:
    """Send questions to OpenAI, falling back to Ollama while OpenAI is failing.

    Every call goes through ``graph.invoke`` with a config whose
    ``configurable.model_type`` selects the backend. After an OpenAI call
    raises (or a failure is simulated with the question ``"force error"``),
    OpenAI is skipped for ``cooldown_seconds`` and questions go straight to
    the fallback.
    """

    # One <think>...</think> section; DOTALL lets it span several lines.
    _THINK_BLOCK = re.compile(r"<think>.*?</think>", flags=re.DOTALL)

    def __init__(self, graph: Any, cooldown_seconds: float = 300):
        """Create a switcher around an invokable graph.

        Args:
            graph: Object exposing ``invoke(state, config)`` that returns a
                mapping with an ``"answer"`` key (e.g. the result of
                ``StateGraph.compile()``, not the ``StateGraph`` builder).
            cooldown_seconds: How long OpenAI is skipped after a failure.
        """
        self.graph = graph
        self.cooldown_seconds = cooldown_seconds
        # time.monotonic() reading of the latest OpenAI failure; None = no failure yet.
        self.last_openai_fail_time = None
        self.openai_config = {"configurable": {"model_type": "openai"}}
        self.fallback_config = {"configurable": {"model_type": "ollama"}}

    def invoke(self, question: str, remove_think: bool = True) -> str:
        """Answer ``question``, preferring OpenAI and falling back to Ollama.

        Args:
            question: Text sent to the model. The literal ``"force error"``
                (case-insensitive) simulates an OpenAI failure.
            remove_think: If true, ``<think>...</think>`` sections are
                stripped from the answer.

        Returns:
            The answer text of whichever backend handled the call.

        Raises:
            Exception: Whatever the fallback call raises; only errors on the
                OpenAI path are caught.
        """
        if self._should_skip_openai():
            return self._invoke_fallback(question, remove_think)

        if question.lower() == "force error":
            print("Forcierter Fehler mit ChatOpenAI wird ausgelöst.")
            self._start_cooldown()
            return self._invoke_fallback(question, remove_think)

        try:
            print("Versuche Aufruf mit OpenAI ...")
            return self._run(question, self.openai_config, remove_think)
        except Exception as exc:
            # Deliberately broad: any failure on the OpenAI path triggers the fallback.
            print("Fehler mit OpenAI aufgetreten:", exc)
            self._start_cooldown()
            return self._invoke_fallback(question, remove_think)

    def _invoke_fallback(self, question: str, remove_think: bool) -> str:
        """Answer ``question`` with the fallback (Ollama) configuration."""
        print("Nutze Fallback (Ollama).")
        return self._run(question, self.fallback_config, remove_think)

    def _run(self, question: str, config: dict, remove_think: bool) -> str:
        """Invoke the graph with ``config`` and return the (optionally cleaned) answer."""
        state = {"question": question, "answer": ""}
        result = self.graph.invoke(state, config)
        return self._clean_if_needed(result["answer"], remove_think)

    def _start_cooldown(self) -> None:
        """Record an OpenAI failure so that OpenAI is skipped for the cooldown period."""
        # Monotonic clock: the interval must not be affected by wall-clock adjustments.
        self.last_openai_fail_time = time.monotonic()
        print(f"OpenAI wird für {self.cooldown_seconds / 60:g} Minuten deaktiviert. Fallback wird aufgerufen.")

    def _should_skip_openai(self) -> bool:
        """Return True while the cooldown after the last OpenAI failure is still running."""
        if self.last_openai_fail_time is None:
            return False
        elapsed = time.monotonic() - self.last_openai_fail_time
        if elapsed >= self.cooldown_seconds:
            return False
        remaining = self.cooldown_seconds - elapsed
        print(f"OpenAI ist noch im Cooldown. Wartezeit bis OpenAI wieder aktiv ist: {remaining:.2f} Sekunden.")
        return True

    def _clean_if_needed(self, text: str, remove_think: bool) -> str:
        """Strip thinking sections from ``text`` when ``remove_think`` is set."""
        if not remove_think:
            return text
        return self._remove_thinking_tokens(text)

    def _remove_thinking_tokens(self, text: str) -> str:
        """Remove every ``<think>...</think>`` section and leading newlines."""
        return self._THINK_BLOCK.sub("", text).lstrip("\n")

In [5]:
# Wrap the compiled graph so questions go to OpenAI first, with Ollama as fallback.
model_switcher = ModelSwitcher(graph)

In [None]:
# Normal path: no failure has been recorded on the fresh switcher, so OpenAI is tried first.
model_switcher.invoke("What's the highest mountain in the world?")

In [None]:
# "force error" simulates an OpenAI failure: the cooldown starts and the fallback answers.
model_switcher.invoke("force error")

In [None]:
# If run within 300 s of the forced error, OpenAI is skipped and the fallback answers directly.
model_switcher.invoke("Which city is the capital of France?")

In [None]:
# Same question again: still served by the fallback while the cooldown is active;
# once 300 s have passed since the failure, OpenAI is tried again.
model_switcher.invoke("Which city is the capital of France?")

In [None]:
# remove_think=False returns the answer unmodified, including any <think>...</think> section.
model_switcher.invoke("Which city is the capital of France?", remove_think=False)