In [None]:
import os
from dotenv import load_dotenv
load_dotenv()
# Set Hugging Face API Token (Replace with your actual token)
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")

In [None]:
import huggingface_hub

In [None]:
from huggingface_hub import notebook_login

notebook_login()


In [None]:
import torch
print(torch.__version__)
print("CUDA Available:", torch.cuda.is_available())


In [None]:
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEndpoint

from langchain_huggingface import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

llm = HuggingFacePipeline.from_model_id(
    model_id="google/flan-t5-small",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 10},
)

import gradio as gr


# Suppress warnings
def warn(*args, **kwargs):
    pass

import warnings
warnings.warn = warn
warnings.filterwarnings('ignore')

## LLM using Hugging Face
def get_llm():
    llm = llm
    return llm

## Document loader with debugging
def document_loader(file):
    loader = PyPDFLoader(file.name)
    docs = loader.load_and_split()
    for doc in docs:
        print(doc.page_content)
    return docs


## Text splitter with debugging
def text_splitter(data):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
    )
    chunks = text_splitter.split_documents(data)
    if not chunks:
        raise ValueError("Text splitting failed: No chunks were created from the document.")
    print(f"Generated {len(chunks)} text chunks.")
    return chunks

## Embedding model using Hugging Face
def huggingface_embedding():
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embedding_model

## Vector database with debugging
def vector_database(chunks):
    embedding_model = huggingface_embedding()
    vectordb = Chroma.from_documents(chunks, embedding_model,persist_directory="./chroma_db")
    vectordb.persist()
    print("Vector database created successfully.")
    return vectordb

## Retriever with debugging
def retriever(file):
    splits = document_loader(file)
    chunks = text_splitter(splits)
    vectordb = vector_database(chunks)
    retriever = vectordb.as_retriever()
    return retriever

## QA Chain
def retriever_qa(file, query):
    llm = get_llm()
    retriever_obj = retriever(file)
    qa = RetrievalQA.from_chain_type(llm=llm, 
                                     chain_type="stuff", 
                                     retriever=retriever_obj, 
                                     return_source_documents=False)
    response = qa.invoke(query)
    return response['result']

# Create Gradio interface
rag_application = gr.Interface(
    fn=retriever_qa,
    allow_flagging="never",
    inputs=[
        gr.File(label="Upload PDF File", file_count="single", file_types=['.pdf'], type="filepath"),  # Drag and drop file upload
        gr.Textbox(label="Input Query", lines=2, placeholder="Type your question here...")
    ],
    outputs=gr.Textbox(label="Output"),
    title="RAG Chatbot",
    description="Upload a PDF document and ask any question. The chatbot will try to answer using the provided document."
)

# Launch the app
rag_application.launch(server_name="0.0.0.0", server_port=7860)


In [None]:
gr.close_all()

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_id = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

input_text = "Translate English to French: Hello, how are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
def document_loader(file):
    loader = PyPDFLoader(file)
    for page in loader.load_and_split():
        return page

In [None]:
print(document_loader("/home/kronos/Desktop/UAI.pdf"))

In [None]:
from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader("/home/kronos/Desktop/hindu.pdf")
pages = loader.load()

In [None]:
len(pages)

In [None]:
page = pages[0]

In [None]:
print(page.page_content)

In [None]:
page.metadata

In [None]:
page

In [None]:
from langchain.llms import HuggingFaceHub


In [None]:
!pip install llama-index


In [None]:
import os
from getpass import getpass
from huggingface_hub import login

In [None]:
HF_Token = getpass()

In [None]:
login(token = HF_Token)

In [None]:
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI
from llama_index.core.tools import FunctionTool

In [None]:
def multiply(a: float, b: float) -> float:
    """Multiply two numbers and returns the product"""
    return a * b


multiply_tool = FunctionTool.from_defaults(fn=multiply)


def add(a: float, b: float) -> float:
    """Add two numbers and returns the sum"""
    return a + b


add_tool = FunctionTool.from_defaults(fn=add)

In [None]:
from sqlalchemy import create_engine, text
dbEngine = create_engine('sqlite:////home/kronos/Desktop/raman.db')

In [None]:
import pandas as pd
r = pd.read_sql('select name from sqlite_master',dbEngine)

In [None]:
def schema(x: str) -> str:
    sql = f"SELECT sql FROM sqlite_master WHERE name = '{x}';"
    with dbEngine.connect() as conn:
        result = conn.execute(text(sql))
        return result.fetchall()


In [None]:
r['schema'] = r['name'].map(schema)

In [None]:
r[r.loc[:,'name']=='WPA_all_time_connect']

In [None]:
for i in r.schema:
    print(i)

In [None]:
agent = ReActAgent.from_tools([multiply_tool, add_tool], llm=llm, verbose=True)

In [None]:
'''LLM's supported are OpenAI, HuggingFaceLLM, LangchainLLM, CustomLLM'''

In [None]:
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import PromptTemplate
from llama_index.core import Settings

In [None]:
# setup prompts - specific to StableLM
from llama_index.core import PromptTemplate

# This will wrap the default prompts that are internal to llama-index
# taken from https://huggingface.co/Writer/camel-5b-hf
query_wrapper_prompt = PromptTemplate(
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{query_str}\n\n### Response:"
)

In [None]:
import torch
from transformers import AutoModelForSeq2SeqLM
llm = HuggingFaceLLM(
    context_window=512,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.25, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="google/flan-t5-small",
    model_name="google/flan-t5-small",
    device_map="auto",
    tokenizer_kwargs={"max_length": 512},
    model_class=AutoModelForSeq2SeqLM
    # uncomment this if using CUDA to reduce memory usage
    # model_kwargs={"torch_dtype": torch.float16}
)

Settings.chunk_size = 512
Settings.llm = llm

In [None]:
%pip install llama-index-llms-huggingface
%pip install llama-index-llms-huggingface-api

In [None]:
!pip install dspy-ai

In [None]:
import dspy
model="huggingface/google/flan-t5-small"
lm = dspy.LM(model=model, temperature=0.9, max_tokens=200, stop=None, cache=False)
# Configure DSPy to use this LM
dspy.configure(lm=lm)


In [None]:
qa = dspy.Predict('question: str -> response: str')
qa(question="what are high memory and low memory on linux?", )

In [None]:
%pip install llama-index-llms-litellm

In [None]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
remotely_run_anon = HuggingFaceInferenceAPI(get_recommended_model = True)


In [None]:
completion_response = remotely_run_anon.complete("To infinity, and")
print(completion_response)

In [None]:
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool


In [None]:
import os
os.getcwd()

In [None]:
@tool
def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
    #Keep this format for the description / args / args description but feel free to modify the tool
    """A tool that does nothing yet 
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    return "What magic will you build ?"

@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Create timezone object
        tz = pytz.timezone(timezone)
        # Get current time in that timezone
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"

In [None]:
final_answer = FinalAnswerTool()
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
    
# We're creating our CodeAgent
agent = CodeAgent(
    model=model,
    tools=[final_answer], ## add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)

GradioUI(agent).launch()

In [None]:
!

In [None]:
from dspy.datasets import MATH
dataset = MATH(subset = 'algebra')

In [None]:
example = dataset.train[0]

In [None]:
import dspy
lm = dspy.LM('ollama_chat/qwen2.5-coder:3b', api_base = 'http://localhost:11434')
# Configure DSPy to use this LM
dspy.configure(lm=lm)


In [None]:
module = dspy.ChainOfThought('question -> python_code')
question = 'Code for Fibonacci Number'

In [None]:
from dspy.datasets import MATH

dataset = MATH(subset='algebra')
print(len(dataset.train), len(dataset.dev))

In [None]:
example = dataset.train[0]
print("Question:", example.question)
print("Answer:", example.answer)

In [None]:
module = dspy.ChainOfThoughtWithHint("question -> answer")
print(module(question=question).answer)

In [None]:
THREADS = 24
kwargs = dict(num_threads=THREADS, display_progress=True, display_table=5)
evaluate = dspy.Evaluate(devset=dataset.dev, metric=dataset.metric, **kwargs,provide_traceback = True)

evaluate(module)

In [None]:
pip install git+https://github.com/hendrycks/math.git

In [None]:
class CheckCitationFaithfulness(dspy.Signature):
    """Verify that the text is based on the provided context."""

    context: str = dspy.InputField(desc="Here the context is given for the LLM")
    answer: str = dspy.InputField(desc= "A keyword from the context above")
    question: str = dspy.OutputField(desc = "A question built from the context with the answer is the solution")

context = ""

text = "swadeshi movement"

faithfulness = dspy.ChainOfThought(CheckCitationFaithfulness)
print(faithfulness(context=context, answer=text).question)

In [None]:
dspy.inspect_history()

In [None]:
lm(messages = [{"role":"user","content":"Say this is a test"}])

In [None]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    # Can select any from the below:
    # "unsloth/Qwen2.5-0.5B", "unsloth/Qwen2.5-1.5B", "unsloth/Qwen2.5-3B"
    # "unsloth/Qwen2.5-14B",  "unsloth/Qwen2.5-32B",  "unsloth/Qwen2.5-72B",
    # And also all Instruct versions and Math. Coding verisons!
    model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 8, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [None]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

from datasets import load_dataset
dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 8,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

In [None]:
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Continue the fibonnaci sequence.", # instruction
        "1, 1, 2, 3, 5, 8", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

In [None]:
unsloth_template = \
    "{{ bos_token }}"\
    "{{ 'You are a helpful assistant to the user\n' }}"\
    "{% for message in messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ '>>> User: ' + message['content'] + '\n' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ '>>> Assistant: ' }}"\
    "{% endif %}"
unsloth_eos_token = "eos_token"

if False:
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = (unsloth_template, unsloth_eos_token,), # You must provide a template and EOS token
        mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
        map_eos_token = True, # Maps <|im_end|> to </s> instead
    )

In [None]:
trainer_stats = trainer.train()

In [None]:
# !pip install smolagents[litellm]
from smolagents import CodeAgent, LiteLLMModel, tool, HfApiModel

model = LiteLLMModel(
    model_id= "ollama_chat/deepseek-coder:6.7b", # This model is a bit weak for agentic behaviours though
    api_base="http://localhost:11434", # replace with 127.0.0.1:11434 or remote open-ai compatible server if necessary
    api_key="", # replace with API key if necessary
    num_ctx=8192 # ollama default is 2048 which will fail horribly. 8192 works for easy tasks, more is better. Check https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator to calculate how much VRAM this will need for the selected model.
)



In [None]:
from smolagents import ToolCallingAgent
from smolagents.default_tools import FinalAnswerTool

agent = CodeAgent(tools=[], model=model,  add_base_tools=True, max_steps = 2, verbosity_level = 5 , additional_authorized_imports=["*"])


In [None]:
agent.run(f"access all tables in raman.db and do eda for each table, write your own code")

In [None]:
@tool
def access_sql_database(db : str) -> list:
    '''This is a tool to access a sqllite database files on my desktop to return the list of tables within the database, check the word with .db 
    and give it as the input to this tool.
    This tool returns a list with all the names of the tables contained within the database, loop through the list and for each item in the list print the size of the table 
    remember that each table in the list belongs to the database you will identify in the input prompt
    Args:

    db: The name of the sqllite database needed to query to list the tables that are present in it 
    Returns:
        tables: A list of all the tables in db
    '''
    from sqlalchemy import create_engine,text
    import pandas as pd
    dbEngine = create_engine(f'sqlite:////home/kronos/Desktop/{db}')
    print('dbengine created')
    tables = pd.read_sql('select name from sqlite_master',dbEngine).loc[:,'name'].to_list()
    return tables

In [None]:
import sqlite3                                                                                                   
                                                                                                                   
  # Connect to the sqlite_database                                                                                 
conn = sqlite3.connect('raman.db')                                                                               
cursor = conn.cursor()                                                                                           
                                                                                                                   
  # Execute an SQL query to get a list of all table names in the database                                          
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")                                             
tables = cursor.fetchall()                                                                                       
print(tables)                                                                                                             
for table_name in tables:                                                                                        
      # Get the row count for each table                                                                           
    cursor.execute(f"SELECT COUNT(*) from {table_name[0]}")                                                      
    count = cursor.fetchone()[0]                                                                                 
                                                                                                                   
    print(f"Table '{table_name[0]}' has {count} rows.")                                                          
                                                                                                                   
  # Close the connection to the database                                                                           
conn.close() 

In [None]:
import sqlite3                                                                                                   
  # Connect to the SQLite database                                                                                 
conn = sqlite3.connect('raman.db')                                                                               
c = conn.cursor()                                                                                                
                                                                                                                   
  # Get a list of all tables in the database                                                                       
tables = access_sql_database(db='raman.db')                                                                      
print("Tables: ", tables)   

In [None]:
@tool
def schema(tables: str, db: str) -> list:
    """
    This tool takes the output from access_sql_database (a list containing
    table names) and returns the size (i.e., row count) of the provided table.

    Args:
        db: The name of the sqllite database needed to query to list the tables that are present in it 
        tables: Name of the table (from the output list) to query.

    Returns:
        The row count of the table as a list (result of the SQL query).
    """
    print("I am schema",{tables})
    

In [None]:
from smolagents import GradioUI
GradioUI(agent).launch()

In [None]:
access_sql_database('raman.db')

In [None]:
dspy.inspect_history(n=1)

In [None]:
from sqlalchemy import create_engine,text
import pandas as pd
dbEngine = create_engine(f'sqlite:////home/kronos/Desktop/raman.db')
print('dbengine created')
r = pd.read_sql('select name from sqlite_master',dbEngine)
print(r)
table = input('Enter your table')
sql = f"SELECT * FROM '{table}' limit 5;"
with dbEngine.connect() as conn:
    result = conn.execute(text(sql))
    print(result.fetchall())

In [None]:
import ujson
from dspy.utils import download

# Download question--answer pairs from the RAG-QA Arena "Tech" dataset.
download("https://huggingface.co/dspy/cache/resolve/main/ragqa_arena_tech_examples.jsonl")

with open("ragqa_arena_tech_examples.jsonl") as f:
    data = [ujson.loads(line) for line in f]

In [None]:
data[0]

In [None]:
import dspy
data = [dspy.Example(**d).with_inputs('response') for d in data]

In [None]:
example = data[2]

In [None]:
example

In [None]:
download("https://huggingface.co/dspy/cache/resolve/main/ragqa_arena_tech_corpus.jsonl")

In [None]:
max_characters = 6000  # for truncating >99th percentile of documents
topk_docs_to_retrieve = 5  # number of documents to retrieve per search query

with open("ragqa_arena_tech_corpus.jsonl") as f:
    corpus = [ujson.loads(line)['text'][:max_characters] for line in f]
    print(f"Loaded {len(corpus)} documents. Will encode them below.")

In [None]:
embedder = LocalEmbedder()
search = dspy.retrievers.Embeddings(embedder=embedder,  corpus=corpus, k=topk_docs_to_retrieve)

In [None]:
from sentence_transformers import SentenceTransformer

class LocalEmbedder:
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def __call__(self, text):
        return self.model.encode(text).tolist()

In [None]:
class RAG(dspy.Module):
    def __init__(self):
        super().__init__()
        self.respond = dspy.ChainOfThought('context, question -> response')

    def forward(self, question):
        context = search(question).passages
        return self.respond(context=context, question=question)

In [None]:
rag = RAG()
rag(question="what are high memory and low memory on windows?")

In [None]:
baseline = rag(question="cmd+tab does not work on hidden or minimized windows")
print(baseline.response)

In [None]:
import random
from dspy.datasets import DataLoader

kwargs = dict(fields=("claim", "supporting_facts", "hpqa_id", "num_hops"), input_keys=("claim",))
hover = DataLoader().from_huggingface(dataset_name="hover-nlp/hover", split="train", trust_remote_code=True, **kwargs)

hpqa_ids = set()
hover = [
    dspy.Example(claim=x.claim, titles=list(set([y["key"] for y in x.supporting_facts]))).with_inputs("claim")
    for x in hover
    if x["num_hops"] == 3 and x["hpqa_id"] not in hpqa_ids and not hpqa_ids.add(x["hpqa_id"])
]

random.Random(0).shuffle(hover)
trainset, devset, testset = hover[:100], hover[100:200], hover[650:]

In [None]:
example = trainset[0]

print("Claim:", example.claim)
print("Pages that must be retrieved:", example.titles)

In [None]:
DOCS = {}

def search(query: str, k: int) -> list[str]:
    results = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')(query, k=k)
    results = [x['text'] for x in results]

    for result in results:
        title, text = result.split(" | ", 1)
        DOCS[title] = text

    return results

In [None]:
def search_wikipedia(query: str) -> list[str]:
    """Returns top-5 results and then the titles of the top-5 to top-30 results."""

    topK = search(query, 30)
    titles, topK = [f"`{x.split(' | ')[0]}`" for x in topK[5:30]], topK[:5]
    return topK + [f"Other retrieved pages have titles: {', '.join(titles)}."]

def lookup_wikipedia(title: str) -> str:
    """Returns the text of the Wikipedia page, if it exists."""

    if title in DOCS:
        return DOCS[title]

    results = [x for x in search(title, 10) if x.startswith(title + " | ")]
    if not results:
        return f"No Wikipedia page found for title: {title}"
    return results[0]

In [None]:
instructions = "Find all Wikipedia titles relevant to verifying (or refuting) the claim."
signature = dspy.Signature("claim -> titles: list[str]", instructions)
react = dspy.ReAct(signature, tools=[search_wikipedia, lookup_wikipedia], max_iters=20)

In [None]:
react(claim="David Gregory was born in 1625.").titles[:3]

In [None]:
def top5_recall(example, pred, trace=None):
    gold_titles = example.titles
    recall = sum(x in pred.titles[:5] for x in gold_titles) / len(gold_titles)

    # If we're "bootstrapping" for optimization, return True if and only if the recall is perfect.
    if trace is not None:
        return recall >= 1.0
    
    # If we're just doing inference, just measure the recall.
    return recall

evaluate = dspy.Evaluate(devset=devset, metric=top5_recall, num_threads=16, display_progress=True, display_table=5)

In [None]:
def safe_react(claim: str):
    try:
        return react(claim=claim)
    except Exception as e:
        return dspy.Prediction(titles=[])

evaluate(safe_react)

In [None]:
model_client = OpenAIChatCompletionClient(
    model="llama3.2:latest",
    base_url="http://localhost:11434/v1",
    api_key="placeholder",
    model_info={
        "vision": False,
        "function_calling": True,
        "json_output": False,
        "family": "unknown",
    },
)

In [None]:
config_list = [
    {
        # Let's choose the Meta's Llama 3.1 model (model names must match Ollama exactly)
        "model": "deepseek-coder:6.7b",
        # We specify the API Type as 'ollama' so it uses the Ollama client class
        "api_type": "ollama",
        "stream": False,
        "client_host": "127.0.0.1:11434",
    }
]



In [None]:
from pathlib import Path

from autogen import AssistantAgent, UserProxyAgent
from autogen.coding import LocalCommandLineCodeExecutor

# Setting up the code executor
workdir = Path("coding")
workdir.mkdir(exist_ok=True)
code_executor = LocalCommandLineCodeExecutor(work_dir=workdir)

# Setting up the agents

# The UserProxyAgent will execute the code that the AssistantAgent provides
user_proxy_agent = UserProxyAgent(
    name="User",
    code_execution_config={"executor": code_executor},
    is_termination_msg=lambda msg: "FINISH" in msg.get("content"),
)

system_message = """You are a helpful AI assistant who writes code and the user
executes it. Solve tasks using your python coding skills.
In the following cases, suggest python code (in a python coding block) for the
user to execute. When using code, you must indicate the script type in the code block.
You only need to create one working sample.
Do not suggest incomplete code which requires users to modify it.
Don't use a code block if it's not intended to be executed by the user. Don't
include multiple code blocks in one response. Do not ask users to copy and
paste the result. Instead, use 'print' function for the output when relevant.
Check the execution result returned by the user.

If the result indicates there is an error, fix the error.

IMPORTANT: If it has executed successfully, ONLY output 'FINISH'."""

# The AssistantAgent, using the Ollama config, will take the coding request and return code
assistant_agent = AssistantAgent(
    name="Ollama Assistant",
    system_message=system_message,
    llm_config={"config_list": config_list},
)

In [None]:
# Start the chat, with the UserProxyAgent asking the AssistantAgent the message
chat_result = user_proxy_agent.initiate_chat(
    assistant_agent,
    message="Provide code to count the number of prime numbers from 1 to 10000.",
)

In [None]:
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.ui import Console


In [None]:
planner_agent = AssistantAgent(
    "planner_agent",
    model_client={"config_list": config_list},
    description="A helpful assistant that can plan trips.",
    system_message="You are a helpful assistant that can suggest a travel plan for a user based on their request.",
)

local_agent = AssistantAgent(
    "local_agent",
    model_client={"config_list": config_list},
    description="A local assistant that can suggest local activities or places to visit.",
    system_message="You are a helpful assistant that can suggest authentic and interesting local activities or places to visit for a user and can utilize any context information provided.",
)

language_agent = AssistantAgent(
    "language_agent",
    model_client={"config_list": config_list},
    description="A helpful assistant that can provide language tips for a given destination.",
    system_message="You are a helpful assistant that can review travel plans, providing feedback on important/critical tips about how best to address language or communication challenges for the given destination. If the plan already includes language tips, you can mention that the plan is satisfactory, with rationale.",
)

travel_summary_agent = AssistantAgent(
    "travel_summary_agent",
    model_client={"config_list": config_list},
    description="A helpful assistant that can summarize the travel plan.",
    system_message="You are a helpful assistant that can take in all of the suggestions and advice from the other agents and provide a detailed final travel plan. You must ensure that the final plan is integrated and complete. YOUR FINAL RESPONSE MUST BE THE COMPLETE PLAN. When the plan is complete and all perspectives are integrated, you can respond with TERMINATE.",
)

In [None]:
termination = TextMentionTermination("TERMINATE")
group_chat = RoundRobinGroupChat(
    [planner_agent, local_agent, language_agent, travel_summary_agent], termination_condition=termination
)
await Console(group_chat.run_stream(task="Plan a 3 day trip to Nepal."))

In [None]:
from autogen_core.models import UserMessage
from autogen_ext.models.openai import OpenAIChatCompletionClient


In [None]:
def get_model_client() -> OpenAIChatCompletionClient:  # type: ignore
    "Mimic OpenAI API using Local LLM Server."
    return OpenAIChatCompletionClient(
        model="ollama_chat/deepseek-coder:6.7b",
        api_key="NotRequiredSinceWeAreLocal",
        base_url="http://0.0.0.0:4000/",
        model_capabilities={
            "json_output": False,
            "vision": False,
            "function_calling": True,
        },
    )

In [None]:
from autogen_core.models import UserMessage
from autogen_ext.models.openai import OpenAIChatCompletionClient

model_client = OpenAIChatCompletionClient(
    model="ollama_chat/deepseek-coder:6.7b",
    base_url="127.0.0.1:11434",
    api_key="placeholder",
    model_info={
        "vision": False,
        "function_calling": True,
        "json_output": False,
        "family": "unknown",
    },
)

response = await model_client.create([UserMessage(content="What is the capital of France?", source="user")])
print(response)

In [None]:
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json

# Configure the Ollama endpoint
ollama_config = {
     "model": "deepseek-coder:6.7b",
        # We specify the API Type as 'ollama' so it uses the Ollama client class
        "api_type": "ollama",
        "stream": False,
        "client_host": "127.0.0.1:11434"
}

# Create a config list
config_list = [ollama_config]

# Set up the assistant agent
assistant = AssistantAgent(
    name="Ollama_Assistant",
    llm_config={"config_list": config_list}
)

# Set up the user proxy agent
user_proxy = UserProxyAgent(
    name="User_Proxy",
    human_input_mode="TERMINATE",
    max_consecutive_auto_reply=10,
    code_execution_config={"use_docker": False}  # Disable Docker usage
)

# Initiate a conversation
user_proxy.initiate_chat(assistant, message="Hello, how can you help me today?")


In [1]:
import asyncio
from autogen_agentchat.agents import AssistantAgent
from autogen_core.models import UserMessage
from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings
from semantic_kernel.memory.null_memory import NullMemory


In [2]:

async def main() -> None:
    sk_client = OllamaChatCompletion(
        host="127.0.0.1:11434",
        ai_model_id="deepseek-r1:1.5b",
    )
    ollama_settings = OllamaChatPromptExecutionSettings(
        options={"temperature": 0.1},
    )

    model_client = SKChatCompletionAdapter(
        sk_client, kernel=Kernel(memory=NullMemory()), prompt_settings=ollama_settings
    )

    # Call the model directly.
    model_result = await model_client.create(
        messages=[UserMessage(content="Code for Fibonacci number?", source="User")]
    )

    # Create an assistant agent with the model client.
    assistant = AssistantAgent("assistant", model_client=model_client)
    # Call the assistant with a task.
    result = await assistant.run(task="Code for making my own LLM?")
    print(result)

# Use this if-block to run the async code
if __name__ == "__main__":
    await main()


TaskResult(messages=[TextMessage(source='user', models_usage=None, content='Code for making my own LLM?', type='TextMessage'), TextMessage(source='assistant', models_usage=RequestUsage(prompt_tokens=35, completion_tokens=1124), content="<think>\nOkay, so I want to make my own Large Language Model (LLM). That sounds like a big project! I've heard that there are ways to do this without hiring someone else, but I'm not exactly sure where to start. Let me think through this step by step.\n\nFirst, I guess I need some data. LLMs learn from text, so if I can get good-quality text data, that would be a big help. But how do I find good datasets? Maybe there are public datasets available online. I remember something about the King of Kings dataset or something similar. I should look into those.\n\nOnce I have my data, I probably need to preprocess it. That means cleaning and transforming the raw text into a format that the model can understand. Text processing often involves tokenization, which

In [3]:
sk_client = OllamaChatCompletion(
        host="127.0.0.1:11434",
        ai_model_id="deepseek-r1:1.5b",
    )
ollama_settings = OllamaChatPromptExecutionSettings(
        options={"temperature": 0.5},
    )

model_client = SKChatCompletionAdapter(
        sk_client, kernel=Kernel(memory=NullMemory()), prompt_settings=ollama_settings

    )


sk_client_1 = OllamaChatCompletion(
        host="127.0.0.1:11434",
        ai_model_id="qwen2.5-coder:3b",
    )
ollama_settings = OllamaChatPromptExecutionSettings(
        options={"temperature": 0.5},
    )

model_client_1 = SKChatCompletionAdapter(
        sk_client_1, kernel=Kernel(memory=NullMemory()), prompt_settings=ollama_settings
    )

In [4]:
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.ui import Console

In [5]:
planner_agent = AssistantAgent(
    "Interacting_agent",
    model_client=model_client,
    description="A business Intelligence assistant",
    system_message="You are a helpful assistant that takes user requests and converts them to business stories",
)

local_agent = AssistantAgent(
    "Coding_agent",
    model_client=model_client_1,
    description="A coding assistant that codes based on the business stories",
    system_message="You are a helpful assistant that can code based on the stories provided by Interacting_agent",
)

executor_agent = AssistantAgent(
    "executor_agent",
    model_client=model_client_1,
    description="A coding assistant that evaluates code",
    system_message="You are a helpful assistant that can evaluate the code based on the code generated by Coding_agent",
)


travel_summary_agent = AssistantAgent(
    "summary_agent",
    model_client=model_client,
    description="A helpful assistant that can summarize the code and text generated",
    system_message="You are a helpful assistant that can take in all of the suggestions and advice from the other agents and provide a detailed execution plan. You must ensure that the final plan is integrated and complete. YOUR FINAL RESPONSE MUST BE THE COMPLETE PLAN. When the plan is complete and all perspectives are integrated, you can respond with TERMINATE.",
)

In [6]:
asyncio.get_event_loop()

<_UnixSelectorEventLoop running=True closed=False debug=False>

In [None]:
termination = TextMentionTermination("TERMINATE")
group_chat = RoundRobinGroupChat(
    [planner_agent, local_agent, executor_agent, travel_summary_agent], termination_condition=termination
)
await Console(group_chat.run_stream(task="Create a asnchronous code involving multi threading in python to calculate 10000 prime numbers"))

---------- user ----------
Create a asnchronous code involving multi threading in python to calculate 10000 prime numbers
---------- Interacting_agent ----------
<think>
Alright, so I need to create an asynchronous Python code using multi-threading to find the first 10,000 prime numbers. Hmm, that sounds pretty challenging but also really interesting. Let me try to break this down step by step.

First, I know that a prime number is a number greater than 1 that has no positive divisors other than 1 and itself. So, for each number starting from 2 upwards, I need to check if it's prime. But checking every number up to the square root of each candidate might take too long, especially when looking for many primes.

Now, using multi-threading in Python means that I can spawn multiple threads, each handling a portion of the work. But how do I structure this? Well, maybe I can create separate functions or classes that handle different parts of checking if a number is prime simultaneously. That

In [12]:
from dataclasses import dataclass

from autogen_core import (
    AgentId,
    DefaultTopicId,
    MessageContext,
    RoutedAgent,
    SingleThreadedAgentRuntime,
    default_subscription,
    message_handler,
)
from autogen_core.model_context import BufferedChatCompletionContext
from autogen_core.models import (
    AssistantMessage,
    ChatCompletionClient,
    SystemMessage,
    UserMessage,
)
from autogen_ext.models.openai import OpenAIChatCompletionClient

In [13]:
@dataclass
class Message:
    content: str

In [14]:
@default_subscription
class Assistant(RoutedAgent):
    def __init__(self, name: str, model_client: ChatCompletionClient) -> None:
        super().__init__("An assistant agent.")
        self._model_client = model_client
        self.name = name
        self.count = 0
        self._system_messages = [
            SystemMessage(
                content=f"Your name is {name} and you are a part of a duo of comedians."
                "You laugh when you find the joke funny, else reply 'I need to go now'.",
            )
        ]
        self._model_context = BufferedChatCompletionContext(buffer_size=5)

    @message_handler
    async def handle_message(self, message: Message, ctx: MessageContext) -> None:
        self.count += 1
        await self._model_context.add_message(UserMessage(content=message.content, source="user"))
        result = await self._model_client.create(self._system_messages + await self._model_context.get_messages())

        print(f"\n{self.name}: {message.content}")

        if "I need to go".lower() in message.content.lower() or self.count > 2:
            return

        await self._model_context.add_message(AssistantMessage(content=result.content, source="assistant"))  # type: ignore
        await self.publish_message(Message(content=result.content), DefaultTopicId())  # type: ignore

In [15]:
runtime = SingleThreadedAgentRuntime()

cathy = await Assistant.register(
    runtime,
    "cathy",
    lambda: Assistant(name="Cathy", model_client=model_client)
)

joe = await Assistant.register(
    runtime,
    "joe",
    lambda: Assistant(name="Joe", model_client=model_client_1),
)

In [16]:
runtime.start()
await runtime.send_message(
    Message("Joe, tell me a joke."),
    recipient=AgentId(joe, "default"),
    sender=AgentId(cathy, "default"),
)
await runtime.stop_when_idle()


Joe: Joe, tell me a joke.

Cathy: Sure! Why did the tomato turn red? Because it saw the salad dressing!

Joe: <think>
Okay, so I'm trying to figure out why the tomato turned red. Hmm, that's an old joke, right? Let me think about this step by step. Tomatoes are red because of a chemical called lycopene, which gives them their color. But why did it turn red specifically?

Well, maybe because tomatoes are fruits and not vegetables, so they don't need to photosynthesize like leaves do. But that doesn't really explain the color. Oh, wait, maybe it's just because of the way they're structured. The surface of a tomato has tiny pores that allow water to evaporate, causing the skin to toughen and turn red.

But then again, why did the tomato decide to turn red? Is it an intentional action or just a natural process? Maybe the tomato is reacting to something in its environment, like too much sun or lack of water. If it's overwatered, maybe that affects its color. Or perhaps it's a sign that it'

In [12]:
from autogen_core import AgentId,MessageContext, RoutedAgent, message_handler
from dataclasses import dataclass
from autogen_agentchat.base import Response

In [13]:
import asyncio
from autogen_agentchat.agents import AssistantAgent
from autogen_core.models import UserMessage
from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings
from semantic_kernel.memory.null_memory import NullMemory

In [14]:
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage

In [21]:
sk_client = OllamaChatCompletion(
        host="127.0.0.1:11434",
        ai_model_id="deepseek-r1:8b",
    )
ollama_settings = OllamaChatPromptExecutionSettings(
        options={"temperature": 0.7},
    )

model_client_2 = SKChatCompletionAdapter(
        sk_client, kernel=Kernel(memory=NullMemory()), prompt_settings=ollama_settings

    )


sk_client_1 = OllamaChatCompletion(
        host="127.0.0.1:11434",
        ai_model_id="qwen2.5-coder:3b",
    )
ollama_settings = OllamaChatPromptExecutionSettings(
        options={"temperature": 0.1},
    )

model_client_1 = SKChatCompletionAdapter(
        sk_client_1, kernel=Kernel(memory=NullMemory()), prompt_settings=ollama_settings
    )

In [5]:
@dataclass
class MyMessageType:
    content: str

In [6]:
class coding_component(RoutedAgent):
    def __init__(self) -> None:
        super().__init__(description="coding element to create code as per the instruction from the human element")
        self._model_client = model_client_1
        self._delegate = AssistantAgent('coding', model_client = model_client)

    @message_handler
    async def handle_my_message_type(self, message:MyMessageType,ctx: MessageContext) -> Response:
        sender_id = AgentId(ctx.sender.type,ctx.sender.key)
        response = await self._delegate.on_messages(
            [TextMessage(content=message.content, source=self.id.key)],
        cancellation_token =ctx.cancellation_token)
        print(f'I am god {response.chat_message.content}')
        await self.send_message(MyMessageType(response.chat_message.content),sender_id)
        print(f"Received inner response: {response}")
        return Response(chat_message = TextMessage(content="Everything is done", source=self.id.key))

In [7]:
class coding_agent(RoutedAgent):
    def __init__(self, inner_agent_id: str) -> None:
        super().__init__(description='Coder')
        self.inner_agent_id = AgentId(inner_agent_id, self.id.key)
        self.queue = asyncio.Queue(maxsize = 0)

    @message_handler
    async def handle_my_message_type(self, message: MyMessageType, ctx: MessageContext) -> MyMessageType:
        print(f"{self.id.type} received message: {message.content}")
        
        # Handle case where sender is None (initial message)
        if ctx.sender is None:
            sender_id = None
        else:
            sender_id = AgentId(ctx.sender.type, ctx.sender.key)
        if sender_id == None:
        # Send message to inner agent
            response = await self.send_message(
                MyMessageType(f"Generate Python code for: {message.content}"),
                self.inner_agent_id
            )
        else:
        # Process the response
            print(f"{self.id.type} task completed")
            response_final = Response(chat_message = TextMessage(content=message.content, source=self.id.key))
            print('done')
            return MyMessageType(response_final)


        
        

In [9]:
from autogen_core import SingleThreadedAgentRuntime
runtime = SingleThreadedAgentRuntime()
await coding_component.register(runtime,"coding_component",lambda: coding_component())
await coding_agent.register(runtime,"coding_agent",lambda: coding_agent("coding_component"))
runtime.start()
outer_agent_id = AgentId("coding_agent", "default")
result = await runtime.send_message(MyMessageType("Create fibinacci number code in python directly give the code dont loop"),outer_agent_id )
await runtime.stop_when_idle()
print("Hey this is good",{result})


coding_agent received message: Create fibinacci number code in python directly give the code dont loop


CancelledError: 

I am god <think>
Alright, so I need to figure out how to generate Fibonacci numbers in Python without using loops. Hmm, okay, let's start by recalling what the Fibonacci sequence is. It starts with 0 and 1, and each subsequent number is the sum of the previous two.

Wait, but sometimes people define it starting from 1 and 1 as well. Oh right, the user didn't specify, so I should probably go with the standard definition where F(0) = 0 and F(1) = 1.

The challenge is to do this without using loops. So, no for loops, while loops, or anything like that. That means I need another approach, maybe using recursion or some mathematical formula.

Recursion comes to mind. The Fibonacci function can be defined recursively: each number is the sum of the two preceding ones. So, F(n) = F(n-1) + F(n-2). But if I use recursion without any loops, how do I handle it? Because recursive calls can get expensive for large n due to stack depth.

Alternatively, maybe using a mathematical formula like Binet's f

coding_agent received message: <think>
Alright, so I need to figure out how to generate Fibonacci numbers in Python without using loops. Hmm, okay, let's start by recalling what the Fibonacci sequence is. It starts with 0 and 1, and each subsequent number is the sum of the previous two.

Wait, but sometimes people define it starting from 1 and 1 as well. Oh right, the user didn't specify, so I should probably go with the standard definition where F(0) = 0 and F(1) = 1.

The challenge is to do this without using loops. So, no for loops, while loops, or anything like that. That means I need another approach, maybe using recursion or some mathematical formula.

Recursion comes to mind. The Fibonacci function can be defined recursively: each number is the sum of the two preceding ones. So, F(n) = F(n-1) + F(n-2). But if I use recursion without any loops, how do I handle it? Because recursive calls can get expensive for large n due to stack depth.

Alternatively, maybe using a mathematical 

In [23]:
import asyncio
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.teams import MagenticOneGroupChat
from autogen_agentchat.ui import Console
from autogen_ext.agents.web_surfer import MultimodalWebSurfer
from autogen_ext.teams.magentic_one import MagenticOne


async def main() -> None:
    model_client = model_client_2

    m1 = MagenticOne(client=model_client)
    task = "Write a Python script to fetch data from an API."
    result = await Console(m1.run_stream(task=task))
    print(result)


await main()

  self._validate_client_capabilities(client)
  self._validate_client_capabilities(client)
  m1 = MagenticOne(client=model_client)


ValueError: The model does not support function calling. MultimodalWebSurfer requires a model that supports function calling.

In [11]:
!pip install autogen-agentchat autogen-ext[magentic-one,openai]

# If using the MultimodalWebSurfer, you also need to install playwright dependencies:
!playwright install --with-deps chromium

Collecting markitdown>=0.0.1a2 (from autogen-ext[magentic-one,openai])
  Downloading markitdown-0.0.1a4-py3-none-any.whl.metadata (8.1 kB)
Collecting playwright>=1.48.0 (from autogen-ext[magentic-one,openai])
  Downloading playwright-1.50.0-py3-none-manylinux1_x86_64.whl.metadata (3.5 kB)
Collecting azure-ai-documentintelligence (from markitdown>=0.0.1a2->autogen-ext[magentic-one,openai])
  Downloading azure_ai_documentintelligence-1.0.0-py3-none-any.whl.metadata (51 kB)
Collecting mammoth (from markitdown>=0.0.1a2->autogen-ext[magentic-one,openai])
  Downloading mammoth-1.9.0-py2.py3-none-any.whl.metadata (24 kB)
Collecting olefile (from markitdown>=0.0.1a2->autogen-ext[magentic-one,openai])
  Downloading olefile-0.47-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting openpyxl (from markitdown>=0.0.1a2->autogen-ext[magentic-one,openai])
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting pathvalidate (from markitdown>=0.0.1a2->autogen-ext[magentic-one,openai]

In [8]:
import gradio as gr
import asyncio
from autogen_core import SingleThreadedAgentRuntime, AgentId

async def get_response(message):
    runtime = SingleThreadedAgentRuntime()
    await coding_component.register(runtime, "coding_component", lambda: coding_component())
    await coding_agent.register(runtime, "coding_agent", lambda: coding_agent("coding_component"))
    runtime.start()
    outer_agent_id = AgentId("coding_agent", "default")
        
        # Modify your agents to put responses in the queue
        
    await runtime.send_message(
            MyMessageType("Create fibonacci number code in python directly give the code dont loop"),
            outer_agent_id
        )
    await runtime.stop()




def gradio_wrapper(message):
    return asyncio.run(get_response(message))

iface = gr.Interface(
    fn=gradio_wrapper,
    inputs=gr.Textbox(lines=2, placeholder="Enter your coding request here..."),
    outputs=gr.Textbox(lines=10),
    title="AutoGen Coding Assistant",
    description="Enter a coding task, and the AI will generate Python code for you."
)

iface.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




coding_agent received message: Create fibonacci number code in python directly give the code dont loop


Task exception was never retrieved
future: <Task finished name='Task-24' coro=<SingleThreadedAgentRuntime._process_send() done, defined at /home/kronos/anaconda3/envs/rapids-24.12/lib/python3.12/site-packages/autogen_core/_single_threaded_agent_runtime.py:386> exception=TypeError('Object of type TextMessage is not JSON serializable')>
Traceback (most recent call last):
  File "/home/kronos/anaconda3/envs/rapids-24.12/lib/python3.12/site-packages/autogen_core/_single_threaded_agent_runtime.py", line 447, in _process_send
    payload=self._try_serialize(response),
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kronos/anaconda3/envs/rapids-24.12/lib/python3.12/site-packages/autogen_core/_single_threaded_agent_runtime.py", line 871, in _try_serialize
    return self._serialization_registry.serialize(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kronos/anaconda3/envs/rapids-24.12/lib/python3.12/site-packages/autogen_core/_serialization.py", line 252, in serializ

I am god <think>
Okay, so I need to generate Python code that calculates Fibonacci numbers without using loops. Hmm, I remember that recursion is another way to calculate things, unlike loops which use iterative methods. Let me think about how Fibonacci works.

The Fibonacci sequence starts with 0 and 1, right? Each subsequent number is the sum of the two preceding ones. So, for example, 0, 1, 1, 2, 3, 5, etc. Now, using recursion means each function calls itself to get the result.

Wait, but if I use a simple recursive approach, it might not be efficient because it recalculates the same values many times. Oh, right, that's called redundant computation. So maybe there's a way to make it more efficient without loops.

Alternatively, perhaps using memoization could help by storing previously computed results so they aren't recalculated. But I'm not sure if that's necessary for such a small function.

Let me outline the steps. The base cases would be when n is 0 or 1, in which case return

In [37]:
response

In [12]:
import gradio as gr

def echo(message, history):
    return message

demo = gr.ChatInterface(fn=echo, type="messages", examples=["hello", "hola", "merhaba"], title="Echo Bot")
demo.launch()

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




In [None]:
def gradio_interface(message):
    # Directly run the async interaction without asyncio.run()
    return asyncio.ensure_future(interact_with_agent(message))

In [None]:
def gradio_interface(message):
    import asyncio
    return asyncio.run(interact_with_agent(message))
import gradio as gr
with gr.Blocks() as demo:
    gr.Markdown("# MyAgent Interaction")
    gr.Markdown("Interact with MyAgent using Gradio")
    
    with gr.Row():
        input_text = gr.Textbox(lines=2, placeholder="Enter your message here...")
        output_text = gr.Chatbot(label="Agent Response")
    
    submit_button = gr.Button("Submit")
    submit_button.click(fn=gradio_interface, inputs=input_text, outputs=output_text)

demo.launch()

In [1]:
import gradio as gr

def image_classifier(inp):
    return {'cat': 0.3, 'dog': 0.7}

demo = gr.Interface(fn=image_classifier, inputs="image", outputs="label")
demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [None]:
iface.launch()

In [1]:
from dataclasses import dataclass

from autogen_core import AgentId, MessageContext, RoutedAgent, SingleThreadedAgentRuntime, message_handler
from autogen_core.model_context import BufferedChatCompletionContext
from autogen_core.models import AssistantMessage, ChatCompletionClient, SystemMessage, UserMessage
from autogen_ext.models.openai import OpenAIChatCompletionClient

In [2]:
@dataclass
class Message:
    content: str

In [3]:
class SimpleAgentWithContext(RoutedAgent):
    def __init__(self, model_client: ChatCompletionClient) -> None:
        super().__init__("A simple agent")
        self._system_messages = [SystemMessage(content="You are a helpful AI assistant.")]
        self._model_client = model_client
        self._model_context = BufferedChatCompletionContext(buffer_size=5)

    @message_handler
    async def handle_user_message(self, message: Message, ctx: MessageContext) -> Message:
        # Prepare input to the chat completion model.
        user_message = UserMessage(content=message.content, source="user")
        # Add message to model context.
        await self._model_context.add_message(user_message)
        # Generate a response.
        response = await self._model_client.create(
            self._system_messages + (await self._model_context.get_messages()),
            cancellation_token=ctx.cancellation_token,
        )
        # Return with the model's response.
        assert isinstance(response.content, str)
        # Add message to model context.
        await self._model_context.add_message(AssistantMessage(content=response.content, source=self.metadata["type"]))
        return Message(content=response.content)

In [4]:
runtime = SingleThreadedAgentRuntime()
await SimpleAgentWithContext.register(
    runtime,
    "simple_agent_context",
    lambda: SimpleAgentWithContext(
        OpenAIChatCompletionClient(
            model="gpt-4o-mini",
            # api_key="sk-...", # Optional if you have an OPENAI_API_KEY set in the environment.
        )
    ),
)
# Start the runtime processing messages.
runtime.start()
agent_id = AgentId("simple_agent_context", "default")

# First question.
message = Message("Hello, what are some fun things to do in Seattle?")
print(f"Question: {message.content}")
response = await runtime.send_message(message, agent_id)
print(f"Response: {response.content}")
print("-----")

# Second question.
message = Message("What was the first thing you mentioned?")
print(f"Question: {message.content}")
response = await runtime.send_message(message, agent_id)
print(f"Response: {response.content}")

# Stop the runtime processing messages.
await runtime.stop()

Error constructing agent simple_agent_context/default
Traceback (most recent call last):
  File "/home/kronos/anaconda3/envs/rapids-24.12/lib/python3.12/site-packages/autogen_core/_single_threaded_agent_runtime.py", line 808, in _invoke_agent_factory
    return cast(T, await agent)
                   ^^^^^^^^^^^
  File "/home/kronos/anaconda3/envs/rapids-24.12/lib/python3.12/site-packages/autogen_core/_single_threaded_agent_runtime.py", line 772, in factory_wrapper
    maybe_agent_instance = agent_factory()
                           ^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_62641/475434552.py", line 6, in <lambda>
    OpenAIChatCompletionClient(
  File "/home/kronos/anaconda3/envs/rapids-24.12/lib/python3.12/site-packages/autogen_ext/models/openai/_openai_client.py", line 1185, in __init__
    client = _openai_client_from_config(copied_args)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kronos/anaconda3/envs/rapids-24.12/lib/python3.12/site-packages/autogen_ext/model

Question: Hello, what are some fun things to do in Seattle?


OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [16]:
from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_files=["/home/kronos/Desktop/projects/research/mh.txt"])
documents = reader.load_data()

In [21]:
documents

