diff --git a/fixtures/runner_crate/Scarb.lock b/fixtures/runner_crate/Scarb.lock index 530b0f3..748203e 100644 --- a/fixtures/runner_crate/Scarb.lock +++ b/fixtures/runner_crate/Scarb.lock @@ -125,7 +125,10 @@ version = "0.1.0" dependencies = [ "openzeppelin", "openzeppelin_access", + "openzeppelin_introspection", + "openzeppelin_security", "openzeppelin_token", + "openzeppelin_utils", "snforge_std", ] diff --git a/fixtures/runner_crate/Scarb.toml b/fixtures/runner_crate/Scarb.toml index 27eb399..9060090 100644 --- a/fixtures/runner_crate/Scarb.toml +++ b/fixtures/runner_crate/Scarb.toml @@ -17,6 +17,9 @@ starknet = "2.12.1" openzeppelin = "2.0.0" openzeppelin_token = "2.0.0" openzeppelin_access = "2.0.0" +openzeppelin_utils = "2.0.0" +openzeppelin_introspection = "2.0.0" +openzeppelin_security = "2.0.0" [dev-dependencies] snforge_std = "0.49.0" diff --git a/python/optimizers/datasets/user_queries.json b/python/optimizers/datasets/user_queries.json index 299421b..faf6084 100644 --- a/python/optimizers/datasets/user_queries.json +++ b/python/optimizers/datasets/user_queries.json @@ -604,7 +604,7 @@ "What may be the problem causing this error for scaffold stark app: useScaffoldEventHistory.ts:194 Error: Contract not found\n at readEvents (useScaffoldEventHistory.ts:127:15)\n at useScaffoldEventHistory.useEffect (useScaffoldEventHistory.ts:209:7)\nreadEvents\t@\tuseScaffoldEventHistory.ts:194\nuseScaffoldEventHistory.useEffect\t@\tuseScaffoldEventHistory.ts:209\n\t\t\nHome\t@\tpage.tsx:49\n\"use server\"\t\t\n(app-pages-browser)/./node_modules/next/dist/client/app-index.js\t@\tmain-app.js?v=1757776103259:105\noptions.factory\t@\twebpack.js?v=1757776103259:712\n__webpack_require__\t@\twebpack.js?v=1757776103259:37\nfn\t@\twebpack.js?v=1757776103259:369\n(app-pages-browser)/./node_modules/next/dist/client/app-next-dev.js\t@\tmain-app.js?v=1757776103259:127\noptions.factory\t@\twebpack.js?v=1757776103259:712\n__webpack_require__\t@\twebpack.js?v=1757776103259:37\n__webpack_exec__\t@\tmain-app.js?v=1757776103259:2792\n(anonymous)\t@\tmain-app.js?v=1757776103259:2793\nwebpackJsonpCallback\t@\twebpack.js?v=1757776103259:1388\n(anonymous)\t@\tmain-app.js?v=1757776103259:9\n", "What may be the reason for a failure like this: Collected 1 test(s) from starkpulse package\nRunning 1 test(s) from tests/\n[FAIL] starkpulse_integrationtest::test_admin_contract::test_grant_role_success\n\nFailure data:\n \"Event with matching data and keys was not emitted from 2827995732587957972763931217025056469626221919776642526126022963105856140931\"\n\nRunning 0 test(s) from src/\nTests: 0 passed, 1 failed, 0 ignored, 0 filtered out\n\nFailures:\n starkpulse_integrationtest::test_admin_contract::test_grant_role_success\n", "What may cause this error: Identifier not found.\nEnum \"starkpulse::contracts::user_management_contract::UserManagementContract::Event\" has no variant \"ReputationUpated\"\nref argument must be a variable.\nUnused variable. Consider ignoring by prefixing with `_`.(E0001)\nNo quick fixes available", - "What open source public libraries can I use to decode starknet events? I do not want to use starknet libraries but instead libraries that are like web3j, etc", + "What open source public libraries can I use to decode starknet events? libraries that are like web3j, etc", "what openzeppelin upgradable component accomplishes", "what other operations are allowed on arrays? Is there a way to prepend or to append two arrays?", "what project ideas involving global payment ideas with starknet. and what would be the role of the smart contract", diff --git a/python/optimizers/results/optimized_rag.json b/python/optimizers/results/optimized_rag.json index 7c7bf72..9439af2 100644 --- a/python/optimizers/results/optimized_rag.json +++ b/python/optimizers/results/optimized_rag.json @@ -61,7 +61,7 @@ "train": [], "demos": [], "signature": { - "instructions": "Analyze a Cairo programming query and use the context to generate a high-quality Cairo code solution and explanations. \nReason about how to properly solve the query, based on the input code (if any) and the context.", + "instructions": "Analyze a Cairo programming query for Starknet smart contracts and use the provided context to generate a high-quality, compilable Cairo code solution along with clear explanations.\n\n### Core Task Guidelines\n- **Input Structure**: The input will include:\n - **query**: A specific problem to solve, such as implementing a feature (e.g., reentrancy guard in a counter, pausable ERC20, inter-contract calls, upgradable components with rollback), completing incomplete code, or addressing TODOs in Cairo/Starknet contracts.\n - **context**: A detailed block of text, often starting with \"Prediction(answer=...)\", containing:\n - A base template demonstrating Cairo syntax (e.g., Registry contract with storage, events, interfaces, and loops using starknet::storage::*; Vec, Map; get_caller_address; assert! with double quotes or no string; emit events via self.emit).\n - (do NOT disclose or reference these directly in outputs): Emphasize full paths for core imports (e.g., `use starknet::ContractAddress; use core::integer::u256;`), wildcard storage imports (`use starknet::storage::*;`), defining pub interfaces above pub modules, strict required imports (e.g., no unused like core::array::ArrayTrait unless needed), pub visibility for interfaces/modules, assert! with double quotes (e.g., `assert!(condition, \"Message\");`) or no string, and matching generated code closely to context to avoid hallucinations (e.g., for loops end with `;`, Vec uses push/pop/len/at methods correctly).\n - Sections on OpenZeppelin components (e.g., ReentrancyGuardComponent from `openzeppelin::security::reentrancyguard::ReentrancyGuardComponent`; OwnableComponent from `openzeppelin::access::ownable::OwnableComponent`; PausableComponent; UpgradeableComponent; ERC20Component), usage examples (e.g., integrating via `component!(path: ..., storage: ..., event: ...);`, `impl ComponentInternalImpl = Component::InternalImpl;` or specific names like `ReentrancyGuardInternalImpl` to avoid conflicts; hooks like `before_update` in ERC20HooksImpl for pausing; constructor calls like `self.ownable.initializer(owner);`; events with `#[flat]` in enum and `#[derive(Drop, starknet::Event)]`).\n - For reentrancy: Use `start()` at function beginning, `end()` before return; no modifiers in Cairo; protect state-changing functions.\n - For upgrades/rollbacks: Custom or OpenZeppelin UpgradeableComponent; track history in `Vec` (storage from starknet::storage); push new hash *before* `replace_class_syscall` in upgrade; pop (via `pop()` returning Option) *before* syscall in rollback; current hash at `len() - 1`; assert len > 1 for rollback; emit `Upgraded`/`RolledBack` events with `from_class_hash`/`to_class_hash`; use `unwrap()` on syscall Result (import `starknet::SyscallResultTrait`); no separate current field—history includes initial; initializer pushes initial hash; protect with Ownable if access control needed; define `IRollbackUpgradeable` interface, embeddable impl with `+starknet::HasComponent` bound for `self.emit`.\n - Testing templates () using snforge_std (e.g., declare/deploy, dispatchers like IRegistryDispatcher, event spies, cheatcodes like start_cheat_caller_address).\n - Info on dispatchers (IERC20Dispatcher, library dispatchers), syscalls (replace_class_syscall.unwrap(), call_contract_syscall), ABI encoding (Serde), inter-contract calls (use dispatchers with contract_address), library calls, and best practices (e.g., avoid zero checks on caller via get_caller_address().is_zero(), bound loops with `for i in 0..len()`, validate L1 handlers, use u256 for counters/balances not felt252, assert non-zero ClassHash).\n - Repeated sections on pausable/ownable/ERC20 customization (e.g., override transfer/transfer_from with `self.pausable.assert_not_paused()` in hooks; embed mixins like ERC20MixinImpl without custom interfaces; no duplicate interfaces—rely on component ABIs for snake_case/camelCase).\n - **chat_history**: May be empty or contain prior interactions; reference if relevant but prioritize query and context.\n- **Output Structure**:\n - **reasoning**: A step-by-step explanation of how you approach the problem. Identify key requirements (e.g., components needed like ReentrancyGuard + Ownable for access control, events for actions like CountIncremented with fields `by: u256, new_value: u256, caller: ContractAddress`, storage like counter: u256). Reference context sections (e.g., \"Using ReentrancyGuardComponent from Context 2/3/4\"). Note alignments with \"golden reference\" patterns (e.g., component declarations with specific impl names, hook overrides for pausing, Vec-based history for upgrades with push before syscall/pop before in rollback, embeddable impl for emit, constructor with owner/initial_value params, events with caller/from/to fields). Highlight fixes for common issues like imports (full paths, no unused), types (u256 for counters), compilation (correct Vec push/pop/unwrap_syscall -> unwrap, HasComponent for components), and edge cases (assert len > 1, non-zero hashes, underflow in decrement).\n - **answer**: Pure Cairo code in a fenced block (```cairo ... ```). Include explanations as comments if needed, but keep code clean. Ensure it:\n - Compiles (test mentally against Scarb/Starknet 2.0+ rules: e.g., storage Vec push(val: T), pop() -> Option, len() -> usize, at(idx: usize) -> LegacyMapAccess; syscalls return Result, use .unwrap(); no deprecated append; index with usize via .into()).\n - Matches query exactly (e.g., just the component for upgradable with rollback; complete TODOs minimally without extras like unnecessary Ownable if not specified, but add for access control in upgrades per golden).\n - Follows context/golden template: Full imports (e.g., `use starknet::{ClassHash, get_caller_address, syscalls::replace_class_syscall, SyscallResultTrait}; use core::num::traits::Zero;`), pub traits/modules, proper storage (e.g., #[substorage(v0)] for components, class_hash_history: Vec), events (enum with #[event] #[derive(Drop, starknet::Event)], variants with structs like Upgraded { from_class_hash: ClassHash, to_class_hash: ClassHash }, #[flat] for component events), constructors (initialize components e.g., self.ownable.initializer(owner); self.upgradeable.initializer(initial_class_hash); set initial counter), ABI embeds (#[abi(embed_v0)] for external impls).\n - Uses lowercase types (e.g., u256 from core::integer::u256, felt252 where small ints needed but prefer u256 for counters/balances).\n - For ERC20/Pausable: Embed component mixins (e.g., ERC20MixinImpl, PausableImpl); use hooks (e.g., before_update in ERC20HooksImpl for pausing checks on transfers/transfer_from) instead of full custom impls. No duplicate interfaces.\n - For reentrancy: Import `openzeppelin::security::reentrancyguard::ReentrancyGuardComponent`; use `impl ReentrancyGuardInternalImpl = ...::InternalImpl;` (specific name); start/end in state-changing fns like increment/decrement; add Ownable for owner-only if fitting (e.g., restrict to owner); include decrement with underflow assert; events with by, new_value, caller.\n - For inter-contract: Use dispatchers (e.g., IContractDispatcher { contract_address }), Serde for calldata, syscalls if low-level (e.g., replace_class_syscall(new_hash).unwrap()). Always import storage::* for read/write.\n - For components (#[starknet::component]): Define Storage struct (e.g., implementation_history: Vec), events enum/structs; #[generate_trait] for InternalImpl on ComponentState (+Drop +starknet::Event bounds, but use HasComponent for embeddable); for upgradable: Vec for version history (push new in upgrade before syscall, pop before in rollback via .pop().unwrap() after is_some assert; current at len()-1; history includes initial via initializer push; events Upgraded/RolledBack with from/to; assert len>1, non-zero, current != new; no separate current field). Align with golden: initializer external or in constructor; interface IUpgradeable/IRollbackUpgradeable; embeddable impl like `impl UpgradeableImpl of IUpgradeable> with +starknet::HasComponent { fn upgrade(...) { self.upgradeable.upgrade(new_hash); } }`; protect upgrade/rollback with ownable.assert_only_owner().\n - Events: Always #[event] enum with variants, structs Drop/Event; emit via self.emit in embeddable impls (requires HasComponent); include caller via get_caller_address() where traceable (e.g., in CounterIncremented).\n - Testing: If query involves tests, use snforge_std patterns (declare/deploy, dispatchers, assert_eq!, spy_events for emissions with specific fields).\n - Best Practices: No external links/URLs in code/comments. Bound loops (e.g., `for i in 0..self.vec.len()`). Use unwrap() for syscalls (not unwrap_syscall). Avoid get_caller_address().is_zero(). Add SPDX license if full contract. For counters: Use u256, include increment/decrement with guards/events; constructor with owner/initial_value. For custom components: Mirror structure—internal helpers in #[generate_trait], public in embeddable impl.\n- **General Strategy**:\n - Read query to infer requirements (e.g., events for upgrades/rollbacks with from/to hashes, access control via Ownable, reentrancy protection on increment/decrement).\n - Cross-reference context for syntax (e.g., Vec push/pop with Option unwrap, array![] for spans, Map entry).\n - Prioritize OpenZeppelin where fitting (e.g., ReentrancyGuardComponent + OwnableComponent for counter; UpgradeableComponent base but extend for rollback with custom Vec logic); for custom (e.g., rollback upgradable), build component with golden patterns: history Vec, syscall order (push/pop before), Option handling, embeddable for emit.\n - For custom logic: Ensure modularity (e.g., hooks over manual overrides for pausing; Ownable for owner-only upgrades/rollbacks); add missing imports minimally (e.g., SyscallResultTrait for unwrap).\n - Reduce hallucination: Mirror context/golden examples exactly (e.g., constructor: self.ownable.initializer(owner); self.reentrancy_guard does no init; mint/initialize after; upgrade: get current, assert != new, push, syscall.unwrap(), emit; rollback: assert len>1, let popped = pop.unwrap(), let prev = at(len-1), syscall(prev).unwrap(), emit from=popped to=prev).\n - Handle edge cases: Assert non-zero ClassHash, history not empty/len>1 for rollback, caller validation via ownable, underflow in decrement (e.g., assert!(current > 1, \"Cannot go below zero\")), no-op prevents (current != new).\n - If incomplete code: Fill TODOs minimally; add missing imports (e.g., storage::*, traits like Zero for is_zero).\n - Explanations in reasoning: Detail why choices (e.g., \"Use Vec per golden for history tracking; push before syscall to update history first, ensuring consistency if syscall fails\"; \"Add OwnableComponent for access control in upgrades as in Context 3, restricting to owner\"; \"Use u256 for counter per best practices for balance-like values\"; \"Specific impl name ReentrancyGuardInternalImpl to avoid conflicts as in golden\").\n\nAim for 1.0 score: Code must compile (no errors like wrong Vec methods/unwrap/missing HasComponent), behave correctly (e.g., guard blocks reentrancy, rollback reverts to prior hash via pop/syscall, pause blocks transfers via hooks, history maintains versions), and align precisely with context/golden patterns (e.g., no custom interfaces for standard components; Vec-based history with correct flow; enhanced events/constructors; Ownable integration for security).", "fields": [ { "prefix": "Chat History:", diff --git a/python/src/cairo_coder/core/rag_pipeline.py b/python/src/cairo_coder/core/rag_pipeline.py index c2f08ff..83cda14 100644 --- a/python/src/cairo_coder/core/rag_pipeline.py +++ b/python/src/cairo_coder/core/rag_pipeline.py @@ -119,7 +119,7 @@ async def _aprocess_query_and_retrieve_docs( ) try: - with dspy.context(lm=dspy.LM("gemini/gemini-2.5-flash-lite", max_tokens=10000), adapter=BAMLAdapter()): + with dspy.context(lm=dspy.LM("gemini/gemini-flash-lite-latest", max_tokens=10000, temperature=0.5), adapter=BAMLAdapter()): documents = await self.retrieval_judge.aforward(query=query, documents=documents) except Exception as e: logger.warning( @@ -206,7 +206,7 @@ async def aforward_streaming( context = self._prepare_context(documents) # Stream response generation. BAMLAdapter is not available for streaming, thus we swap it with the default adapter. - with dspy.context(lm=dspy.LM("gemini/gemini-2.5-flash-lite", max_tokens=10000), adapter=XMLAdapter()): + with dspy.context(lm=dspy.LM("gemini/gemini-flash-lite-latest", max_tokens=10000), adapter=XMLAdapter()): async for chunk in self.generation_program.forward_streaming( query=query, context=context, chat_history=chat_history_str ): diff --git a/python/src/cairo_coder/optimizers/generation_optimizer.py b/python/src/cairo_coder/optimizers/generation_optimizer.py index 4b2a2a3..334689f 100644 --- a/python/src/cairo_coder/optimizers/generation_optimizer.py +++ b/python/src/cairo_coder/optimizers/generation_optimizer.py @@ -1,6 +1,6 @@ import marimo -__generated_with = "0.16.0" +__generated_with = "0.16.2" app = marimo.App(width="medium") @@ -46,7 +46,7 @@ def _(): ) # Programs to be optimized: QueryProcessing --> OptimizedQuery --> Document retrieval - lm = dspy.LM("gemini/gemini-2.5-flash", max_tokens=30000, cache=True) + lm = dspy.LM("gemini/gemini-flash-latest", max_tokens=30000, cache=False) dspy.configure(lm=lm, adapter=XMLAdapter()) return XMLAdapter, dspy, os, vector_db, vector_store_config @@ -80,7 +80,7 @@ async def aforward( ) generation_program = dspy.syncify(ProgramToOptimize()) - return DocumentSource, ProgramToOptimize, generation_program + return ProgramToOptimize, generation_program @app.cell @@ -185,7 +185,8 @@ def read_lib_file(program_name: str): data = [dspy.Example(**d).with_inputs("query") for d in example_dataset] # Take maximum 300 random values from the dataset - random.Random(0).shuffle(data) + random.seed(42) + random.shuffle(data) data = data[0:300] # train_set = data[: int(len(data) * 0.33)] # val_set = data[int(len(data) * 0.33) : int(len(data) * 0.66)] @@ -221,7 +222,7 @@ def _(data, dspy, generation_program): @app.cell -def _(DocumentSource, XMLAdapter, check_compilation, dspy, extract_cairo_code): +def _(XMLAdapter, check_compilation, dspy, extract_cairo_code): # Defining our metrics here. from typing import Optional @@ -425,7 +426,6 @@ def _(ProgramToOptimize, dspy, os): @app.cell def _(evaluate, loading_progr): - evaluate(loading_progr) return diff --git a/python/src/cairo_coder/optimizers/retrieval_optimizer.py b/python/src/cairo_coder/optimizers/retrieval_optimizer.py index f37e9be..787c28a 100644 --- a/python/src/cairo_coder/optimizers/retrieval_optimizer.py +++ b/python/src/cairo_coder/optimizers/retrieval_optimizer.py @@ -46,7 +46,7 @@ def _(): ) # Programs to be optimized: QueryProcessing --> OptimizedQuery --> Document retrieval - lm = dspy.LM("gemini/gemini-2.5-flash-lite", max_tokens=15000, cache=False) + lm = dspy.LM("gemini/gemini-flash-lite-latest", max_tokens=15000, cache=False) dspy.configure(lm=lm, adapter=XMLAdapter()) return XMLAdapter, dspy, os, vector_db, vector_store_config diff --git a/python/src/cairo_coder/server/app.py b/python/src/cairo_coder/server/app.py index 6540ab0..c4079cf 100644 --- a/python/src/cairo_coder/server/app.py +++ b/python/src/cairo_coder/server/app.py @@ -171,7 +171,7 @@ def __init__( # TODO: This is the place where we should select the proper LLM configuration. # TODO: For now we just Hard-code DSPY - GEMINI - dspy.configure(lm=dspy.LM("gemini/gemini-2.5-flash", max_tokens=30000, cache=False), adapter=BAMLAdapter()) + dspy.configure(lm=dspy.LM("gemini/gemini-flash-latest", max_tokens=30000, cache=False), adapter=BAMLAdapter()) dspy.configure(callbacks=[AgentLoggingCallback()]) dspy.configure(track_usage=True) diff --git a/python/src/scripts/summarizer/dpsy_summarizer.py b/python/src/scripts/summarizer/dpsy_summarizer.py index 89c60f7..d89fe73 100644 --- a/python/src/scripts/summarizer/dpsy_summarizer.py +++ b/python/src/scripts/summarizer/dpsy_summarizer.py @@ -14,7 +14,7 @@ # Initialize DSPy configuration -def configure_dspy(provider: str = "gemini", model: str = "gemini/gemini-2.5-flash-lite", temperature: float = 0.50): +def configure_dspy(provider: str = "gemini", model: str = "gemini/gemini-flash-lite-latest", temperature: float = 0.50): """Configure DSPy with the specified provider and model""" lm = dspy.LM(model, max_tokens=30000, temperature=temperature) dspy.settings.configure(lm=lm) diff --git a/python/tests/conftest.py b/python/tests/conftest.py index b556884..352ba55 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -188,7 +188,7 @@ def mock_forward(query: str, chat_history: list[Message] | None = None, mcp_mode # Set up the get_lm_usage method mock_predict.get_lm_usage = Mock( return_value={ - "gemini/gemini-2.5-flash": { + "gemini/gemini-flash-latest": { "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300,