diff --git a/Cargo.lock b/Cargo.lock index f5027db..07009cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -200,12 +200,26 @@ dependencies = [ "clap", "crossterm", "dirs", + "indicatif", "predicates", "reqwest", "serde", "toml", ] +[[package]] +name = "console" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.61.2", +] + [[package]] name = "convert_case" version = "0.7.1" @@ -326,6 +340,12 @@ dependencies = [ "litrs", ] +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -738,6 +758,19 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" +dependencies = [ + "console", + "portable-atomic", + "unicode-width", + "unit-prefix", + "web-time", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -990,6 +1023,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1663,6 +1702,18 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unit-prefix" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" + [[package]] name = "untrusted" version = "0.9.0" @@ -1800,6 +1851,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 927cf17..6df5c5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ anyhow = "1.0" clap = { version = "4.5", features = ["derive", "env"] } crossterm = "0.29" dirs = "6.0.0" +indicatif = "0.18.2" reqwest = { version = "0.12", features = ["json", "blocking"] } serde = { version = "1", features = ["derive"] } toml = "0.9.8" diff --git a/README.md b/README.md index c6e3ecb..c9561d7 100644 --- a/README.md +++ b/README.md @@ -119,19 +119,17 @@ Commitbot will: ## Configuration -Commitbot looks for a configuration file at: - -```bash -~/.config/commitbot/config.toml -``` +Commitbot looks for a configuration file at: [~/.config/commitbot.toml](./commitbot.toml). These settings are available +to CLI flags, environment variables, or even per-project config. Example: ```toml model = "gpt-4o-mini" -``` -You can override these settings with CLI flags or environment variables. +["mikegarde/commitbot"] +model = "gpt-5-nano" +``` --- @@ -144,14 +142,6 @@ You can override these settings with CLI flags or environment variables. --- -## ⚠️ Privacy Notice - -> At this time, `commitbot` sends staged diffs to OpenAI’s API for analysis. 
-> -> Future versions will support **self-hosted** and **local** model endpoints (e.g. Ollama, LM Studio, or API-compatible providers) so your code can stay fully private. - --- - ## License **GPL-3.0-or-later** @@ -160,4 +150,4 @@ See [LICENSE](./LICENSE) for details. --- -_Commitbot is under active development — features and output quality will evolve with each release._ \ No newline at end of file +_Commitbot is under active development — features and output quality will evolve with each release._ diff --git a/commitbot.toml b/commitbot.toml new file mode 100644 index 0000000..d0703e7 --- /dev/null +++ b/commitbot.toml @@ -0,0 +1,30 @@ +# ~/.config/commitbot.toml + +####################################### +# Global defaults (used for all repos) +####################################### +[default] +# Default model if nothing else is specified +model = "gpt-5-nano" + +# Optional: OpenAI-style API key (falls back to env OPENAI_API_KEY) +openai_api_key = "your api key here" + +# 1 = fully serial, >1 = parallel API calls +max_concurrent_requests = 4 + +####################################### +# Per-repo overrides +####################################### +["mikegarde/commitbot"] +model = "gpt-4o-mini" +openai_api_key = "alternative for spend identification" +max_concurrent_requests = 8 + + +["company/enterprise"] +# Enterprise / self-hosted style config +base_url = "https://enterprise.api.endpoint" +model = "enterprise-model-v1" +openai_api_key = "enterprise api key here" +max_concurrent_requests = 2 \ No newline at end of file diff --git a/src/cli_args.rs b/src/cli_args.rs index d93d7f7..f4c987f 100644 --- a/src/cli_args.rs +++ b/src/cli_args.rs @@ -29,6 +29,10 @@ pub struct Cli { #[arg(long, global = true)] pub debug: bool, + /// Max concurrent requests to the LLM API + #[arg(long, global = true)] + pub max: Option<usize>, + /// Model name to use (e.g. gpt-4o-mini). If 'none', acts like --no-model. 
#[arg(short, long, global = true)] pub model: Option<String>, @@ -38,7 +42,7 @@ pub no_model: bool, /// API key (otherwise uses OPENAI_API_KEY env var) - #[arg(short = 'k', long, env = "OPENAI_API_KEY", global = true)] + #[arg(short = 'k', long, global = true)] pub api_key: Option<String>, /// Optional: a brief human description of the ticket (for commit/PR summaries) diff --git a/src/config.rs b/src/config.rs index 008d795..a75749a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,51 +1,98 @@ -use crate::Cli; +use crate::{git, Cli}; use serde::Deserialize; +use std::collections::HashMap; use std::env; use std::fs; use std::path::PathBuf; +use git::detect_repo_id; /// Final resolved configuration for commitbot. #[derive(Debug, Clone)] pub struct Config { pub openai_api_key: String, pub model: String, + pub max_concurrent_requests: usize, } impl Config { /// Build the final config from CLI flags, environment, TOML file, and defaults. /// - /// Precedence: - /// 1. CLI flags (`--model`) - /// 2. Env var `COMMITBOT_MODEL` - /// 3. TOML `~/.config/commitbot.toml` - /// 4. Hardcoded default ("gpt-5-nano") + /// Precedence (highest to lowest): + /// 1. CLI flags (`--model`, `--api-key`, `--max`) + /// 2. Env vars (`COMMITBOT_MODEL`, `OPENAI_API_KEY`, `COMMITBOT_MAX_CONCURRENT_REQUESTS`) + /// 3. Per-repo table in `~/.config/commitbot.toml` (e.g. ["mikegarde/commitbot"]) + /// 4. [default] table in `~/.config/commitbot.toml` + /// 5. Hardcoded defaults (model = "gpt-5-nano", max_concurrent_requests = 4) pub fn from_sources(cli: &Cli) -> Self { - let file_cfg = load_file_config().unwrap_or_default(); + let file_cfg_root = load_file_config().unwrap_or_default(); + let repo_id = detect_repo_id(); + // Split file config into [default] and repo-specific override (if any). 
+ let default_file_cfg = file_cfg_root.default.unwrap_or_default(); + let repo_file_cfg = repo_id + .as_deref() + .and_then(|id| file_cfg_root.repos.get(id)) + .cloned() + .unwrap_or_default(); + + // CLI values let model_cli = cli.model.clone(); let api_key_cli = cli.api_key.clone(); + let max_cli = cli.max; + + // Env values let model_env = env::var("COMMITBOT_MODEL").ok(); let api_key_env = env::var("OPENAI_API_KEY").ok(); + let max_env = env::var("COMMITBOT_MAX_CONCURRENT_REQUESTS") + .ok() + .and_then(|s| s.parse::<usize>().ok()); + // Resolve model let model = model_cli .or(model_env) - .or(file_cfg.model) + .or(repo_file_cfg.model) + .or(default_file_cfg.model) .unwrap_or_else(|| "gpt-5-nano".to_string()); + // Resolve API key (must exist somewhere) let openai_api_key = api_key_cli .or(api_key_env) - .or(file_cfg.openai_api_key) - .expect("OPENAI_API_KEY must be set via env var or CLI"); + .or(repo_file_cfg.openai_api_key) + .or(default_file_cfg.openai_api_key) + .expect("OPENAI_API_KEY must be set via CLI, env var, or config file"); + + // Resolve max concurrency; default to 4 if not specified anywhere + let max_concurrent_requests = max_cli + .or(max_env) + .or(repo_file_cfg.max_concurrent_requests) + .or(default_file_cfg.max_concurrent_requests) + .unwrap_or(4); - Config { model, openai_api_key } + Config { + model, + openai_api_key, + max_concurrent_requests, + } } } -#[derive(Debug, Default, Deserialize)] +#[derive(Debug, Default, Deserialize, Clone)] struct FileConfig { /// Default model to use when not provided via CLI or env. 
pub model: Option<String>, pub openai_api_key: Option<String>, + pub max_concurrent_requests: Option<usize>, +} + +/// Root of the TOML file: +/// - [default] +/// - ["owner/repo"] tables flattened into `repos` +#[derive(Debug, Default, Deserialize)] +struct FileConfigRoot { + pub default: Option<FileConfig>, + + #[serde(flatten)] + pub repos: HashMap<String, FileConfig>, } /// Return `~/.config/commitbot.toml` @@ -54,12 +101,13 @@ fn config_path() -> Option<PathBuf> { Some(home.join(".config").join("commitbot.toml")) } -fn load_file_config() -> Option<FileConfig> { +fn load_file_config() -> Option<FileConfigRoot> { let path = config_path()?; if !path.exists() { return None; } let data = fs::read_to_string(&path).ok()?; - toml::from_str::<FileConfig>(&data).ok() + toml::from_str::<FileConfigRoot>(&data).ok() } + diff --git a/src/git.rs b/src/git.rs index 2db73ac..1476bc7 100644 --- a/src/git.rs +++ b/src/git.rs @@ -191,3 +191,45 @@ pub fn stage_all() -> Result<()> { git_output(&["add", "-A"])?; Ok(()) } + +/// Try to derive a repo identifier like "owner/repo" from `git remote.origin.url`. +pub fn detect_repo_id() -> Option<String> { + use std::process::Command; + + let output = Command::new("git") + .args(["config", "--get", "remote.origin.url"]) + .output() + .ok()?; + + if !output.status.success() { + return None; + } + + let url = String::from_utf8(output.stdout).ok()?; + let trimmed = url.trim().trim_end_matches(".git"); + + // For SSH: git@github.com:owner/repo + // For HTTPS: https://github.com/owner/repo + let path = if let Some(idx) = trimmed.find("://") { + // Strip scheme and host, keep "owner/repo" + let rest = &trimmed[idx + 3..]; + match rest.find('/') { + Some(slash) => &rest[slash + 1..], + None => rest, + } + } else if let Some(idx) = trimmed.find(':') { + // SSH-style: after ':' is "owner/repo" + &trimmed[idx + 1..] 
+ } else { + trimmed + }; + + let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + if segments.len() >= 2 { + let owner = segments[segments.len() - 2]; + let repo = segments[segments.len() - 1]; + Some(format!("{}/{}", owner, repo)) + } else { + None + } +} \ No newline at end of file diff --git a/src/llm/mod.rs b/src/llm/mod.rs index 511a3af..f091b9c 100644 --- a/src/llm/mod.rs +++ b/src/llm/mod.rs @@ -5,7 +5,7 @@ use crate::git::{PrItem, PrSummaryMode}; use anyhow::Result; /// Trait for talking to an LLM (real or dummy). -pub trait LlmClient { +pub trait LlmClient: Send + Sync { /// Generate a per-file summary based on diff + metadata. fn summarize_file( &self, diff --git a/src/main.rs b/src/main.rs index 82416d0..fb4a098 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,13 +7,19 @@ mod setup; use anyhow::{anyhow, Result}; use clap::Parser; use config::Config; -use crate::cli_args::{Cli, Command}; -use crate::git::{current_branch, staged_diff, staged_files, staged_diff_for_file, - write_commit_editmsg, collect_pr_items, PrSummaryMode, stage_all}; -use crate::llm::LlmClient; +use indicatif::ProgressBar; use std::collections::HashSet; use std::io::{self, Write}; +use std::sync::{Arc, Mutex}; +use std::thread; + +use crate::cli_args::{Cli, Command}; +use crate::git::{ + collect_pr_items, current_branch, staged_diff, staged_diff_for_file, staged_files, stage_all, + write_commit_editmsg, PrSummaryMode, +}; +use crate::llm::LlmClient; use crossterm::{ cursor, @@ -71,11 +77,7 @@ fn tprintln(out: &mut W, s: &str) -> io::Result<()> { } /// Arrow-key UI for choosing a FileCategory (no diff preview). 
-fn categorize_file_interactive( - idx: usize, - total: usize, - path: &str, -) -> Result<FileCategory> { +fn categorize_file_interactive(idx: usize, total: usize, path: &str) -> Result<FileCategory> { use FileCategory::*; let mut stdout = io::stdout(); @@ -167,8 +169,105 @@ fn categorize_file_interactive( res } -/// Interactive mode: classify files, then do all LLM calls afterward. -fn run_interactive(cli: &Cli, llm: &dyn LlmClient) -> Result<()> { +/// Run per-file summaries concurrently, honoring `max_concurrent_requests`. +fn summarize_files_concurrently( + branch: &str, + file_changes: &mut [FileChange], + indices: &[usize], + ticket_summary: Option<&str>, + llm: &dyn LlmClient, + max_concurrent_requests: usize, + debug: bool, + pb: &ProgressBar, +) -> Result<()> { + if indices.is_empty() { + return Ok(()); + } + + let max_concurrent = max_concurrent_requests.max(1); + + // Store (file_index, result) for all summarizations. + let results: Arc<Mutex<Vec<(usize, Result<String>)>>> = + Arc::new(Mutex::new(Vec::new())); + + // Process in chunks of `max_concurrent` so we never have more than that many + // in-flight LLM calls at once. + for chunk in indices.chunks(max_concurrent) { + thread::scope(|scope| { + for &file_idx in chunk { + let llm_ref = llm; + let branch = branch.to_string(); + let ticket_summary = ticket_summary.map(str::to_owned); + let results = Arc::clone(&results); + let pb = pb.clone(); + + // Clone just the data we need from this file so we don't share &mut across threads. 
+ let path = file_changes[file_idx].path.clone(); + let diff = file_changes[file_idx].diff.clone(); + let category = file_changes[file_idx].category; + + scope.spawn(move || { + if debug { + eprintln!("[DEBUG] Summarizing file: {}", path); + } + + let res = (|| -> Result<String> { + let fc = FileChange { + path, + category, + diff, + summary: None, + }; + + let summary = llm_ref.summarize_file( + &branch, + &fc, + ticket_summary.as_deref(), + debug, + )?; + Ok(summary) + })(); + + // Always advance the progress bar for this file, even if it errors. + pb.inc(1); + + let mut lock = results.lock().expect("results mutex poisoned"); + lock.push((file_idx, res)); + }); + } + }); + } + + // Unwrap Arc and Mutex and apply results back onto file_changes. + let results = Arc::try_unwrap(results) + .expect("results Arc still has multiple owners") + .into_inner() + .expect("results mutex poisoned"); + + let mut first_err: Option<anyhow::Error> = None; + + for (idx, res) in results { + match res { + Ok(summary) => { + file_changes[idx].summary = Some(summary); + } + Err(e) => { + if first_err.is_none() { + first_err = Some(e); + } + } + } + } + + if let Some(err) = first_err { + return Err(err); + } + + Ok(()) +} + +/// Interactive mode: classify files, then do all LLM calls afterward (batched with concurrency). 
+fn run_interactive(cli: &Cli, cfg: &Config, llm: &dyn LlmClient) -> Result<()> { let branch = current_branch()?; let files = staged_files()?; if files.is_empty() { @@ -186,7 +285,7 @@ fn run_interactive(cli: &Cli, llm: &dyn LlmClient) -> Result<()> { let mut file_changes: Vec<FileChange> = Vec::new(); - // Phase 1: fast, purely interactive classification + // Phase 1: interactive classification for (idx, path) in files.iter().enumerate() { let diff = staged_diff_for_file(path)?; @@ -200,35 +299,46 @@ }); } - // Phase 2: LLM calls (can be slow, but user is done answering questions) + // Phase 2: LLM calls println!(); - println!("Asking the model..."); + println!("Asking {}...", cfg.model); let total = file_changes.len(); + let pb = ProgressBar::new((total + 1) as u64); - for (i, fc) in file_changes.iter_mut().enumerate() { - if matches!(fc.category, FileCategory::Ignored) { - continue; - } + // Pre-increment for ignored files and collect indices that actually need summarization. + let mut indices_to_summarize = Vec::new(); + let mut ignored_count = 0usize; - if cli.debug { - eprintln!( - "[DEBUG] Summarizing file {}/{}: {}", - i + 1, - total, - fc.path - ); + for (idx, fc) in file_changes.iter().enumerate() { + if matches!(fc.category, FileCategory::Ignored) { + pb.inc(1); + ignored_count += 1; + } else { + indices_to_summarize.push(idx); } + } - let summary = llm.summarize_file( - &branch, - fc, - ticket_summary.as_deref(), - cli.debug, - )?; - fc.summary = Some(summary); + if cli.debug { + eprintln!( + "[DEBUG] Summarizing {} files ({} ignored). 
max_concurrent_requests = {}", + indices_to_summarize.len(), + ignored_count, + cfg.max_concurrent_requests, + ); } + summarize_files_concurrently( + &branch, + &mut file_changes, + &indices_to_summarize, + ticket_summary.as_deref(), + llm, + cfg.max_concurrent_requests, + cli.debug, + &pb, + )?; + // Final commit message let commit_message = llm.generate_commit_message( &branch, @@ -237,6 +347,9 @@ fn run_interactive(cli: &Cli, llm: &dyn LlmClient) -> Result<()> { cli.debug, )?; + pb.inc(1); + pb.finish_with_message("Done."); + println!(); println!("----- Commit Message Preview -----"); println!("{commit_message}"); @@ -371,7 +484,7 @@ fn main() -> Result<()> { ), None => { if cli.ask { - run_interactive(&cli, boxed_client.as_ref()) + run_interactive(&cli, &cfg, boxed_client.as_ref()) } else { run_simple(&cli, boxed_client.as_ref()) }