diff --git a/crates/raysense-cli/src/lib.rs b/crates/raysense-cli/src/lib.rs index bb20790..64843e8 100644 --- a/crates/raysense-cli/src/lib.rs +++ b/crates/raysense-cli/src/lib.rs @@ -781,6 +781,125 @@ fn visualization_html( .iter() .map(|file| (file.path.clone(), file.commits)) .collect(); + let age_by_path: std::collections::HashMap = health + .metrics + .evolution + .file_ages + .iter() + .map(|file| (file.path.clone(), file.age_days)) + .collect(); + let risk_by_path: std::collections::HashMap = health + .metrics + .evolution + .temporal_hotspots + .iter() + .map(|file| (file.path.clone(), file.risk_score)) + .collect(); + let instability_by_module: std::collections::HashMap = health + .metrics + .architecture + .unstable_modules + .iter() + .map(|module| (module.module.clone(), module.instability)) + .collect(); + let directory_for = |path: &str| -> String { + path.rsplit_once('/') + .map(|(dir, _)| dir.to_string()) + .unwrap_or_default() + }; + + let path_for_file: Vec = report + .files + .iter() + .map(|file| file.path.to_string_lossy().into_owned()) + .collect(); + let function_to_file: Vec = report + .functions + .iter() + .map(|function| function.file_id) + .collect(); + let entry_point_files: std::collections::HashSet = report + .entry_points + .iter() + .map(|entry| entry.file_id) + .collect(); + let type_name_to_file: std::collections::HashMap = report + .types + .iter() + .map(|type_fact| (type_fact.name.clone(), type_fact.file_id)) + .collect(); + + let mut imports_out: Vec> = + vec![std::collections::BTreeSet::new(); report.files.len()]; + let mut imports_in: Vec> = + vec![std::collections::BTreeSet::new(); report.files.len()]; + for import in &report.imports { + if let Some(to) = import.resolved_file { + if to == import.from_file { + continue; + } + imports_out[import.from_file].insert(to); + imports_in[to].insert(import.from_file); + } + } + let mut calls_out: Vec> = + vec![std::collections::BTreeSet::new(); report.files.len()]; + let mut 
calls_in: Vec> = + vec![std::collections::BTreeSet::new(); report.files.len()]; + for edge in &report.call_edges { + let (Some(&from_file), Some(&to_file)) = ( + function_to_file.get(edge.caller_function), + function_to_file.get(edge.callee_function), + ) else { + continue; + }; + if from_file == to_file { + continue; + } + calls_out[from_file].insert(to_file); + calls_in[to_file].insert(from_file); + } + let mut inherits_out: Vec> = + vec![std::collections::BTreeSet::new(); report.files.len()]; + let mut inherits_in: Vec> = + vec![std::collections::BTreeSet::new(); report.files.len()]; + for type_fact in &report.types { + for base in &type_fact.bases { + let Some(&base_file) = type_name_to_file.get(base) else { + continue; + }; + if base_file == type_fact.file_id { + continue; + } + inherits_out[type_fact.file_id].insert(base_file); + inherits_in[base_file].insert(type_fact.file_id); + } + } + let render_paths = |ids: &std::collections::BTreeSet| -> Vec { + ids.iter() + .filter_map(|id| path_for_file.get(*id).cloned()) + .collect() + }; + let adjacency_json = serde_json::to_string( + &report + .files + .iter() + .map(|file| { + let id = file.file_id; + serde_json::json!({ + "path": path_for_file[id], + "imports_out": render_paths(&imports_out[id]), + "imports_in": render_paths(&imports_in[id]), + "calls_out": render_paths(&calls_out[id]), + "calls_in": render_paths(&calls_in[id]), + "inherits_out": render_paths(&inherits_out[id]), + "inherits_in": render_paths(&inherits_in[id]) + }) + }) + .collect::>(), + ) + .unwrap_or_else(|_| "[]".to_string()); + let cells = report .files .iter() @@ -788,12 +907,25 @@ fn visualization_html( let width = ((file.lines as f64 / max_lines as f64) * 100.0).max(8.0); let path = file.path.to_string_lossy(); let churn = churn_by_path.get(path.as_ref()).copied().unwrap_or(0); + let age = age_by_path.get(path.as_ref()).copied().unwrap_or(0); + let risk = risk_by_path.get(path.as_ref()).copied().unwrap_or(0); + let instability = 
instability_by_module + .get(file.module.as_str()) + .copied() + .unwrap_or(0.0); + let directory = directory_for(path.as_ref()); + let is_entry = if entry_point_files.contains(&file.file_id) { 1 } else { 0 }; format!( - "
{}{} lines{}
", + "
{}{} lines{}
", html_escape(&path), file.lines, html_escape(&file.language_name), churn, + age, + risk, + instability, + html_escape(&directory), + is_entry, html_escape(&path), file.lines, html_escape(&file.language_name) @@ -996,8 +1128,28 @@ table{{border-collapse:collapse;width:100%;margin-top:16px}}td,th{{border-bottom + + + + +
{}
@@ -1015,6 +1167,11 @@ table{{border-collapse:collapse;width:100%;margin-top:16px}}td,th{{border-bottom

Test Gaps

{}
sourceexpected tests
+ + "#, @@ -1079,7 +1380,8 @@ table{{border-collapse:collapse;width:100%;margin-top:16px}}td,th{{border-bottom rules, complex, gaps, - json_script_escape(&telemetry) + json_script_escape(&telemetry), + json_script_escape(&adjacency_json) ) } @@ -2008,13 +2310,16 @@ fn print_health(report: &raysense_core::ScanReport, health: &raysense_core::Heal health.metrics.calls.max_function_fan_out ); println!( - "size max_file_lines={} max_function_lines={} large_files={} long_functions={} file_size_entropy={:.3} file_size_entropy_bits={:.3}", + "size max_file_lines={} max_function_lines={} large_files={} long_functions={} file_size_entropy={:.3} file_size_entropy_bits={:.3} total_lines={} total_comment_lines={} comment_ratio={:.3}", health.metrics.size.max_file_lines, health.metrics.size.max_function_lines, health.metrics.size.large_files, health.metrics.size.long_functions, health.metrics.size.file_size_entropy, - health.metrics.size.file_size_entropy_bits + health.metrics.size.file_size_entropy_bits, + health.metrics.size.total_lines, + health.metrics.size.total_comment_lines, + health.metrics.size.comment_ratio ); println!( "test_gap production_files={} test_files={} files_without_nearby_tests={}", @@ -2098,6 +2403,36 @@ fn print_health(report: &raysense_core::ScanReport, health: &raysense_core::Heal } } + if !health.metrics.evolution.temporal_hotspots.is_empty() { + println!("temporal_hotspots"); + for hotspot in &health.metrics.evolution.temporal_hotspots { + println!( + " risk={} commits={} max_complexity={} {}", + hotspot.risk_score, hotspot.commits, hotspot.max_complexity, hotspot.path, + ); + } + } + + if !health.metrics.evolution.file_ages.is_empty() { + println!("oldest_files"); + for age in &health.metrics.evolution.file_ages { + println!( + " age_days={} last_changed_days={} {}", + age.age_days, age.last_changed_days, age.path, + ); + } + } + + if !health.metrics.evolution.change_coupling.is_empty() { + println!("change_coupling"); + for pair in 
&health.metrics.evolution.change_coupling { + println!( + " strength={:.3} co_commits={} {} <-> {}", + pair.coupling_strength, pair.co_commits, pair.left, pair.right, + ); + } + } + if !health.metrics.calls.top_called_functions.is_empty() { println!("top_called_functions"); for function in &health.metrics.calls.top_called_functions { diff --git a/crates/raysense-cli/src/mcp.rs b/crates/raysense-cli/src/mcp.rs index 06a6f29..0eb590f 100644 --- a/crates/raysense-cli/src/mcp.rs +++ b/crates/raysense-cli/src/mcp.rs @@ -825,7 +825,13 @@ fn evolution_tool(args: &Value) -> Result { "reason": health.metrics.evolution.reason, "commits_sampled": health.metrics.evolution.commits_sampled, "changed_files": health.metrics.evolution.changed_files, - "top_changed_files": limited(&health.metrics.evolution.top_changed_files, limit) + "top_changed_files": limited(&health.metrics.evolution.top_changed_files, limit), + "author_count": health.metrics.evolution.author_count, + "top_authors": limited(&health.metrics.evolution.top_authors, limit), + "file_ownership": limited(&health.metrics.evolution.file_ownership, limit), + "temporal_hotspots": limited(&health.metrics.evolution.temporal_hotspots, limit), + "file_ages": limited(&health.metrics.evolution.file_ages, limit), + "change_coupling": limited(&health.metrics.evolution.change_coupling, limit) } })) } @@ -1288,81 +1294,33 @@ fn break_cycle_recommendations_tool(args: &Value) -> Result { } fn what_if_sequence_tool(actions: &[Value], root: &Path, config: &RaysenseConfig) -> Result { + let parsed_actions: Vec = actions + .iter() + .enumerate() + .map(|(idx, step)| { + serde_json::from_value(step.clone()).map_err(|err| anyhow!("step {idx}: {err}")) + }) + .collect::>()?; + let before_report = scan_path_with_config(root, config)?; let before_health = compute_health_with_config(&before_report, config); let before = build_baseline(&before_report, &before_health); - let mut current = before_report.clone(); - let mut applied = Vec::new(); - 
for (idx, step) in actions.iter().enumerate() { - let kind = step - .get("action") - .and_then(Value::as_str) - .ok_or_else(|| anyhow!("step {idx}: missing action"))?; - let outcome = apply_simulate_step(¤t, config, step, kind) - .map_err(|err| anyhow!("step {idx} ({kind}): {err}"))?; - current = outcome; - applied.push(step.clone()); - } + let after_report = + raysense_core::simulate::simulate_sequence(&before_report, config, &parsed_actions) + .map_err(|err| anyhow!(err.to_string()))?; - let after_health = compute_health_with_config(¤t, config); - let after = build_baseline(¤t, &after_health); + let after_health = compute_health_with_config(&after_report, config); + let after = build_baseline(&after_report, &after_health); Ok(json!({ "root": before_report.snapshot.root, - "actions": applied, + "actions": actions, "before": what_if_health_summary(&before_health), "after": what_if_health_summary(&after_health), "diff": diff_baselines(&before, &after) })) } -fn apply_simulate_step( - current: &raysense_core::ScanReport, - config: &RaysenseConfig, - step: &Value, - kind: &str, -) -> Result { - match kind { - "remove_edge" => { - let from = required_str(step, "from")?; - let to = required_str(step, "to")?; - raysense_core::simulate::remove_edge(current, from, to) - .map_err(|err| anyhow!(err.to_string())) - } - "add_edge" => { - let from = required_str(step, "from")?; - let to = required_str(step, "to")?; - raysense_core::simulate::add_edge(current, from, to) - .map_err(|err| anyhow!(err.to_string())) - } - "remove_file" => { - let file = required_str(step, "file")?; - raysense_core::simulate_remove_file(current, file) - .map_err(|err| anyhow!(err.to_string())) - } - "move_file" => { - let from = required_str(step, "from")?; - let to = required_str(step, "to")?; - raysense_core::simulate_move_file(current, config, from, to) - .map_err(|err| anyhow!(err.to_string())) - } - "break_cycle" => { - let from = required_str(step, "from")?; - let to = required_str(step, 
"to")?; - raysense_core::simulate_break_cycle(current, from, to) - .map_err(|err| anyhow!(err.to_string())) - } - other => Err(anyhow!("unsupported what-if action: {other}")), - } -} - -fn required_str<'a>(value: &'a Value, field: &str) -> Result<&'a str> { - value - .get(field) - .and_then(Value::as_str) - .ok_or_else(|| anyhow!("missing {field}")) -} - fn what_if_health_summary(health: &raysense_core::HealthSummary) -> Value { json!({ "score": health.score, diff --git a/crates/raysense-core/src/facts.rs b/crates/raysense-core/src/facts.rs index a473e7a..e57f57a 100644 --- a/crates/raysense-core/src/facts.rs +++ b/crates/raysense-core/src/facts.rs @@ -73,6 +73,10 @@ pub struct FileFact { pub lines: usize, pub bytes: usize, pub content_hash: String, + /// Number of lines that look like comments (line-prefix or block-body). + /// Heuristic — correctness is best-effort across languages. + #[serde(default)] + pub comment_lines: usize, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -155,4 +159,9 @@ pub struct TypeFact { pub name: String, pub is_abstract: bool, pub line: usize, + /// Base classes / interfaces named on the type's defining line. + /// Empty when the language declares inheritance separately + /// (e.g. Rust `impl Trait for Type`). 
+ #[serde(default)] + pub bases: Vec, } diff --git a/crates/raysense-core/src/graph.rs b/crates/raysense-core/src/graph.rs index 387889a..05aafeb 100644 --- a/crates/raysense-core/src/graph.rs +++ b/crates/raysense-core/src/graph.rs @@ -118,6 +118,7 @@ mod tests { lines: 1, bytes: 1, content_hash: String::new(), + comment_lines: 0, } } diff --git a/crates/raysense-core/src/health.rs b/crates/raysense-core/src/health.rs index adb671a..a8f0bbe 100644 --- a/crates/raysense-core/src/health.rs +++ b/crates/raysense-core/src/health.rs @@ -515,6 +515,12 @@ pub struct SizeMetrics { pub long_functions: usize, pub file_size_entropy: f64, pub file_size_entropy_bits: f64, + #[serde(default)] + pub total_lines: usize, + #[serde(default)] + pub total_comment_lines: usize, + #[serde(default)] + pub comment_ratio: f64, } #[derive(Debug, Clone, Default, Serialize, Deserialize)] @@ -585,6 +591,12 @@ pub struct EvolutionMetrics { pub top_authors: Vec, #[serde(default)] pub file_ownership: Vec, + #[serde(default)] + pub temporal_hotspots: Vec, + #[serde(default)] + pub file_ages: Vec, + #[serde(default)] + pub change_coupling: Vec, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -599,6 +611,38 @@ pub struct EvolutionAuthorMetric { pub commits: usize, } +/// `risk_score = commits * max_cyclomatic_complexity` — high values flag files +/// that are both volatile and intricate. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EvolutionTemporalHotspot { + pub path: String, + pub commits: usize, + pub max_complexity: usize, + pub risk_score: usize, +} + +/// Per-file commit-age window. Timestamps are bounded by the git log lookback, +/// so `first_commit_unix` is the oldest commit *within the sample*, not +/// necessarily the file's true creation date. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EvolutionFileAge { + pub path: String, + pub first_commit_unix: i64, + pub last_commit_unix: i64, + pub age_days: u64, + pub last_changed_days: u64, +} + +/// Pair of files that change together. `coupling_strength` is the Jaccard +/// similarity of their commit sets in `[0, 1]` (1.0 = always co-changed). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EvolutionChangeCoupling { + pub left: String, + pub right: String, + pub co_commits: usize, + pub coupling_strength: f64, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct EvolutionFileOwnership { pub path: String, @@ -791,16 +835,18 @@ fn metrics( hotspots: &[FileHotspot], config: &RaysenseConfig, ) -> MetricsSummary { + let complexity = complexity_metrics(report, config); + let evolution = evolution_metrics(report, &complexity); MetricsSummary { coupling: coupling_metrics(report, hotspots, config), calls: call_metrics(report), architecture: architecture_metrics(report, config), - complexity: complexity_metrics(report, config), + complexity, size: size_metrics(report), entry_points: entry_point_metrics(report), test_gap: test_gap_metrics(report, config), dsm: dsm_metrics(report, config), - evolution: evolution_metrics(report), + evolution, trend: trend_metrics(report), } } @@ -2310,6 +2356,14 @@ fn size_metrics(report: &ScanReport) -> SizeMetrics { let (file_size_entropy, file_size_entropy_bits) = file_size_distribution_entropy(report); + let total_lines: usize = report.files.iter().map(|file| file.lines).sum(); + let total_comment_lines: usize = report.files.iter().map(|file| file.comment_lines).sum(); + let comment_ratio = if total_lines == 0 { + 0.0 + } else { + round3(total_comment_lines as f64 / total_lines as f64) + }; + SizeMetrics { max_file_lines, max_function_lines, @@ -2321,6 +2375,9 @@ fn size_metrics(report: &ScanReport) -> SizeMetrics { .count(), file_size_entropy, file_size_entropy_bits, + total_lines, + 
total_comment_lines, + comment_ratio, } } @@ -2520,7 +2577,7 @@ fn dsm_metrics(report: &ScanReport, config: &RaysenseConfig) -> DsmMetrics { } } -fn evolution_metrics(report: &ScanReport) -> EvolutionMetrics { +fn evolution_metrics(report: &ScanReport, complexity: &ComplexityMetrics) -> EvolutionMetrics { let root = &report.snapshot.root; let prefix = match git_output(root, ["rev-parse", "--show-prefix"]) { Ok(output) => output.trim().replace('\\', "/"), @@ -2535,7 +2592,13 @@ fn evolution_metrics(report: &ScanReport) -> EvolutionMetrics { let log = match git_output( root, - ["log", "-n", "500", "--format=commit:%H|%ae", "--name-only"], + [ + "log", + "-n", + "500", + "--format=commit:%H|%ae|%at", + "--name-only", + ], ) { Ok(output) => output, Err(reason) => { @@ -2556,7 +2619,10 @@ fn evolution_metrics(report: &ScanReport) -> EvolutionMetrics { let mut file_commits: BTreeMap = BTreeMap::new(); let mut author_commits: BTreeMap = BTreeMap::new(); let mut file_author_commits: BTreeMap> = BTreeMap::new(); + let mut file_age_window: BTreeMap = BTreeMap::new(); + let mut pair_counts: BTreeMap<(String, String), usize> = BTreeMap::new(); let mut current_author: Option = None; + let mut current_timestamp: Option = None; let mut commit_files = HashSet::new(); for line in log.lines() { @@ -2568,19 +2634,24 @@ fn evolution_metrics(report: &ScanReport) -> EvolutionMetrics { flush_commit_files_with_author( &mut file_commits, &mut file_author_commits, + &mut file_age_window, + &mut pair_counts, &mut commit_files, current_author.as_deref(), + current_timestamp, ); commits_sampled += 1; - let author = rest - .split_once('|') - .map(|(_, email)| email.trim().to_string()); + let mut parts = rest.splitn(3, '|'); + let _hash = parts.next(); + let author = parts.next().map(|email| email.trim().to_string()); + let timestamp = parts.next().and_then(|raw| raw.trim().parse::().ok()); if let Some(author) = author.as_ref() { if !author.is_empty() { 
*author_commits.entry(author.clone()).or_default() += 1; } } current_author = author; + current_timestamp = timestamp; continue; } @@ -2593,8 +2664,11 @@ fn evolution_metrics(report: &ScanReport) -> EvolutionMetrics { flush_commit_files_with_author( &mut file_commits, &mut file_author_commits, + &mut file_age_window, + &mut pair_counts, &mut commit_files, current_author.as_deref(), + current_timestamp, ); let mut top_changed_files: Vec = file_commits @@ -2652,6 +2726,14 @@ fn evolution_metrics(report: &ScanReport) -> EvolutionMetrics { }); file_ownership.truncate(20); + let temporal_hotspots = temporal_hotspots(&file_commits, complexity); + let now_unix = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|dur| dur.as_secs() as i64) + .unwrap_or(0); + let file_ages = file_ages(&file_age_window, now_unix); + let change_coupling = change_coupling(&pair_counts, &file_commits); + EvolutionMetrics { available: true, reason: String::new(), @@ -2661,9 +2743,136 @@ fn evolution_metrics(report: &ScanReport) -> EvolutionMetrics { author_count, top_authors, file_ownership, + temporal_hotspots, + file_ages, + change_coupling, } } +/// Files with at least 3 co-commits, ranked by Jaccard similarity. Pairs that +/// only ever appear together are at strength 1.0; pairs that share a few +/// commits but each change independently are much lower. 
+fn change_coupling( + pair_counts: &BTreeMap<(String, String), usize>, + file_commits: &BTreeMap, +) -> Vec { + const MIN_CO_COMMITS: usize = 3; + let mut pairs: Vec = pair_counts + .iter() + .filter_map(|((a, b), count)| { + if *count < MIN_CO_COMMITS { + return None; + } + let count_a = file_commits.get(a).copied().unwrap_or(0); + let count_b = file_commits.get(b).copied().unwrap_or(0); + let union = count_a + count_b - count; + if union == 0 { + return None; + } + let strength = (*count as f64) / (union as f64); + Some(EvolutionChangeCoupling { + left: a.clone(), + right: b.clone(), + co_commits: *count, + coupling_strength: round3(strength), + }) + }) + .collect(); + pairs.sort_by(|a, b| { + b.coupling_strength + .partial_cmp(&a.coupling_strength) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| b.co_commits.cmp(&a.co_commits)) + .then_with(|| a.left.cmp(&b.left)) + .then_with(|| a.right.cmp(&b.right)) + }); + pairs.truncate(20); + pairs +} + +/// Build the top-N oldest files within the git log window. Returns at most 20 +/// entries sorted by `age_days` descending. Files with a zero or future +/// timestamp (clock skew, missing data) are skipped. 
+fn file_ages(window: &BTreeMap, now_unix: i64) -> Vec { + if window.is_empty() || now_unix <= 0 { + return Vec::new(); + } + const SECONDS_PER_DAY: i64 = 86_400; + let mut ages: Vec = window + .iter() + .filter_map(|(path, (first, last))| { + if *first <= 0 || *last <= 0 || *first > now_unix { + return None; + } + let age_days = ((now_unix - *first).max(0) / SECONDS_PER_DAY) as u64; + let last_changed_days = ((now_unix - *last).max(0) / SECONDS_PER_DAY) as u64; + Some(EvolutionFileAge { + path: path.clone(), + first_commit_unix: *first, + last_commit_unix: *last, + age_days, + last_changed_days, + }) + }) + .collect(); + ages.sort_by(|a, b| { + b.age_days + .cmp(&a.age_days) + .then_with(|| b.last_changed_days.cmp(&a.last_changed_days)) + .then_with(|| a.path.cmp(&b.path)) + }); + ages.truncate(20); + ages +} + +/// Cross-reference commit churn with cyclomatic complexity to surface files +/// that are both volatile and intricate. Risk = commits × max-cyclomatic; +/// files with risk == 0 (no commits or trivial complexity) are dropped. 
+fn temporal_hotspots( + file_commits: &BTreeMap, + complexity: &ComplexityMetrics, +) -> Vec { + if file_commits.is_empty() || complexity.all_functions.is_empty() { + return Vec::new(); + } + + let mut max_complexity_per_file: HashMap<&str, usize> = HashMap::new(); + for func in &complexity.all_functions { + let entry = max_complexity_per_file + .entry(func.path.as_str()) + .or_default(); + if func.value > *entry { + *entry = func.value; + } + } + + let mut hotspots: Vec = file_commits + .iter() + .filter_map(|(path, commits)| { + let max_cc = max_complexity_per_file.get(path.as_str()).copied()?; + let risk = commits.saturating_mul(max_cc); + if risk == 0 { + return None; + } + Some(EvolutionTemporalHotspot { + path: path.clone(), + commits: *commits, + max_complexity: max_cc, + risk_score: risk, + }) + }) + .collect(); + + hotspots.sort_by(|a, b| { + b.risk_score + .cmp(&a.risk_score) + .then_with(|| b.commits.cmp(&a.commits)) + .then_with(|| a.path.cmp(&b.path)) + }); + hotspots.truncate(10); + hotspots +} + fn bus_factor_for(sorted: &[(&String, &usize)], total: usize) -> usize { if total == 0 { return 0; @@ -2679,23 +2888,57 @@ fn bus_factor_for(sorted: &[(&String, &usize)], total: usize) -> usize { sorted.len().max(1) } +/// Cap on files-per-commit considered for pair counting. A merge or +/// repo-wide rename touches hundreds of files but expresses no real coupling +/// signal; capping keeps pair generation `O(N²)` bounded. 
+const MAX_FILES_PER_COMMIT_FOR_COUPLING: usize = 50; + fn flush_commit_files_with_author( file_commits: &mut BTreeMap, file_author_commits: &mut BTreeMap>, + file_age_window: &mut BTreeMap, + pair_counts: &mut BTreeMap<(String, String), usize>, commit_files: &mut HashSet, author: Option<&str>, + timestamp: Option, ) { + if commit_files.len() <= MAX_FILES_PER_COMMIT_FOR_COUPLING { + let sorted: Vec<&String> = { + let mut v: Vec<&String> = commit_files.iter().collect(); + v.sort(); + v + }; + for i in 0..sorted.len() { + for j in (i + 1)..sorted.len() { + let key = (sorted[i].clone(), sorted[j].clone()); + *pair_counts.entry(key).or_default() += 1; + } + } + } for path in commit_files.drain() { *file_commits.entry(path.clone()).or_default() += 1; if let Some(author) = author { if !author.is_empty() { *file_author_commits - .entry(path) + .entry(path.clone()) .or_default() .entry(author.to_string()) .or_default() += 1; } } + if let Some(ts) = timestamp { + file_age_window + .entry(path) + .and_modify(|(first, last)| { + if ts < *first { + *first = ts; + } + if ts > *last { + *last = ts; + } + }) + .or_insert((ts, ts)); + } } } @@ -4833,6 +5076,7 @@ order = 2 lines: 1, bytes: 1, content_hash: String::new(), + comment_lines: 0, } } @@ -4898,4 +5142,160 @@ order = 2 callee_function, } } + + fn complexity_metric(file_id: usize, path: &str, value: usize) -> FunctionComplexityMetric { + FunctionComplexityMetric { + function_id: 0, + file_id, + path: path.to_string(), + name: format!("fn_{file_id}"), + value, + cognitive_value: value, + } + } + + #[test] + fn temporal_hotspots_rank_by_churn_times_complexity() { + let mut file_commits: BTreeMap = BTreeMap::new(); + file_commits.insert("src/hot.rs".to_string(), 12); + file_commits.insert("src/quiet.rs".to_string(), 1); + file_commits.insert("src/simple.rs".to_string(), 50); + file_commits.insert("src/orphan.rs".to_string(), 3); + + let complexity = ComplexityMetrics { + all_functions: vec![ + complexity_metric(0, 
"src/hot.rs", 4), + complexity_metric(0, "src/hot.rs", 9), + complexity_metric(1, "src/quiet.rs", 20), + complexity_metric(2, "src/simple.rs", 1), + ], + ..ComplexityMetrics::default() + }; + + let hotspots = temporal_hotspots(&file_commits, &complexity); + + assert_eq!(hotspots.len(), 3, "orphan.rs has no complexity → dropped"); + assert!( + hotspots.iter().all(|h| h.path != "src/orphan.rs"), + "files with no functions must not appear", + ); + + let top = &hotspots[0]; + assert_eq!(top.path, "src/hot.rs"); + assert_eq!(top.commits, 12); + assert_eq!( + top.max_complexity, 9, + "uses max function complexity per file" + ); + assert_eq!(top.risk_score, 12 * 9); + + let simple = hotspots.iter().find(|h| h.path == "src/simple.rs").unwrap(); + let quiet = hotspots.iter().find(|h| h.path == "src/quiet.rs").unwrap(); + assert_eq!(simple.risk_score, 50); + assert_eq!(quiet.risk_score, 20); + assert!( + hotspots[1].risk_score >= hotspots[2].risk_score, + "results are sorted by risk_score descending", + ); + } + + #[test] + fn file_ages_rank_oldest_first_and_drop_invalid() { + const DAY: i64 = 86_400; + let now: i64 = 100 * DAY; + let mut window: BTreeMap = BTreeMap::new(); + window.insert("ancient.rs".to_string(), (10 * DAY, 90 * DAY)); + window.insert("recent.rs".to_string(), (95 * DAY, 99 * DAY)); + window.insert("middle.rs".to_string(), (50 * DAY, 60 * DAY)); + // Future timestamp from clock skew is dropped. + window.insert("future.rs".to_string(), (110 * DAY, 110 * DAY)); + // Zero timestamp (no data) is dropped. 
+ window.insert("zero.rs".to_string(), (0, 0)); + + let ages = file_ages(&window, now); + + assert_eq!(ages.len(), 3, "future.rs and zero.rs must be skipped"); + assert_eq!(ages[0].path, "ancient.rs"); + assert_eq!(ages[0].age_days, 90); + assert_eq!(ages[0].last_changed_days, 10); + assert_eq!(ages[1].path, "middle.rs"); + assert_eq!(ages[2].path, "recent.rs"); + assert_eq!(ages[2].age_days, 5); + } + + #[test] + fn file_ages_returns_empty_when_now_is_unknown() { + let mut window: BTreeMap = BTreeMap::new(); + window.insert("a.rs".to_string(), (1, 2)); + assert!(file_ages(&window, 0).is_empty()); + } + + #[test] + fn change_coupling_ranks_pairs_by_jaccard_above_min_threshold() { + let mut pair_counts: BTreeMap<(String, String), usize> = BTreeMap::new(); + pair_counts.insert(("a.rs".to_string(), "b.rs".to_string()), 5); + pair_counts.insert(("a.rs".to_string(), "c.rs".to_string()), 4); + pair_counts.insert(("b.rs".to_string(), "c.rs".to_string()), 2); + pair_counts.insert(("d.rs".to_string(), "e.rs".to_string()), 3); + + let mut file_commits: BTreeMap = BTreeMap::new(); + file_commits.insert("a.rs".to_string(), 5); + file_commits.insert("b.rs".to_string(), 5); + file_commits.insert("c.rs".to_string(), 6); + file_commits.insert("d.rs".to_string(), 3); + file_commits.insert("e.rs".to_string(), 3); + + let pairs = change_coupling(&pair_counts, &file_commits); + + assert_eq!( + pairs.len(), + 3, + "the 2-co-commit pair is below MIN_CO_COMMITS" + ); + assert_eq!(pairs[0].left, "a.rs"); + assert_eq!(pairs[0].right, "b.rs"); + assert!( + (pairs[0].coupling_strength - 1.0).abs() < 1e-9, + "always co-changed" + ); + let de = pairs.iter().find(|p| p.left == "d.rs").unwrap(); + assert!((de.coupling_strength - 1.0).abs() < 1e-9); + let ac = pairs + .iter() + .find(|p| p.left == "a.rs" && p.right == "c.rs") + .unwrap(); + assert!(ac.coupling_strength < 1.0); + } + + #[test] + fn change_coupling_returns_empty_when_no_pair_meets_threshold() { + let mut pair_counts: 
BTreeMap<(String, String), usize> = BTreeMap::new(); + pair_counts.insert(("a.rs".to_string(), "b.rs".to_string()), 1); + let mut file_commits: BTreeMap = BTreeMap::new(); + file_commits.insert("a.rs".to_string(), 1); + file_commits.insert("b.rs".to_string(), 1); + let pairs = change_coupling(&pair_counts, &file_commits); + assert!(pairs.is_empty()); + } + + #[test] + fn temporal_hotspots_skip_zero_risk() { + let mut file_commits: BTreeMap = BTreeMap::new(); + file_commits.insert("src/zero.rs".to_string(), 0); + file_commits.insert("src/some.rs".to_string(), 4); + + let complexity = ComplexityMetrics { + all_functions: vec![ + complexity_metric(0, "src/zero.rs", 5), + complexity_metric(1, "src/some.rs", 0), + ], + ..ComplexityMetrics::default() + }; + + let hotspots = temporal_hotspots(&file_commits, &complexity); + assert!( + hotspots.is_empty(), + "either factor being zero means no risk score", + ); + } } diff --git a/crates/raysense-core/src/profile.rs b/crates/raysense-core/src/profile.rs index ea82b5a..abf6e26 100644 --- a/crates/raysense-core/src/profile.rs +++ b/crates/raysense-core/src/profile.rs @@ -101,6 +101,7 @@ mod tests { lines: 1, bytes: 1, content_hash: String::new(), + comment_lines: 0, } } } diff --git a/crates/raysense-core/src/scanner.rs b/crates/raysense-core/src/scanner.rs index a10604d..2f2a2a0 100644 --- a/crates/raysense-core/src/scanner.rs +++ b/crates/raysense-core/src/scanner.rs @@ -147,6 +147,7 @@ pub fn scan_path_with_config( lines: content.lines().count(), bytes: content.len(), content_hash: hash_content(&content), + comment_lines: count_comment_lines(&content), }; let mut file_functions = if let Some(plugin) = plugin.as_ref() { @@ -1589,7 +1590,9 @@ fn collect_tree_sitter_imports( match node.kind() { "use_declaration" => { if let Some(target) = rust_use_target(content, node) { - imports.push(new_import(file_id, &target, "use")); + for expanded in expand_brace_targets(&target) { + imports.push(new_import(file_id, &expanded, "use")); + 
/// Heuristic count of comment lines.
///
/// A line counts as a comment when, after leading whitespace is removed, it
/// - starts with `//`, `#`, `--`, `;` (lisp/asm) or `*` (continuation of a
///   `/* ... */` block), or
/// - starts with `/*`, or
/// - lies inside a block opened by an earlier line that started with `/*`
///   and has not yet seen a `*/`.
///
/// The heuristic is deliberately cross-language and coarse: the goal is a
/// comparable ratio between files, not an exact count. Trailing comments
/// after code and string-delimited docstrings are not counted.
fn count_comment_lines(content: &str) -> usize {
    let mut count = 0;
    let mut in_block = false;
    for raw_line in content.lines() {
        let line = raw_line.trim_start();
        if in_block {
            // Every line inside an open block is a comment line, including
            // the one that closes it.
            count += 1;
            in_block = !line.contains("*/");
            continue;
        }
        if let Some(rest) = line.strip_prefix("/*") {
            count += 1;
            // Look for the closer only *after* the opener: in `/*/` the
            // characters `*/` overlap the opening `/*` and do not close it.
            in_block = !rest.contains("*/");
            continue;
        }
        let is_line_comment = line.starts_with("//")
            || line.starts_with('#')
            || line.starts_with("--")
            || line.starts_with(';')
            || line.starts_with('*');
        if is_line_comment {
            count += 1;
        }
    }
    count
}

/// Fan a `prefix::{a, b, c}` style target out into `["prefix::a",
/// "prefix::b", "prefix::c"]`.
///
/// * Inputs without braces pass through unchanged, so callers can use this
///   unconditionally.
/// * Nested groups are expanded recursively:
///   `std::{io::{self, Read}, fmt}` yields `std::io::self`, `std::io::Read`
///   and `std::fmt`, in source order.
/// * Text after the closing brace is appended to every expanded item.
/// * An unbalanced opening brace or an empty group (`foo::{}`) returns the
///   original target as the only element.
fn expand_brace_targets(target: &str) -> Vec<String> {
    let Some(open) = target.find('{') else {
        return vec![target.to_string()];
    };

    // Locate the brace that balances `open`, skipping over nested groups.
    let mut depth = 0usize;
    let mut matching_close = None;
    for (offset, ch) in target[open..].char_indices() {
        match ch {
            '{' => depth += 1,
            '}' => {
                // `depth` is at least 1 here: the scan starts on `{`.
                depth -= 1;
                if depth == 0 {
                    matching_close = Some(open + offset);
                    break;
                }
            }
            _ => {}
        }
    }
    let Some(close) = matching_close else {
        return vec![target.to_string()];
    };

    let prefix = &target[..open];
    let suffix = &target[close + 1..];
    let inner = &target[open + 1..close];

    // Split the group body on commas that are not inside a nested group.
    let mut items: Vec<&str> = Vec::new();
    let mut nested = 0usize;
    let mut item_start = 0;
    for (idx, ch) in inner.char_indices() {
        match ch {
            '{' => nested += 1,
            '}' => nested = nested.saturating_sub(1),
            ',' if nested == 0 => {
                items.push(&inner[item_start..idx]);
                item_start = idx + 1;
            }
            _ => {}
        }
    }
    items.push(&inner[item_start..]);

    // Each recursive call sees one brace pair fewer, so this terminates.
    let expanded: Vec<String> = items
        .into_iter()
        .map(str::trim)
        .filter(|item| !item.is_empty())
        .flat_map(|item| expand_brace_targets(&format!("{prefix}{item}{suffix}")))
        .collect();
    if expanded.is_empty() {
        return vec![target.to_string()];
    }
    expanded
}

/// Generic base-class parser. Handles four common shapes that put
/// inheritance on the same line as the type name:
///   `class Foo extends Bar implements Baz, Qux`   (Java/Kotlin/TS/JS)
///   `class Foo with Bar with Baz`                 (Scala — also extends/with)
///   `class Foo : public Bar, virtual Baz`         (C++/C#)
///   `class Foo(Bar, Baz):`                        (Python)
///
/// Returns the base identifiers sorted and de-duplicated, stripped of
/// modifier keywords (`public`, `protected`, `private`, `virtual`, `static`,
/// `final`). Items containing `=` (for example Python's
/// `metaclass=ABCMeta`) are keyword arguments, not bases, and are skipped.
fn extract_base_class_names(line: &str) -> Vec<String> {
    const TERMINATORS: &[char] = &['{', ';', '\n'];
    const STOP_KEYWORDS: &[&str] = &[" extends ", " implements ", " with "];

    let mut bases: Vec<String> = Vec::new();

    // Python shape: everything inside the first `( ... )` pair.
    if let Some(start) = line.find('(') {
        if let Some(end_rel) = line[start..].find(')') {
            bases.extend(split_base_tokens(&line[start + 1..start + end_rel]));
        }
    }

    // Keyword shapes: each keyword introduces a list that runs until the
    // next keyword or a terminator, whichever comes first.
    for keyword in STOP_KEYWORDS {
        let mut cursor = 0;
        while let Some(idx) = line[cursor..].find(*keyword) {
            let after = &line[cursor + idx + keyword.len()..];
            let segment_end = STOP_KEYWORDS
                .iter()
                .filter_map(|other| after.find(*other))
                .chain(after.find(TERMINATORS))
                .min()
                .unwrap_or(after.len());
            bases.extend(split_base_tokens(&after[..segment_end]));
            // Resume at the end of the consumed segment so a repeated
            // keyword (`with A with B`) is picked up on the next pass.
            cursor += idx + keyword.len() + segment_end;
        }
    }

    // Colon shape: only when the text before the first `:` looks like a type
    // header and has no `(`, which would make it the Python shape.
    if let Some(colon) = line.find(':') {
        let leading = &line[..colon];
        let looks_like_class = leading.contains("class ") || leading.contains("struct ");
        if looks_like_class && !leading.contains('(') {
            let after = &line[colon + 1..];
            let segment_end = after.find(TERMINATORS).unwrap_or(after.len());
            bases.extend(split_base_tokens(&after[..segment_end]));
        }
    }

    bases.retain(|name| !name.is_empty());
    bases.sort();
    bases.dedup();
    bases
}

/// Split a comma-separated base list into bare identifiers.
///
/// For each item the modifier keywords are dropped and the last remaining
/// word is kept, with any trailing `,`, `;` or `{` trimmed. Items that are
/// empty, contain `=` (keyword arguments), or reduce to nothing are skipped.
fn split_base_tokens(segment: &str) -> Vec<String> {
    segment
        .split(',')
        .map(str::trim)
        .filter(|item| !item.is_empty() && !item.contains('='))
        .filter_map(|item| {
            item.split_whitespace()
                .filter(|word| {
                    !matches!(
                        *word,
                        "public" | "protected" | "private" | "virtual" | "static" | "final"
                    )
                })
                .next_back()
        })
        .map(|name| name.trim_end_matches([',', ';', '{']).to_string())
        .filter(|name| !name.is_empty())
        .collect()
}
Option { let mut iter = line.split_whitespace(); let mut leading = iter.next()?; @@ -2593,6 +2754,62 @@ fn helper() {} assert_eq!(imports[0].target, "crate::graph"); } + #[test] + fn count_comment_lines_handles_common_languages() { + let rust = "// header\nfn main() {}\n/// doc\n/* block\n inside\n*/\nlet x = 1;\n"; + assert_eq!( + count_comment_lines(rust), + 5, + "// + /// + /* + inside + */ all count", + ); + let python = "# top\n\"\"\"hi\"\"\"\nx = 1 # trailing\n# another\n"; + assert_eq!( + count_comment_lines(python), + 2, + "Only line-prefix # is counted, not trailing or docstrings", + ); + let none = "fn main() { let x = 1; }\n"; + assert_eq!(count_comment_lines(none), 0); + } + + #[test] + fn expand_brace_targets_handles_common_shapes() { + assert_eq!(expand_brace_targets("foo::bar"), vec!["foo::bar"]); + assert_eq!( + expand_brace_targets("foo::{a, b, c}"), + vec!["foo::a", "foo::b", "foo::c"], + ); + assert_eq!( + expand_brace_targets("foo::{ a , b }"), + vec!["foo::a", "foo::b"], + "trims whitespace per item", + ); + assert_eq!( + expand_brace_targets("foo::{a}"), + vec!["foo::a"], + "single-item brace expansion", + ); + assert_eq!( + expand_brace_targets("foo::{}"), + vec!["foo::{}"], + "empty brace falls back to original target", + ); + assert_eq!( + expand_brace_targets("foo::{a"), + vec!["foo::{a"], + "missing close brace falls back to original target", + ); + } + + #[test] + fn fans_rust_brace_imports_into_separate_targets() { + let content = "use crate::{graph, scanner};"; + let imports = extract_imports(11, Language::Rust, content); + assert_eq!(imports.len(), 2); + assert_eq!(imports[0].target, "crate::graph"); + assert_eq!(imports[1].target, "crate::scanner"); + } + #[test] fn extracts_python_facts() { let content = r#" @@ -3308,11 +3525,13 @@ int run(void) { "use crate::facts::{FileFact, ImportFact};\nmod graph;\nmod tests {\n}\n", ); - assert_eq!(imports.len(), 2); - assert_eq!(imports[0].target, "crate::facts::{FileFact, ImportFact}"); + 
assert_eq!(imports.len(), 3); + assert_eq!(imports[0].target, "crate::facts::FileFact"); assert_eq!(imports[0].kind, "use"); - assert_eq!(imports[1].target, "graph"); - assert_eq!(imports[1].kind, "mod"); + assert_eq!(imports[1].target, "crate::facts::ImportFact"); + assert_eq!(imports[1].kind, "use"); + assert_eq!(imports[2].target, "graph"); + assert_eq!(imports[2].kind, "mod"); } #[test] @@ -3373,6 +3592,60 @@ int run(void) { ); } + #[test] + fn extract_base_class_names_handles_common_languages() { + assert_eq!( + extract_base_class_names("class Foo extends Bar implements Baz, Qux {"), + vec!["Bar".to_string(), "Baz".to_string(), "Qux".to_string()], + ); + assert_eq!( + extract_base_class_names("class Foo(Bar, Baz):"), + vec!["Bar".to_string(), "Baz".to_string()], + ); + assert_eq!( + extract_base_class_names("class Foo : public Bar, virtual Baz {"), + vec!["Bar".to_string(), "Baz".to_string()], + ); + assert_eq!( + extract_base_class_names("class Foo extends Bar with Baz with Qux {"), + vec!["Bar".to_string(), "Baz".to_string(), "Qux".to_string()], + ); + assert!( + extract_base_class_names("struct Plain;").is_empty(), + "Rust structs declared without inheritance produce no bases", + ); + } + + #[test] + fn extract_types_marks_abstract_when_base_matches_plugin_config() { + let file = FileFact { + file_id: 0, + path: PathBuf::from("src/Animal.py"), + language: Language::Python, + language_name: "python".to_string(), + module: "src.Animal".to_string(), + lines: 1, + bytes: 30, + content_hash: String::new(), + comment_lines: 0, + }; + let content = "class Dog(AbstractAnimal):\n"; + let plugin = LanguagePluginConfig { + name: "python".to_string(), + abstract_base_classes: vec!["AbstractAnimal".to_string()], + concrete_type_prefixes: vec!["class ".to_string()], + ..LanguagePluginConfig::default() + }; + let types = extract_types(0, &file, content, Some(&plugin)); + assert_eq!(types.len(), 1); + assert_eq!(types[0].name, "Dog"); + assert_eq!(types[0].bases, 
vec!["AbstractAnimal".to_string()]); + assert!( + types[0].is_abstract, + "config-listed abstract base should flip is_abstract on the subclass", + ); + } + #[test] fn extract_types_finds_rust_traits_and_structs() { let file = FileFact { @@ -3384,6 +3657,7 @@ int run(void) { lines: 4, bytes: 80, content_hash: String::new(), + comment_lines: 0, }; let content = "trait Animal {}\npub struct Dog;\nstruct Cat;\nfn meow() {}\n"; let types = extract_types(0, &file, content, None); @@ -3455,6 +3729,7 @@ int run(void) { lines: 1, bytes: 1, content_hash: String::new(), + comment_lines: 0, } } diff --git a/crates/raysense-core/src/simulate.rs b/crates/raysense-core/src/simulate.rs index ee8c4d5..f499ce4 100644 --- a/crates/raysense-core/src/simulate.rs +++ b/crates/raysense-core/src/simulate.rs @@ -23,6 +23,7 @@ use std::path::PathBuf; +use serde::{Deserialize, Serialize}; use thiserror::Error; use crate::facts::{ @@ -33,6 +34,20 @@ use crate::graph::compute_graph_metrics; use crate::health::RaysenseConfig; use crate::scanner::{matching_plugin, module_name}; +/// One step in a what-if simulation chain. Each variant maps to a single-action +/// helper in this module — `simulate_sequence` applies them in order. The +/// JSON shape matches the MCP `raysense_what_if` tool: the discriminator key +/// is `action`, and the field names align with the per-action MCP arguments. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "action", rename_all = "snake_case")] +pub enum Action { + RemoveFile { file: String }, + MoveFile { from: String, to: String }, + AddEdge { from: String, to: String }, + RemoveEdge { from: String, to: String }, + BreakCycle { from: String, to: String }, +} + #[derive(Debug, Error)] pub enum SimulateError { #[error("file not found in scan: {0}")] @@ -174,6 +189,7 @@ pub fn remove_file(report: &ScanReport, file_path: &str) -> Result Result { + let mut current = initial.clone(); + for (index, action) in actions.iter().enumerate() { + let result = match action { + Action::RemoveFile { file } => remove_file(¤t, file), + Action::MoveFile { from, to } => move_file(¤t, config, from, to), + Action::AddEdge { from, to } => add_edge(¤t, from, to), + Action::RemoveEdge { from, to } => remove_edge(¤t, from, to), + Action::BreakCycle { from, to } => break_cycle(¤t, from, to), + }; + current = result.map_err(|source| SequenceError { + index, + action: action.clone(), + source, + })?; + } + Ok(current) +} + +/// Annotates a `SimulateError` with which step in a chain failed. +#[derive(Debug, Error)] +#[error("action #{index} ({action:?}) failed: {source}")] +pub struct SequenceError { + pub index: usize, + pub action: Action, + #[source] + pub source: SimulateError, +} + fn file_id_for_path(report: &ScanReport, path: &str) -> Result { report .files @@ -441,6 +494,7 @@ mod tests { lines: 100, bytes: 100, content_hash: String::new(), + comment_lines: 0, } } @@ -855,4 +909,73 @@ mod tests { let err = remove_file(&before, "src/missing.rs").unwrap_err(); assert!(matches!(err, SimulateError::FileNotFound(_))); } + + #[test] + fn simulate_sequence_chains_actions_in_order() { + let files = vec![ + file(0, "src/a.rs"), + file(1, "src/b.rs"), + file(2, "src/c.rs"), + ]; + // a <-> b cycle, plus c isolated. 
+ let imports = vec![import(0, 0, Some(1)), import(1, 1, Some(0))]; + let before = report( + files, + Vec::new(), + imports, + Vec::new(), + Vec::new(), + Vec::new(), + ); + assert!(before.graph.cycle_count >= 1, "setup must contain a cycle"); + + let actions = vec![ + Action::AddEdge { + from: "src/a.rs".to_string(), + to: "src/c.rs".to_string(), + }, + Action::BreakCycle { + from: "src/a.rs".to_string(), + to: "src/b.rs".to_string(), + }, + ]; + let after = simulate_sequence(&before, &RaysenseConfig::default(), &actions).unwrap(); + + assert_eq!( + after.graph.cycle_count, 0, + "the second action breaks the cycle" + ); + assert_eq!( + after.imports.len(), + 2, + "added edge survives, broken edge dropped", + ); + assert!(after + .snapshot + .snapshot_id + .contains("+add_edge:src/a.rs->src/c.rs")); + assert!(after + .snapshot + .snapshot_id + .contains("+break_cycle:src/a.rs->src/b.rs")); + } + + #[test] + fn simulate_sequence_reports_failing_step_index() { + let before = report( + vec![file(0, "src/a.rs")], + Vec::new(), + Vec::new(), + Vec::new(), + Vec::new(), + Vec::new(), + ); + let actions = vec![Action::AddEdge { + from: "src/a.rs".to_string(), + to: "src/missing.rs".to_string(), + }]; + let err = simulate_sequence(&before, &RaysenseConfig::default(), &actions).unwrap_err(); + assert_eq!(err.index, 0); + assert!(matches!(err.source, SimulateError::FileNotFound(_))); + } } diff --git a/crates/raysense-memory/src/lib.rs b/crates/raysense-memory/src/lib.rs index e8cb992..813bdbd 100644 --- a/crates/raysense-memory/src/lib.rs +++ b/crates/raysense-memory/src/lib.rs @@ -1893,6 +1893,7 @@ mod tests { lines, bytes: lines * 10, content_hash: format!("hash-{file_id}"), + comment_lines: 0, } } }