Load data from recipe runs.

# Release Train Metro Plan Analysis

This notebook analyzes project relationships and generates visualization data for the release train metro plan.

## Data Sources

The analysis uses three main data sources from OpenRewrite recipe runs:
- **ProjectCoordinates.csv**: Maven/Gradle project identifiers (groupId, artifactId) 
- **DependenciesInUse.csv**: Dependencies between projects
- **ParentRelationships.csv**: Parent POM and Gradle parent project relationships

In [None]:
%use dataframe

// Directory containing the CSV exports of the recipe runs.
// Kept in one place so the notebook can be pointed at another checkout by editing a single line.
val recipeDataDir = "/Users/merlin/IdeaProjects/private/Release-Train-Metro-Plan/src/main/kotlin/data"

// Project coordinates of every analysed project.
val projectIds = DataFrame.read("$recipeDataDir/ProjectCoordinates.csv")
// Dependencies in use, per repository.
val dependencies = DataFrame.read("$recipeDataDir/DependenciesInUse.csv")
// Child-to-parent project relationships.
val parentRelationships = DataFrame.read("$recipeDataDir/ParentRelationships.csv")

print("Loaded ${projectIds.rowsCount()} projects, ${dependencies.rowsCount()} dependencies, and ${parentRelationships.rowsCount()} parent relationships.")

In [None]:
import java.nio.file.Files
import java.nio.file.StandardOpenOption
import kotlin.io.path.Path
import kotlin.io.path.createFile

/**
 * Coordinates of a build artifact, optionally linked to its parent artifact.
 *
 * Identity is defined by [group] and [artifact] only: [parent] is mutable and is deliberately
 * ignored by [equals] and [hashCode], so it can be assigned after the instance was put into a hash-based set.
 *
 * @property group group identifier; may be null
 * @property artifact artifact identifier
 * @property parent parent artifact, or null when none is known
 */
data class Artifact(
    val group: String?,
    val artifact: String,
    var parent: Artifact? = null
) {
    override fun equals(other: Any?): Boolean {
        if (other !is Artifact) return false
        return group == other.group && artifact == other.artifact
    }

    // hashCode() on a nullable String yields 0 for null, so a missing group needs no special casing.
    override fun hashCode(): Int = 31 * group.hashCode() + artifact.hashCode()
}

/**
 * A repository together with the artifacts attached to it and the artifacts it depends on.
 *
 * Two repositories are considered equal when their [path] matches; [artifacts] and
 * [dependencies] take no part in [equals] or [hashCode].
 *
 * @property path repository path, used as the identity of the repository
 * @property artifacts artifacts belonging to this repository
 * @property dependencies artifacts this repository uses as dependencies
 */
data class Repository(val path: String, val artifacts: Set<Artifact>, val dependencies: Set<Artifact>) {
    override fun equals(other: Any?): Boolean {
        if (other !is Repository) return false
        return path == other.path
    }

    override fun hashCode(): Int = path.hashCode()
}

val repos = mutableListOf<Repository>()

// Dependency rows on a "master" or "main" branch, narrowed to the columns used below.
// This does not depend on the repository being processed, so it is computed once here
// instead of being re-selected and re-filtered for every repository.
val mainlineDependencies = dependencies
    .select { d -> d["repositoryPath", "repositoryBranch", "groupId", "artifactId"] }
    .filter { d -> d.repositoryBranch == "master" || d.repositoryBranch == "main" }

// Create one Repository per repositoryPath from the "master"/"main" project coordinates.
projectIds
    .select { it["repositoryPath", "repositoryBranch", "groupId", "artifactId"] }
    .filter { it.repositoryBranch == "master" || it.repositoryBranch == "main" }
    .groupBy { it["repositoryPath"] }
    .forEach { entry ->
        // Named differently from the `repositoryPath` column on purpose, so it cannot be
        // confused with the row property inside the row lambdas below.
        val repoPath = entry.key.repositoryPath
        repos.add(
            Repository(
                repoPath,
                // Artifacts listed for this repository in the project coordinates.
                entry.group
                    .map { a -> Artifact(a.groupId, a.artifactId) }
                    .toSet(),
                // Artifacts listed as dependencies of this repository.
                mainlineDependencies
                    .filter { d -> d.repositoryPath == repoPath }
                    .map { d -> Artifact(d.groupId, d.artifactId) }
                    .toSet()
            )
        )
    }

// Attach parent coordinates to the known artifacts, using the "master"/"main" rows of the
// ParentRelationships data. Rows whose repository or child artifact is unknown are skipped.
parentRelationships
    .select { it["repositoryPath", "repositoryBranch", "childArtifactId", "parentGroupId", "parentArtifactId"] }
    .filter { it.repositoryBranch == "main" || it.repositoryBranch == "master" }
    .forEach { row ->
        val owningRepo = repos.firstOrNull { candidate -> candidate.path == row.repositoryPath }
        // The child is matched by artifact id only, within the owning repository.
        val child = owningRepo?.artifacts?.firstOrNull { candidate -> candidate.artifact == row.childArtifactId }
        if (child != null) {
            child.parent = Artifact(row.parentGroupId, row.parentArtifactId)
        }
    }

println("derived ${repos.size} repositories from the data, containing ${repos.sumOf { it.artifacts.size }} artifacts, ${repos.sumOf { it.dependencies.size }} dependencies, and ${repos.sumOf { r -> r.artifacts.count { it.parent != null } }} parent relationships.")

In [3]:
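// Disabled: print all repositories and their parent relationships as a single PlantUML diagram.
// NOTE(review): asPlantUml() and asParentRelationships() are not defined in the visible cells.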
/*
println(
"""
@startuml
${repos.joinToString("\n") { it.asPlantUml() }}
${repos.joinToString("\n") { it.asParentRelationships() }}
@enduml
"""
)
*/

In [None]:
// Generate connections between repositories including parent relationships
/** Kind of relationship a [Link] stands for; the constant name is written verbatim by [Link.asD3]. */
enum class LinkType { parent, dependency }

/**
 * Directed connection between two graph nodes.
 *
 * @property src identifier of the node the link starts at
 * @property dist identifier of the node the link points to
 * @property type kind of relationship
 */
data class Link(val src: String, val dist: String, val type: LinkType) {
    /**
     * Renders this link as a JavaScript object literal with `source`, `target` and `type` fields.
     * Identifiers are escaped so that quotes, backslashes or line breaks cannot break the literal.
     */
    fun asD3(): String = "{ source: \"${jsEscape(src)}\", target: \"${jsEscape(dist)}\", type: \"${type}\" }"

    // Escapes the characters that would terminate or corrupt a double-quoted JavaScript string.
    // Backslashes are handled first so the escapes added afterwards are not escaped again.
    private fun jsEscape(text: String): String = text
        .replace("\\", "\\\\")
        .replace("\"", "\\\"")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
}
/**
 * Graph node identified by [id].
 *
 * @property id unique identifier of the node
 */
data class Node(val id: String) {
    /**
     * Renders this node as a JavaScript object literal with an `id` field.
     * The identifier is escaped so that quotes, backslashes or line breaks cannot break the literal.
     */
    fun asD3(): String = "{ id: \"${jsEscape(id)}\" }"

    // Escapes the characters that would terminate or corrupt a double-quoted JavaScript string.
    // Backslashes are handled first so the escapes added afterwards are not escaped again.
    private fun jsEscape(text: String): String = text
        .replace("\\", "\\\\")
        .replace("\"", "\\\"")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
}

val edges = mutableSetOf<Link>()

// Map every artifact to the path of the first repository that contains it, so each lookup below
// is a single hash probe instead of a scan over all repositories. getOrPut keeps the entry of
// the earliest repository, which is the one a first-match scan over `repos` would find.
val repoPathByArtifact = mutableMapOf<Artifact, String>()
for (repo in repos) {
    for (artifact in repo.artifacts) {
        repoPathByArtifact.getOrPut(artifact) { repo.path }
    }
}

for (repo in repos) {
    // Parent relationships: if artifact A has parent B, link A's repository to B's repository.
    // Parents that no repository contains, and parents inside the same repository, add no link.
    repo.artifacts
        .mapNotNull { artifact -> artifact.parent }
        .mapNotNull { parent -> repoPathByArtifact[parent] }
        .filter { parentPath -> parentPath != repo.path }
        .forEach { parentPath -> edges.add(Link(repo.path, parentPath, LinkType.parent)) }

    // Dependency relationships: if the repository uses dependency D, link it to D's repository.
    // Dependencies that no repository contains, and dependencies on itself, add no link.
    repo.dependencies
        .mapNotNull { dependency -> repoPathByArtifact[dependency] }
        .filter { dependencyPath -> dependencyPath != repo.path }
        .forEach { dependencyPath -> edges.add(Link(repo.path, dependencyPath, LinkType.dependency)) }
}

// Every repository that takes part in at least one edge becomes a node, in order of first appearance.
val nodes = edges
    .flatMap { link -> listOf(link.src, link.dist) }
    .distinct()
    .map { repoPath -> Node(repoPath) }

// Emit both collections as JavaScript module exports.
val nodesExport = nodes.joinToString(separator = ",\n\t", prefix = "export const nodes = [\n", postfix = "\n];") { node -> node.asD3() }
val linksExport = edges.joinToString(separator = ",\n\t", prefix = "export const links = [\n", postfix = "\n];") { link -> link.asD3() }
println(nodesExport)
println(linksExport)