INPUTS: Take in extracted data from step 1

Apart from normal cleanup to standardize spellings and capitalizations, we are also imposing an alphabetical ordering where pigment1 is always first alphabetically and pigment2 is always second.

The reason for this is that, for achievements / history data, it provides a stable key, so that I can map people (regardless of what order they submit pigments in) to existing pigment data.

In [None]:
// Import necessary packages
#r "nuget: FSharp.Data"
open FSharp.Data

// Location of the step-1 extract. Declared as a [<Literal>] so the one constant
// can be used both as the type provider's schema sample and as the file that is
// actually loaded, instead of repeating the path.
[<Literal>]
let ExtractedPigmentsPath = "../data/extracted/ExtractedPigments-2025-11-22.json"

// Infer the record shape from the extract, then load that same file.
type ComboJson = JsonProvider<ExtractedPigmentsPath>
let rawData = ComboJson.Load(ExtractedPigmentsPath)

// Define cleanup functions
/// Normalizes a pigment name: the spelling "Phlox Paroxysm" is rewritten to
/// "Phylox Paroxysm"; every other name is returned unchanged.
/// The comparison is exact (case-sensitive).
let cleanupPigmentName name =
    match name with
    | "Phlox Paroxysm" -> "Phylox Paroxysm"
    | unchanged -> unchanged

/// Normalizes a result name by replacing known variant spellings with one
/// fixed spelling. Matching ignores case; names without a rule are returned
/// exactly as given.
///
/// The rules are expressed as a single `match` so that every rule yields a
/// value. (An `if ... then "text"` with no `else` does not type-check in F#,
/// because a branch without an `else` must be of type `unit`.)
let cleanupResultName (name: string) =
    // Invariant lower-casing keeps the match independent of the machine's
    // current culture (e.g. Turkish casing rules for 'I').
    match name.ToLowerInvariant() with
    | "scene: village within a time pocket" -> "Scene: Village in a Time Pocket"
    | "scene: country side railroad" -> "Scene: Countryside Railroad"
    | _ -> name

// Apply cleanup and alphabetical reordering
/// Cleaned combinations: pigment and result names are normalized, each pigment
/// pair is put into a canonical order (pigment1 <= pigment2), and the list is
/// sorted by pigment1.
let cleanedCombos =
    rawData
    |> Array.map (fun c ->
        let p1 = cleanupPigmentName c.Pigment1
        let p2 = cleanupPigmentName c.Pigment2
        let result = cleanupResultName c.Result
        // Ordinal comparison, not String.CompareTo: CompareTo is culture-sensitive,
        // so the pair order could change with the machine's locale, which would
        // defeat its use as a stable key. Ordinal ordering is also what
        // List.sortBy applies to strings below, so both orderings now agree.
        // NOTE(review): pairs where culture and ordinal ordering disagree
        // (e.g. names differing in case or punctuation) may swap relative to
        // previously generated data; confirm against existing keys.
        let pigment1, pigment2 =
            if System.String.CompareOrdinal(p1, p2) <= 0 then p1, p2 else p2, p1
        {| pigment1 = pigment1; pigment2 = pigment2; result = result |})
    |> Array.toList
    |> List.sortBy (fun x -> x.pigment1)

// Output cleaned and reordered data
cleanedCombos


In [None]:
open System.Text.Json
open System.IO

// Serialize the cleaned combos to indented JSON.
let jsonOptions = JsonSerializerOptions(WriteIndented = true)
let json = JsonSerializer.Serialize(cleanedCombos, jsonOptions)

// Ensure the output directory exists; CreateDirectory does nothing if it
// is already there, and its DirectoryInfo result is not needed.
let outputDir = "../data/cleaned"
Directory.CreateDirectory(outputDir) |> ignore

// Write the JSON to file, replacing any previous output.
let outputPath = Path.Combine(outputDir, "cleaned.json")
File.WriteAllText(outputPath, json)

// The status marker was mojibake ("âœ…": the UTF-8 bytes of the check mark
// decoded as Windows-1252); restored to the intended character.
printfn "✅ Cleaned data written to %s" outputPath
