In [1]:
// Import required packages
#r "nuget: FSharp.Data"

open System
open System.IO
open System.Text.RegularExpressions
open FSharp.Data
open System.Globalization
open Microsoft.FSharp.Reflection

/// A getter/setter pair focused on one field of type 'Field inside a value of type 'T.
type Lens<'T,'Field> =
    { get : 'T -> 'Field
      set : 'T -> 'Field -> 'T }

module Lens =
    /// Build a lens for a given property name (using reflection).
    ///
    /// `get` reads the property through reflection. `set` does not mutate its
    /// argument: it builds a new record with FSharpValue.MakeRecord, copying every
    /// field except `name`, so `set` requires 'T to be an F# record type.
    ///
    /// Raises KeyNotFoundException (when the lens is built) if 'T has no property
    /// called `name` whose declared type is exactly 'Field.
    let forProperty<'T,'Field> (name: string) : Lens<'T,'Field> =
        let tType = typeof<'T>
        let prop =
            tType.GetProperties()
            |> Array.tryFind (fun p -> p.Name = name && p.PropertyType = typeof<'Field>)
            |> Option.defaultWith (fun () ->
                // Same exception type Array.find would raise, but naming the culprit.
                let message =
                    sprintf "Type %O has no property '%s' of type %O"
                        tType name (typeof<'Field>)
                raise (System.Collections.Generic.KeyNotFoundException message))
        // Resolved once per lens (on the first `set`) instead of on every call.
        // Kept lazy so a lens over a non-record type can still be used for `get`.
        let recordFields = lazy (FSharpType.GetRecordFields tType)
        { get = fun t -> prop.GetValue(t) :?> 'Field
          set = fun t v ->
              let values =
                  recordFields.Value
                  |> Array.map (fun f ->
                      if f.Name = name then box v
                      else f.GetValue(t))
              FSharpValue.MakeRecord(tType, values) :?> 'T }

// Define input/output paths
// inputDir: folder whose files are enumerated with Directory.GetFiles and loaded.
let inputDir = "../data/cleaned"
// outputPath: path of the enriched JSON file.
// NOTE(review): not referenced by any code visible in this notebook (the cell that
// writes the file builds its own path) - confirm whether it is still needed.
let outputPath = "../data/enriched/enrichedPigments.json"

// Define a type alias for JSON provider
// The provider is given one cleaned file as its sample; rows loaded through it are
// read below via their Pigment1, Pigment2 and Result properties.
type PigmentJson = JsonProvider<"../data/cleaned/CleanedPigments-2025-10-05.json">

// Tasks
/// Pulls the yyyy-MM-dd stamp out of a "CleanedPigments-<date>.json" file name.
/// Returns None when the name does not contain that pattern.
let extractDate filename =
    match Regex.Match(filename, @"CleanedPigments-(\d{4}-\d{2}-\d{2})\.json") with
    | hit when hit.Success -> Some hit.Groups.[1].Value
    | _ -> None

/// Loads one cleaned pigment file and tags every row with the date taken from the
/// file name. A file whose name carries no date is not loaded and yields [||].
let loadAndAddDate (file: string) =
    // Only invoked when a date was found, so undated files are never opened.
    let loadStamped (date: string) =
        PigmentJson.Load(file)
        |> Array.map (fun row ->
            {| pigment1 = row.Pigment1
               pigment2 = row.Pigment2
               result = row.Result
               dateDiscovered = date |})
    extractDate (Path.GetFileName(file))
    |> Option.map loadStamped
    |> Option.defaultValue [||]

// Very verbose, but seemingly the only way to work with structural typing
// without having to define fully qualified types (i.e. when adding steps before or
// after, I would have to redo all the types).
// And yes, the `inline` is needed so that the types can be resolved by the compiler
// (this is fine, though, since the function is only called once).

// Deduplicate pigments by unordered pair of pigments, keeping for each pair the
// record with the smallest dateDiscovered string (the earliest date when the
// strings are yyyy-MM-dd). On a tie the record seen first is kept.
// The result is ordered by the pair key, not by input order.
let inline dedupePigments
    (input: ^T array when ^T : (member pigment1 : string)
                          and ^T : (member pigment2 : string)
                          and ^T : (member dateDiscovered : string)) =

    // member readers, resolved statically through the constraints above
    let inline firstPigment (x: ^T) = (^T : (member pigment1 : string) x)
    let inline secondPigment (x: ^T) = (^T : (member pigment2 : string) x)
    let inline discoveredOn (x: ^T) = (^T : (member dateDiscovered : string) x)

    (Map.empty, input)
    ||> Array.fold (fun best candidate ->
        // a Set key makes (a, b) and (b, a) the same pair
        let pair = Set.ofList [ firstPigment candidate; secondPigment candidate ]
        match Map.tryFind pair best with
        | Some kept when discoveredOn kept <= discoveredOn candidate -> best
        | _ -> Map.add pair candidate best)
    |> Map.toArray
    |> Array.map snd

/// Converts a string to title case ("abstract MESS" -> "Abstract Mess").
/// The input is lower-cased first because TextInfo.ToTitleCase leaves words that
/// are entirely upper case unchanged.
/// Uses the invariant culture so the normalized data is the same on every machine;
/// with the current culture, locales such as tr-TR change the casing of "I"/"i".
let toTitleCase (s: string) =
    CultureInfo.InvariantCulture.TextInfo.ToTitleCase(s.ToLowerInvariant())

/// Returns a copy of `input` in which every record's "result" field has been passed
/// through toTitleCase. The field is located by name via a reflection-based lens.
let inline normalizeResults (input: 'T array) =
    // TODO: Make this type safe - maybe try aether?
    let resultLens = Lens.forProperty<'T,string> "result"
    let withNormalizedResult record =
        record
        |> resultLens.get
        |> toTitleCase
        |> resultLens.set record
    Array.map withNormalizedResult input

/// Wraps every record as {| rest = record; firstResultDiscovery = flag |}, where the
/// flag is true when the record's "dateDiscovered" equals the earliest date found
/// among all records sharing the same "result". Several records can be flagged for
/// one result if they share that earliest date.
///
/// Both fields are read by name through reflection-based lenses. Dates are parsed
/// with the invariant culture so the outcome does not depend on the machine locale;
/// DateTime.Parse raises FormatException on a value it cannot parse.
let inline EnrichFirstResultDiscovery (input: 'T array) =
    // Extractors for reflection-based access
    let resultLens = Lens.forProperty<'T,string> "result"
    let dateLens   = Lens.forProperty<'T,string> "dateDiscovered"

    // Read and parse each record exactly once; both passes below reuse the values.
    let parsed =
        input
        |> Array.map (fun r ->
            let date = DateTime.Parse(dateLens.get r, CultureInfo.InvariantCulture)
            r, resultLens.get r, date)

    // Build a map of result -> earliest date
    let earliestByResult =
        parsed
        |> Array.groupBy (fun (_, res, _) -> res)
        |> Array.map (fun (res, group) ->
            res, group |> Array.map (fun (_, _, date) -> date) |> Array.min)
        |> Map.ofArray

    // Enrich each record with the flag
    parsed
    |> Array.map (fun (r, res, date) ->
        {|
            rest = r
            firstResultDiscovery = (date = earliestByResult.[res])
        |})

// Full pipeline: load every file in inputDir, dedupe pigment pairs, title-case the
// results, flag first discoveries, then flatten the flag into each record.
let output =
    let flagged =
        Directory.GetFiles(inputDir)
        |> Array.collect loadAndAddDate
        |> dedupePigments
        |> normalizeResults
        |> EnrichFirstResultDiscovery
    // Wish I could have done this merge inside of EnrichFirstResultDiscovery
    [| for x in flagged ->
        {| x.rest with firstResultDiscovery = x.firstResultDiscovery |} |]

output


index,value
,
,
,
,
,
,
,
,
,
,

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,False
pigment1,Albescent Iridescence
pigment2,Bianco di San Giovanni
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,True
pigment1,Albescent Iridescence
pigment2,Gormenghast Moss
result,Subject: Tree Of Time

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,False
pigment1,Albescent Iridescence
pigment2,Inchoate Wossname
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,False
pigment1,Albescent Iridescence
pigment2,Lusty Gallan
result,Scene: Village In A Time Pocket

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,True
pigment1,Albescent Iridescence
pigment2,Orpiment Tabby
result,Scene: Village In A Time Pocket

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,True
pigment1,Albescent Iridescence
pigment2,Skobeloff Forest
result,Subject: Tree Of Time

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,False
pigment1,Albescent Iridescence
pigment2,Ten Thousand Emeralds
result,Subject: Tree Of Time

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,False
pigment1,Amaranthine Psychopomp
pigment2,Cerulean-ish Ultramarine
result,Scene: Magical Catdom

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,True
pigment1,Amaranthine Psychopomp
pigment2,Crystaline Blue
result,Scene: Magical Catdom

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,False
pigment1,Amaranthine Psychopomp
pigment2,Ecru Winnower
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,True
pigment1,Amaranthine Psychopomp
pigment2,Old Guard Vogue Puce
result,Scene: A Land Afar

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,False
pigment1,Amaranthine Psychopomp
pigment2,Phylox Paroxysm
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,True
pigment1,Ancient Argent
pigment2,Ashen Char
result,Scene: Mindscape

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,False
pigment1,Ancient Argent
pigment2,Bianco di San Giovanni
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-08-30
firstResultDiscovery,True
pigment1,Ancient Argent
pigment2,Corrupted Vermillion
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,False
pigment1,Ancient Argent
pigment2,Crystaline Blue
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,False
pigment1,Ancient Argent
pigment2,Gormenghast Moss
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,False
pigment1,Ancient Argent
pigment2,Phylox Paroxysm
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
firstResultDiscovery,False
pigment1,Ancient Argent
pigment2,Warm Sienna
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-09-13
firstResultDiscovery,False
pigment1,Ashen Char
pigment2,Cerulean-ish Ultramarine
result,Abstract Mess


In [2]:
// Import required packages
#r "nuget: FSharp.Data"

open System
open System.IO
open System.Text.RegularExpressions
open FSharp.Data
open System.Globalization

// Define input/output paths
// inputDir: folder whose files are enumerated with Directory.GetFiles and loaded.
let inputDir = "../data/cleaned"
// outputPath: path of the enriched JSON file.
// NOTE(review): not referenced by any code visible in this cell - confirm whether
// it is still needed.
let outputPath = "../data/enriched/enrichedPigments.json"

// Define a type alias for JSON provider
// The provider is given one cleaned file as its sample; rows loaded through it are
// read below via their Pigment1, Pigment2 and Result properties.
type PigmentJson = JsonProvider<"../data/cleaned/CleanedPigments-2025-10-05.json">

// Tasks
/// Pulls the yyyy-MM-dd stamp out of a "CleanedPigments-<date>.json" file name.
/// Returns None when the name does not contain that pattern.
let extractDate filename =
    let hit = Regex.Match(filename, @"CleanedPigments-(\d{4}-\d{2}-\d{2})\.json")
    match hit.Success with
    | true -> Some hit.Groups.[1].Value
    | false -> None

/// Loads one cleaned pigment file and tags every row with the date taken from the
/// file name. A file whose name carries no date is not loaded and yields [||].
let loadAndAddDate (file: string) =
    // Only invoked when a date was found, so undated files are never opened.
    let loadStamped (date: string) =
        PigmentJson.Load(file)
        |> Array.map (fun row ->
            {| pigment1 = row.Pigment1
               pigment2 = row.Pigment2
               result = row.Result
               dateDiscovered = date |})
    extractDate (Path.GetFileName(file))
    |> Option.map loadStamped
    |> Option.defaultValue [||]

// Cell result: the raw, date-stamped rows of every input file (no dedupe yet)
inputDir
|> Directory.GetFiles
|> Array.collect loadAndAddDate

(* NOTE: this approach doesn't work!
// Keep the final type tucked away in its own module
module Domain =
    type PigmentEnriched = {
        pigment1: string
        pigment2: string
        result: string
        dateDiscovered: string
        firstResultDiscovery: bool
    }

// Everything else runs without knowing about PigmentEnriched
let extractDate filename =
    let m = Regex.Match(filename, @"CleanedPigments-(\d{4}-\d{2}-\d{2})\.json")
    if m.Success then Some m.Groups.[1].Value else None

let loadAndAddDate (file: string) =
    match extractDate (Path.GetFileName(file)) with
    | Some date ->
        let data = PigmentJson.Load(file)
        data
        |> Array.map (fun r ->
            {| pigment1       = r.Pigment1
               pigment2       = r.Pigment2
               result         = r.Result
               dateDiscovered = date |})
    | None -> [||]

let dedupePigments (allEnriched: {| pigment1: string; pigment2: string; result: string; dateDiscovered: string |}[]) =
    let seed : Map<Set<string>, {| pigment1: string; pigment2: string; result: string; dateDiscovered: string |}> = Map.empty
    allEnriched
    |> Array.fold
        (fun acc r ->
            let key = Set.ofList [ r.pigment1; r.pigment2 ]
            match Map.tryFind key acc with
            | Some existing when existing.dateDiscovered <= r.dateDiscovered -> acc
            | _ -> Map.add key r acc)
        seed
    |> Map.toArray
    |> Array.map snd

// Only at the very end do we bring the domain type into scope
let finalData : Domain.PigmentEnriched[] =
    Directory.GetFiles(inputDir)
    |> Array.collect loadAndAddDate
    |> dedupePigments
    |> Array.map (fun x ->
        { Domain.PigmentEnriched.pigment1 = x.pigment1
          pigment2 = x.pigment2
          result = x.result
          dateDiscovered = x.dateDiscovered
          firstResultDiscovery = false })

*)

// Define enriched pigment schema
// type PigmentEnriched = {
//     pigment1: string
//     pigment2: string
//     result: string
//     dateDiscovered: string
//     firstResultDiscovery: bool
// }





// // Minimal record used during loading and deduplication (not the enriched one)
// type PigmentLite = {
//     pigment1: string
//     pigment2: string
//     result: string
//     dateDiscovered: string
// }

// let finalData =
//     let extractDate filename =
//         let m = Regex.Match(filename, @"CleanedPigments-(\d{4}-\d{2}-\d{2})\.json")
//         if m.Success then Some m.Groups.[1].Value else None

//     let loadAndAddDate (file: string) : PigmentLite[] =
//         match extractDate (Path.GetFileName(file)) with
//         | Some date ->
//             let data = PigmentJson.Load(file)
//             data
//             |> Array.map (fun r ->
//                 { pigment1       = r.Pigment1
//                   pigment2       = r.Pigment2
//                   result         = r.Result
//                   dateDiscovered = date })
//         | None -> [||]

//     // Deduplicate PigmentLite by unordered pair of pigments, keeping the latest date
//     let dedupePigments (allEnriched: PigmentLite[]) : PigmentLite[] =
//         let seed : Map<Set<string>, PigmentLite> = Map.empty
//         allEnriched
//         |> Array.fold
//             (fun acc r ->
//                 let key = Set.ofList [ r.pigment1; r.pigment2 ]
//                 match Map.tryFind key acc with
//                 | Some existing when existing.dateDiscovered <= r.dateDiscovered -> acc
//                 | _ -> Map.add key r acc)
//             seed
//         |> Map.toArray
//         |> Array.map snd

//     Directory.GetFiles(inputDir)
//     |> Array.collect loadAndAddDate
//     |> dedupePigments
//     |> Array.map (fun x ->
//         { pigment1 = x.pigment1
//           pigment2 = x.pigment2
//           result = x.result
//           dateDiscovered = x.dateDiscovered
//           firstResultDiscovery = false })

// // Helper to convert to title case
// let toTitleCase (s: string) =
//     CultureInfo.CurrentCulture.TextInfo.ToTitleCase(s.ToLower())

// // Extract date from filename
// let extractDate filename =
//     let m = Regex.Match(filename, @"CleanedPigments-(\d{4}-\d{2}-\d{2})\.json")
//     if m.Success then Some m.Groups.[1].Value else None

// // Load and enrich a single file
// // Raw load: anonymous record, no firstResultDiscovery yet
// let loadAndEnrich (file: string) =
//     match extractDate (Path.GetFileName(file)) with
//     | Some date ->
//         let data = PigmentJson.Load(file)
//         data
//         |> Array.map (fun r ->
//             {| pigment1 = r.Pigment1
//                pigment2 = r.Pigment2
//                result = r.Result
//                dateDiscovered = date |})
//     | None -> [||]

// // Load all files and enrich
// let allEnriched =
//     Directory.GetFiles(inputDir)
//     |> Array.collect loadAndEnrich

// // Deduplicate still works on anon records
// let deduped : {| pigment1: string; pigment2: string; result: string; dateDiscovered: string |}[] =
//     allEnriched
//     |> Array.fold (fun acc r ->
//         let key = Set.ofList [r.pigment1; r.pigment2]
//         match Map.tryFind key acc with
//         | Some existing when existing.dateDiscovered <= r.dateDiscovered -> acc
//         | _ -> Map.add key r acc) Map.empty
//     |> Map.toArray
//     |> Array.map snd

// // Normalize results into smart title case
// let normalizeResults (records) =
//     records
//     |> Array.map (fun r -> { r with result = toTitleCase r.result })

// // Compute earliest discovery date per result
// let earliestByResult (records) =
//     records
//     |> Array.fold (fun acc r ->
//         let rDate = DateTime.Parse(r.dateDiscovered)
//         match Map.tryFind r.result acc with
//         | Some existing when existing <= rDate -> acc
//         | _ -> Map.add r.result rDate acc
//     ) Map.empty

// // Enrich with firstResultDiscovery flag
// let enrichWithFirstResultDiscovery (records) =
//     let earliest = earliestByResult records
//     records
//     |> Array.map (fun r ->
//         { r with firstResultDiscovery = DateTime.Parse(r.dateDiscovered) = earliest.[r.result] })



// let finalData =
//     deduped
//     |> normalizeResults
//     //|> enrichWithFirstResultDiscovery

//finalData

index,value
,
,
,
,
,
,
,
,
,
,

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Albescent Iridescence
pigment2,Gormenghast Moss
result,Subject: Tree of time

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Albescent Iridescence
pigment2,Orpiment Tabby
result,Scene: Village in a Time Pocket

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Albescent Iridescence
pigment2,Bianco di San Giovanni
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Albescent Iridescence
pigment2,Ten Thousand Emeralds
result,Subject: Tree of time

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Albescent Iridescence
pigment2,Skobeloff Forest
result,Subject: Tree of time

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Albescent Iridescence
pigment2,Inchoate Wossname
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Albescent Iridescence
pigment2,Lusty Gallan
result,scene: Village in a Time Pocket

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Amaranthine Psychopomp
pigment2,Ecru Winnower
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Amaranthine Psychopomp
pigment2,Phylox Paroxysm
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Amaranthine Psychopomp
pigment2,Old Guard Vogue Puce
result,Scene: A Land Afar

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Amaranthine Psychopomp
pigment2,Crystaline Blue
result,Scene: Magical Catdom

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Amaranthine Psychopomp
pigment2,Cerulean-ish Ultramarine
result,Scene: Magical Catdom

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Ancient Argent
pigment2,Gormenghast Moss
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Ancient Argent
pigment2,Warm Sienna
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Ancient Argent
pigment2,Bianco di San Giovanni
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Ancient Argent
pigment2,Corrupted Vermillion
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Ancient Argent
pigment2,Phylox Paroxysm
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Ancient Argent
pigment2,Crystaline Blue
result,Abstract Mess

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Ancient Argent
pigment2,Ashen Char
result,Scene: Mindscape

Unnamed: 0,Unnamed: 1
dateDiscovered,2025-10-05
pigment1,Ashen Char
pigment2,Crystaline Blue
result,Abstract Mess


In [None]:
open System.Text.Json
open System.IO

// Render the enriched pigment data as indented JSON
let jsonOptions = JsonSerializerOptions()
jsonOptions.WriteIndented <- true
let enrichedJson = JsonSerializer.Serialize(output, jsonOptions)

// Destination folder and file; the folder is created first if it is missing
let enrichedDir = "../data/enriched"
let enrichedPath = Path.Combine(enrichedDir, "enrichedPigments.json")
Directory.CreateDirectory(enrichedDir) |> ignore

// Write the file, then report where it went
File.WriteAllText(enrichedPath, enrichedJson)
enrichedPath |> printfn "✅ Enriched pigment data written to %s"


✅ Enriched pigment data written to ../data/enriched\enrichedPigments.json
