In [None]:
// Import required packages
#r "nuget: FSharp.Data"

open System
open System.IO
open System.Text.RegularExpressions
open FSharp.Data
open System.Globalization
open Microsoft.FSharp.Reflection

/// A getter/setter pair focused on one field of a record-like value.
/// `get` reads the field; `set` returns a copy with the field replaced and
/// leaves the original value untouched.
type Lens<'T,'Field> =
    { get : 'T -> 'Field
      set : 'T -> 'Field -> 'T }

module Lens =
    /// Build a lens for a given property name (using reflection).
    ///
    /// The property is looked up once, when the lens is created, and must match
    /// both `name` and the exact type `'Field`.
    /// Raises `KeyNotFoundException` (with a descriptive message) when no such
    /// public property exists on `'T`.
    /// `set` rebuilds the value through `FSharpValue.MakeRecord`, so it only
    /// works when `'T` is an F# record type; `get` works for any type that
    /// exposes the property.
    let forProperty<'T,'Field> (name: string) : Lens<'T,'Field> =
        let tType = typeof<'T>
        let prop =
            match
                tType.GetProperties()
                |> Array.tryFind (fun p -> p.Name = name && p.PropertyType = typeof<'Field>)
            with
            | Some p -> p
            | None ->
                // Same exception type Array.find would raise, but say what was missing.
                let message =
                    sprintf "Type '%s' has no public property '%s' of type '%s'."
                        tType.Name name typeof<'Field>.Name
                raise (System.Collections.Generic.KeyNotFoundException(message))
        // The record field list does not change between calls, so reflect over it
        // once. It is lazy so that get-only use on non-record types still works.
        let recordFields = lazy (FSharpType.GetRecordFields tType)
        { get = fun t -> prop.GetValue(t) :?> 'Field
          set = fun t v ->
              let values =
                  recordFields.Value
                  |> Array.map (fun f ->
                      if f.Name = name then box v
                      else f.GetValue(t))
              FSharpValue.MakeRecord(tType, values) :?> 'T }

// Define input/output paths
// inputDir is the directory whose files are enumerated and loaded by the pipeline below.
let inputDir = "../data/cleaned"
// NOTE(review): outputPath is not referenced anywhere else in the visible source;
// the export cell builds the same location from its own directory + file name.
let outputPath = "../data/enriched/enrichedPigments.json"

// Define a type alias for JSON provider
// The string argument is the sample file handed to the type provider; records
// loaded through PigmentJson are accessed via Pigment1 / Pigment2 / Result.
type PigmentJson = JsonProvider<"../data/cleaned/CleanedPigments-2025-10-05.json">

// Tasks
/// Pull the `yyyy-MM-dd` date out of a cleaned-pigments file name.
///
/// `filename` is expected to be a bare file name (no directory part) of the form
/// `CleanedPigments-<yyyy>-<MM>-<dd>.json`.
/// Returns `Some date` with the date text exactly as written in the name, or
/// `None` when the name does not have that shape. The digits are not checked
/// for being a valid calendar date.
let extractDate (filename: string) =
    // Anchored on both ends so that look-alikes such as
    // "CleanedPigments-2025-10-05.json.bak" or "OldCleanedPigments-….json"
    // are rejected instead of being loaded as pigment data.
    let m = Regex.Match(filename, @"\ACleanedPigments-(\d{4}-\d{2}-\d{2})\.json\z")
    if m.Success then Some m.Groups.[1].Value else None

/// Load one cleaned-pigments file and stamp every record with the date taken
/// from the file name. Files whose name carries no recognisable date are not
/// opened at all and contribute an empty array.
let loadAndAddDate (file: string) =
    // Only invoked when a date was found, so unrelated files are never parsed.
    let loadStamped (date: string) =
        PigmentJson.Load(file)
        |> Array.map (fun r ->
            {|
                pigment1 = r.Pigment1
                pigment2 = r.Pigment2
                result = r.Result
                datePainted = date
            |})

    file
    |> Path.GetFileName
    |> extractDate
    |> Option.map loadStamped
    |> Option.defaultValue [||]

// [DedupePigments]--------------
// How fun: very verbose, but seemingly the only way to work with structural typing
// without having to define fully qualified types (i.e. when adding steps before or
// after, I would have to redo all the types).
// And yeah, the inline is needed so that the types can be resolved by the compiler.
// (This is fine though, since the function is only called once.)
// VVVVVVVVVVVVVVVVVVV

// Deduplicate pigments by unordered pair of pigments
// Rules:
// - Keep the earliest date overall
// - If both with/without painter exist, drop the one without
// - Allow multiple painters on the same day, but not the same painter twice
//
// Works on any element type exposing `pigment1`, `pigment2`, `datePainted`
// (all string) and `painter` (string option); the members are resolved
// statically, which is why the function has to be `inline`.
// Groups are emitted in order of first appearance in `input`; within a group
// the first record per painter wins.
// Raises FormatException when a `datePainted` value cannot be parsed.
let inline DedupePigments
    (input: ^T array when ^T : (member pigment1 : string)
                          and ^T : (member pigment2 : string)
                          and ^T : (member datePainted : string)
                          and ^T : (member painter : Option<string>)) =

    // shorthand accessors
    let inline p1 (x: ^T) = (^T : (member pigment1 : string) x)
    let inline p2 (x: ^T) = (^T : (member pigment2 : string) x)
    let inline painter (x: ^T) = (^T : (member painter : Option<string>) x)
    // Parse with the invariant culture so the result does not depend on the
    // locale (calendar, date order) of the machine running the notebook.
    let inline date (x: ^T) =
        let raw = (^T : (member datePainted : string) x)
        DateTime.Parse(raw, CultureInfo.InvariantCulture)

    input
    // A set makes (a, b) and (b, a) land in the same group.
    |> Array.groupBy (fun r -> Set.ofList [ p1 r; p2 r ])
    |> Array.collect (fun (_pigPair, records) -> // collect is just a flatMap
        // parse every date exactly once and keep it next to its record
        let dated = records |> Array.map (fun r -> date r, r)

        // find the earliest date among these records
        let earliestDate = dated |> Array.map fst |> Array.min

        // keep only records on the earliest date
        let sameDay =
            dated
            |> Array.filter (fun (d, _) -> d = earliestDate)
            |> Array.map snd

        // if any paintered records exist, drop the painterless ones
        let withPainter =
            sameDay |> Array.filter (fun r -> painter r |> Option.isSome)

        let filtered =
            if Array.isEmpty withPainter then sameDay else withPainter

        // dedupe by painter string (so only one per painter per day);
        // when nobody is credited, all records share the "" key and collapse to one
        filtered
        |> Array.distinctBy (fun r -> painter r |> Option.defaultValue "")
    )

/// Lower-case `s` and then capitalise the first letter of every word.
///
/// Uses the invariant culture for both steps so the normalised text is the
/// same on every machine (the current culture would, for example, turn "I"
/// into a dotless "ı" under Turkish settings).
/// Raises NullReferenceException when `s` is null.
let toTitleCase (s: string) =
    CultureInfo.InvariantCulture.TextInfo.ToTitleCase(s.ToLowerInvariant())

/// Return a copy of `input` in which every record's `result` string has been
/// passed through `toTitleCase`. The lens is created even for empty input, so
/// a `'T` without a string `result` property fails immediately.
let inline normalizeResults (input: 'T array) =
    // TODO: Make this type safe - maybe try aether?
    let resultLens = Lens.forProperty<'T,string> "result"

    // read -> normalise -> write back into a fresh copy of the record
    let normalizeOne (record: 'T) =
        record
        |> resultLens.get
        |> toTitleCase
        |> resultLens.set record

    Array.map normalizeOne input

/// Flag, for every record, whether it was painted on the earliest date on which
/// its `result` appears anywhere in `input`.
///
/// Returns one `{| rest; firstResultDiscovery |}` per input record, in input
/// order, where `rest` is the untouched original record. Several records can be
/// flagged for the same result when they share that earliest date.
/// `'T` must expose string properties `result` and `datePainted`
/// (KeyNotFoundException otherwise); an unparsable date raises FormatException.
let inline EnrichFirstResultDiscovery (input: 'T array) =
    // Extractors for reflection-based access
    let resultLens = Lens.forProperty<'T,string> "result"
    let dateLens   = Lens.forProperty<'T,string> "datePainted"

    // Read result and date once per record. The invariant culture keeps the
    // parsed dates independent of the machine's locale.
    let keyed =
        input
        |> Array.map (fun r ->
            let painted = DateTime.Parse(dateLens.get r, CultureInfo.InvariantCulture)
            r, resultLens.get r, painted)

    // Build a map of result -> earliest date
    let earliestByResult =
        keyed
        |> Array.groupBy (fun (_, res, _) -> res)
        |> Array.map (fun (res, group) ->
            let minDate =
                group
                |> Array.map (fun (_, _, painted) -> painted)
                |> Array.min
            res, minDate
        )
        |> Map.ofArray

    // Enrich each record with the flag
    keyed
    |> Array.map (fun (r, res, painted) ->
        {|
            rest = r
            firstResultDiscovery = (painted = earliestByResult.[res])
        |}
    )

// Provided type for the painter submissions CSV; HasHeaders=true tells the
// provider that the first row holds the column names.
type SubmissionsCsv = CsvProvider<"../../pigmentSubmissions/pigmentSubmissions.csv", HasHeaders=true>
// Submissions loaded from the same file that serves as the provider sample.
// EnrichPainterData reads paintersData.Rows from this value.
let paintersData = SubmissionsCsv.Load("../../pigmentSubmissions/pigmentSubmissions.csv")

/// Pair the incoming records with every row of the painter submissions CSV.
///
/// Returns `{| rest; newRows |}` where `rest` is `input` unchanged and
/// `newRows` holds one record per CSV row with `pigment1`, `pigment2`,
/// `result`, `datePainted` (formatted as yyyy-MM-dd) and `painter = Some …`.
/// The caller is expected to merge the two arrays.
///
/// TODO: submissions that already occur in `input` (same date and pigment
/// pair) are not filtered out here; an earlier draft of that filter was
/// disabled, and the pipeline relies on DedupePigments to resolve overlaps.
/// NOTE(review): that draft also skipped rows without a DatePainted value,
/// which suggests the column used to be optional - confirm against the CSV.
let inline EnrichPainterData (input: 'T array) =
    let newRowsFormatted =
        paintersData.Rows
        |> Seq.toArray
        |> Array.map (fun x ->
            {|
                pigment1 = x.Pigment1
                pigment2 = x.Pigment2
                result = x.Result
                // Invariant culture: always Gregorian digits, whatever the
                // machine's locale, so the text matches the file-name dates.
                datePainted = x.DatePainted.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)
                painter = Some x.Painter
            |})
    {|
        rest = input
        newRows = newRowsFormatted
    |}


// NOTE: Whenever you see an "Enrich" function, it must immediately be followed by a
//       merge strategy that is defined in the pipeline (merges can't be done inside the enrich function).
//       There are probably ways around it (look at lens setters: they can't add new fields, but they do allow in-place merges),
//       but for now this enrich-merge convention is used.

// Full pipeline: load every dated file, add the painter submissions,
// deduplicate, normalise the result names and flag first discoveries.
let output =
    let fromFiles =
        Directory.GetFiles(inputDir)
        |> Array.collect loadAndAddDate

    let enriched = EnrichPainterData fromFiles

    // Merge: file records get an empty painter so both sources share one shape;
    // the submission rows come first in the combined array.
    let combined =
        let painterless =
            enriched.rest
            |> Array.map (fun row -> {| row with painter = None |})
        Array.append enriched.newRows painterless

    let flagged =
        combined
        |> DedupePigments
        |> normalizeResults
        |> EnrichFirstResultDiscovery

    // Merge: flatten the flag back onto the record it belongs to
    flagged
    |> Array.map (fun item ->
        {| item.rest with firstResultDiscovery = item.firstResultDiscovery |})

// Last expression of the cell, so the notebook displays the result
output


In [None]:
open System.Text.Json
open System.IO

// Serialize enriched pigment data to JSON (indented for readability)
let jsonOptions = JsonSerializerOptions(WriteIndented = true)
let enrichedJson = JsonSerializer.Serialize(output, jsonOptions)

// Ensure the output directory exists (CreateDirectory is a no-op when it does)
let enrichedDir = "../data/enriched"
Directory.CreateDirectory(enrichedDir) |> ignore

// Write to file, replacing any previous export
let enrichedPath = Path.Combine(enrichedDir, "enrichedPigments.json")
File.WriteAllText(enrichedPath, enrichedJson)

// The check mark had been garbled by an encoding round-trip; restored here.
printfn "✅ Enriched pigment data written to %s" enrichedPath
