# Are all Songhay credits in the `player-audio/` <acronym title="JavaScript Object Notation">JSON</acronym> files well-formed <acronym title="Extensible Markup Language">XML</acronym>?

Because these <acronym title="JavaScript Object Notation">JSON</acronym> files came from InfoPath <acronym title="Extensible Markup Language">XML</acronym>, I think the answer is _yes_, which would be great because navigable documents can then be generated to extract data from these files for a Songhay Publications future.

Let us verify this assertion!

In [1]:
#!fsharp
#r "nuget: FsToolkit.ErrorHandling"
#r "nuget: Songhay.Modules"
#r "nuget: Songhay.Modules.Publications"

## enumerating player manifests

The `directories` binding calls `Directory.EnumerateDirectories` with a path calculated from an environment variable, `VSCODE_CWD`, and a conventional path:

In [2]:
#!fsharp

open System.IO
open System.Linq

open FsToolkit.ErrorHandling

open Songhay.Modules.ProgramFileUtility

// Maximum number of sequence items the notebook formatter renders for an output.
Formatter.ListExpansionLimit <- 50

// Base folder, read from the `VSCODE_CWD` environment variable.
let home = Environment.GetEnvironmentVariable("VSCODE_CWD")

// Every immediate sub-directory of the conventional `player-audio/` folder, as a list.
// A failure to resolve the combined path is handed to `raiseProgramFileError`.
let directories =
    tryGetCombinedPath home "sourceRoot/azure-storage-accounts/songhaystorage/player-audio/"
    |> Result.map (fun path -> Directory.EnumerateDirectories(path))
    |> Result.valueOr raiseProgramFileError
    |> List.ofSeq

## yielding credits HTML

With the `directories` binding, we use the `LegacyPresentationUtility` to express `$"<credits>{html}</credits>"`:

In [3]:
open System.Collections.Generic
open System.Xml.Linq

open Songhay.Modules.JsonDocumentUtility
open Songhay.Modules.Bolero.LegacyPresentationUtility

// Parsed credits markup per presentation, keyed by the presentation's folder name.
let creditsData = Dictionary<string, XDocument>()

// For each folder (in sorted order) read `<folder>/<folder>.json`, pull out the credits HTML,
// wrap it in a `credits` root and parse it. `XDocument.Parse` throws on markup that is not
// well-formed XML, which is exactly the check this notebook is after.
for root in List.sort directories do
    let fileName = root.Split(Path.DirectorySeparatorChar) |> Array.last

    match tryGetCombinedPath root $"{fileName}.json" with
    | Error _ -> () // no usable manifest path for this folder: nothing to record
    | Ok path ->
        let html =
            File.ReadAllText(path)
            |> tryGetPresentationElementResult
            |> tryGetPresentationCreditsResult
            |> toResultFromStringElement (fun el -> el.GetString())
            |> Result.valueOr raise

        let xhtml = $"<credits>{html}</credits>"

        creditsData.Add(fileName, XDocument.Parse(xhtml))

// Platform line break, reused by the report strings in the cells that follow.
let nl = Environment.NewLine

// One entry per presentation: the key followed by its parsed credits markup.
creditsData
|> Seq.map (fun (KeyValue(key, document)) -> $"{nl}{key},{nl}{document}{nl}")

## credits inspection: all root children are `div` elements?

We can visually inspect the HTML output above to verify that the general format of all credits entries is of the form:

```html
<credits>
    <div>…</div>
    <div>…</div>
    <div>…</div>
    …
</credits>
```

What is better than visual inspection is automation! So let us look for this pattern:

In [4]:
// Every direct child of a `credits` root that is NOT a `div`, reported as "<key>, <element name>".
// `div` children map to None and are filtered out at the end, so only `Some` entries remain.
creditsData
|> Seq.collect (fun (KeyValue(key, document)) ->
    document.Root.Elements()
    |> Seq.map (fun child ->
        match child.Name.LocalName with
        | "div" -> None
        | other -> Some $"{key}, {other}"))
|> Array.ofSeq
|> Array.filter Option.isSome

| index | value |
| - | - |
| 0 | `bell_hooks, strong` |
| 1 | `bell_hooks, br` |
| 2 | `bell_hooks, strong` |
| 3 | `bell_hooks, br` |
| 4 | `bell_hooks, br` |
| 5 | `bell_hooks, strong` |
| 6 | `libradio00, strong` |
| 7 | `libradio00, br` |
| 8 | `libradio00, strong` |
| 9 | `libradio00, br` |
| 10 | `libradio00, strong` |
| 11 | `libradio00, br` |
| 12 | `libradio00, strong` |


The credits for two presentations (`bell_hooks` and `libradio00`) separate their lines with `br` elements instead of wrapping each line in a `div`. They have this form:

```html
<credits>
    …<strong>…</strong>
    <br />
    …
</credits>
```

This `…<strong>…</strong>` pattern is important as the assertions here are:

- the text node before the first `strong` element maps to `RoleCredit.role`
- the text content of any `strong` element maps to `RoleCredit.name`

Let us verify these assertions for the two `credits` elements found above and all `div` elements.

## inspection functions

These are the functions shared for the `RoleCredit` inspections:

In [5]:
// Raise the formatter's list expansion limit — presumably so the longer outputs of the
// inspection cells below are rendered in full (TODO confirm the intended item count).
Formatter.ListExpansionLimit <- 175

open System.Xml

/// True when `element` has at least one direct child `br` element.
let elementContainsBrElements (element: XElement) =
    element.Elements("br") |> Seq.isEmpty |> not

/// True when `element` has at least one direct child `strong` element.
let elementContainsStrongElements (element: XElement) =
    element.Elements("strong") |> Seq.isEmpty |> not

/// True when the very first child node of `element` is a `br` element.
let elementFirstNodeIsBr (element: XElement) =
    match element.FirstNode with
    | :? XElement as first -> first.Name.LocalName = "br"
    | _ -> false

/// True when the first child node of `element` is a text node.
/// An element without any child nodes yields `false`: its `FirstNode` is null, and
/// dereferencing it would otherwise raise a `NullReferenceException`.
let elementFirstNodeIsXText (element: XElement) =
    match element.FirstNode with
    | null -> false
    | first -> first.NodeType = XmlNodeType.Text

/// True when `element` holds exactly one child node and that node is blank text.
/// Note: an element with no child nodes at all yields `false`.
let elementIsEmptyOrWhiteSpace (element: XElement) =
    match List.ofSeq (element.Nodes()) with
    | [ :? XText as only ] -> System.String.IsNullOrWhiteSpace(only.Value)
    | _ -> false

/// Returns the non-blank text carried by `n`: `n` itself when it is a text node, or the
/// first child node of an `a` or `font` element when that child is a text node.
/// Every other case (including blank text) yields null.
let getXText (n: XNode) =
    // The candidate as XText when it is non-blank text, otherwise null.
    let nonBlankOrNull (candidate: XNode) : XText =
        match candidate with
        | :? XText as text when not (System.String.IsNullOrWhiteSpace text.Value) -> text
        | _ -> null

    match n with
    | :? XText -> nonBlankOrNull n
    | :? XElement as el when el.Name.LocalName = "a" || el.Name.LocalName = "font" ->
        nonBlankOrNull (el.Nodes().FirstOrDefault())
    | _ -> null

/// True for a `credits` element that has two or more direct `div` children.
let isCreditsWithManyChildDivs (credits: XElement) =
    match credits.Name.LocalName with
    | "credits" -> Seq.length (credits.Elements("div")) > 1
    | _ -> false

/// True for a `credits` element that has exactly one direct `div` child.
let isCreditsWithOneChildDiv (credits: XElement) =
    match credits.Name.LocalName with
    | "credits" -> Seq.length (credits.Elements("div")) = 1
    | _ -> false

/// Accepts a text node that is present (non-null) and whose trimmed value is not
/// merely the connective "and" or a lone parenthesis.
let isXTextValid (txt: XText) =
    if isNull txt then
        false
    else
        match txt.Value.Trim() with
        | "and"
        | "("
        | ")" -> false
        | _ -> true


## credits inspection: the text node before the first `strong` element

Here we will show that the text node before the first `strong` element maps to `RoleCredit.role`:

In [6]:

/// Collects the text nodes taken to hold role descriptions from a `<credits>` document.
/// Several layouts are recognized and tested in a fixed order; the first one that matches
/// decides which nodes are read. A root that is not `credits`, or a layout that matches
/// nothing, yields an empty array.
let extractRoleXText (document: XDocument) =
    // Keeps text nodes only, maps them through `getXText` and drops what `isXTextValid` rejects.
    let roleTexts (nodes: seq<XNode>) =
        nodes
        |> Seq.filter (fun node -> node.NodeType = XmlNodeType.Text)
        |> Seq.map getXText
        |> Seq.filter isXTextValid
        |> Array.ofSeq

    let credits = document.Root

    if credits.Name.LocalName <> "credits" then
        Array.Empty<XText>()
    else
        let divs = credits.Elements("div")

        // Evaluated on demand only: `First()` would throw when there is no `div` at all.
        let soleDivHasBr () =
            isCreditsWithOneChildDiv credits && elementContainsBrElements (divs.First())

        if isCreditsWithManyChildDivs credits then
            let firstHasBr = elementContainsBrElements (divs.First())

            let everyDivHasBrOrIsBlank =
                divs |> Seq.forall (fun div -> elementContainsBrElements div || elementIsEmptyOrWhiteSpace div)

            let lastDivHasStrongButNoBr =
                let lastDiv = divs.Last()
                not (elementContainsBrElements lastDiv) && elementContainsStrongElements lastDiv

            if everyDivHasBrOrIsBlank || (firstHasBr && lastDivHasStrongButNoBr) then
                // roles are spread over the text nodes of all divs
                roleTexts (divs.Nodes())
            elif firstHasBr then
                // only the first div carries the `br`-separated credits
                roleTexts (divs.First().Nodes())
            else
                // one credit per div: the role is the div's leading text node
                divs
                |> Seq.map (fun div -> if elementFirstNodeIsXText div then getXText div.FirstNode else null)
                |> Seq.filter isXTextValid
                |> Array.ofSeq
        elif soleDivHasBr () then
            roleTexts (divs.First().Nodes())
        elif elementFirstNodeIsXText credits then
            // no div wrapper at all: read the text nodes directly under `credits`
            roleTexts (credits.Nodes())
        else
            Array.Empty<XText>()

// For every presentation that yielded role text: the key followed by one role per line.
creditsData
|> Seq.map (fun (KeyValue(key, document)) -> key, extractRoleXText document)
|> Array.ofSeq
|> Array.filter (fun (_, roles) -> roles.Length > 0)
|> Array.map (fun (key, roles) ->
    let lines = roles |> Array.map (fun role -> role.Value) |> String.concat nl
    $"{nl}{key},{nl}{lines}{nl}")

## credits inspection: the text content of any `strong` element maps to `RoleCredit.name`

Here we will add to the work above and show that the text content of any `strong` element maps to `RoleCredit.name`:

In [7]:
/// Collects the leading text of `strong` elements, taken to hold credited names, from a
/// `<credits>` document. A root that is not `credits`, or a layout that matches none of the
/// recognized shapes, yields an empty array.
let extractNameXText (document: XDocument) =
    // `getXText` of the first node of every `strong` descendant of `scope` that has child nodes.
    let strongTexts (scope: XElement) =
        scope.Descendants("strong")
        |> Seq.filter (fun strong -> strong.Nodes().Any())
        |> Seq.map (fun strong -> getXText strong.FirstNode)

    // Drops entries rejected by `isXTextValid` and materializes the rest.
    let validated (texts: seq<XText>) =
        texts |> Seq.filter isXTextValid |> Array.ofSeq

    let credits = document.Root

    // Evaluated on demand only: `First()` would throw when there is no `div` at all.
    let soleDivHasBr () =
        isCreditsWithOneChildDiv credits
        && elementContainsBrElements (credits.Elements("div").First())

    if credits.Name.LocalName <> "credits" then
        Array.Empty<XText>()
    elif isCreditsWithManyChildDivs credits then
        // only divs that open with text or with a `br` contribute names
        credits.Elements("div")
        |> Seq.collect (fun div ->
            if elementFirstNodeIsXText div || elementFirstNodeIsBr div then
                strongTexts div
            else
                Seq.empty)
        |> validated
    elif soleDivHasBr () then
        credits.Elements("div").First() |> strongTexts |> validated
    elif elementFirstNodeIsXText credits then
        credits |> strongTexts |> validated
    else
        Array.Empty<XText>()

// For every presentation that yielded role text: the key, then its roles, then its names,
// each list printed one value per line.
creditsData
|> Seq.map (fun (KeyValue(key, document)) -> key, extractRoleXText document, extractNameXText document)
|> Array.ofSeq
|> Array.filter (fun (_, roles, _) -> roles.Length > 0)
|> Array.map (fun (key, roles, names) ->
    let valuesOf (texts: XText array) =
        texts |> Array.map (fun text -> text.Value) |> String.concat nl

    let joinRoles = valuesOf roles
    let joinNames = valuesOf names
    $"{nl}{nl}{key},{nl}{nl}{joinRoles},{nl}{nl}{joinNames}{nl}")

## is the `creditsData` mapping of sufficient quality?

I can declare that the `creditsData` mapping is of sufficient quality once the following exceptions are recognized:

| key | remarks |
| - | - |
| `arundhati_roy0` | the first two names share the same role |
| `ashanti_alston0` | additional data can be added through manual curation later |
| `c_xavier` | additional data can be added through manual curation later |
| `chris_abani0` | additional data can be added through manual curation later |
| `ward_churchill0` | the first name was captured in role data because `strong` tags are missing |
| `william_watkins0` | additional data can be added through manual curation later |
| `wm3` | the first two names share the same role |

Apart from additional data that can be added through manual curation later, there are only three exceptions that can be handled here:

| key | remarks |
| - | - |
| `arundhati_roy0` | the first two names share the same role |
| `ward_churchill0` | the first name was captured in role data because `strong` tags are missing |
| `wm3` | the first two names share the same role |


## exporting `RoleCredit` data from the `creditsData`

Now that we see `extractRoleXText` and `extractNameXText` working reasonably well (apart from the three exceptions above, which are handled explicitly below), we can yield `RoleCredit` data and serialize to JSON:

In [8]:
/// Serializes the role/name pairs of one presentation as a JSON array of
/// `{ "role": …, "name": … }` objects, one object per line.
///
/// `key` selects the hand-written corrections for `arundhati_roy0`, `wm3` and `ward_churchill0`.
/// `roles` and `names` are paired by position after those corrections are applied.
/// Raises `ArgumentException` (from `Array.zip`) when the two arrays end up with different
/// lengths; an empty pair list serializes as `[]`.
let toRoleCreditJson (key: string, roles: XText array, names: XText array) =
    // Escapes `"`, `\` and control characters so a value can never terminate its JSON string.
    // The relaxed encoder leaves characters such as `'`, `&` and non-ASCII letters readable,
    // which is fine here because the output goes to a .json file, not into HTML.
    let jsonEscape (value: string) =
        System.Text.Json.JsonEncodedText.Encode(value, System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping).ToString()

    let rolesMapped =
        match key with
        | "arundhati_roy0" | "wm3" ->
            //the first two names share the same role
            roles |> Array.insertAt 1 (roles |> Array.head)
        | "ward_churchill0" ->
            //the first name was captured in role data because `strong` tags are missing
            // A fresh XText is built so the node inside the source document is left untouched.
            roles |> Array.mapi (fun i xText ->
                    if i = 0 then
                        XText(xText.Value.Replace("Maria Gilardin of ", String.Empty))
                    else
                        xText
                )
        | _ -> roles

    let namesMapped =
        match key with
        | "ward_churchill0" ->
            //the first name was captured in role data because `strong` tags are missing
            names |> Array.insertAt 1 (XText "Bryan Wilhite")
        | _ -> names

    let credits =
        Array.zip rolesMapped namesMapped
        |> Array.map
            (
                fun (role, name) ->
                    // Only the standalone word "by" is dropped ("Produced by" -> "Produced");
                    // words that merely contain "by" (e.g. "Lobbyist") are kept intact.
                    let roleWithoutBy =
                        System.Text.RegularExpressions.Regex.Replace(role.Value, @"\bby\b", String.Empty)

                    let roleValue =
                        roleWithoutBy
                            .Replace(".", String.Empty)
                            .Replace(",", String.Empty)
                            .Replace("(", String.Empty)
                            .Replace("“", String.Empty)
                            .TrimEnd()

                    let nameValue =
                        name.Value
                            .Replace(",", String.Empty)
                            .Replace("(", String.Empty)
                            .Replace("“", String.Empty)
                            .TrimEnd()

                    $"{{ \"role\": \"{jsonEscape roleValue}\", \"name\": \"{jsonEscape nameValue}\" }}"
            )

    // String.Join copes with zero entries, unlike Array.reduce.
    let body = String.Join("," + Environment.NewLine, credits)
    $"[{body}]"

// JSON export per presentation key, limited to presentations for which role text was found.
let creditsDataExport =
    let export = Dictionary<string, string>()

    for KeyValue(key, document) in creditsData do
        // both extractions run for every entry, before the role filter is applied
        let roles = extractRoleXText document
        let names = extractNameXText document

        if roles.Length > 0 then
            export.Add(key, toRoleCreditJson (key, roles, names))

    export

// Writes each presentation's exported credits to `<folder>/<folder>_credits.json`.
// A folder is skipped when its target path cannot be resolved, or when it has no entry in
// `creditsDataExport` (presentations without role text are filtered out of the export);
// the dictionary indexer would otherwise raise KeyNotFoundException for such a folder.
directories
|> List.iter
    (
        fun root ->
            let fileName = root.Split(Path.DirectorySeparatorChar).Last()

            match tryGetCombinedPath root $"{fileName}_credits.json" with
            | Ok path ->
                match creditsDataExport.TryGetValue(fileName) with
                | true, json -> File.WriteAllText(path, json)
                | _ -> ()

            | _ -> ()
    )