# Imports and functions

In [6]:
%use dataframe

import java.net.URLDecoder
import java.net.URLEncoder
import org.commonmark.Extension
import org.commonmark.ext.footnotes.FootnotesExtension
import org.commonmark.ext.front.matter.YamlFrontMatterExtension
import org.commonmark.ext.front.matter.YamlFrontMatterVisitor
import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension
import org.commonmark.ext.gfm.tables.TablesExtension
import org.commonmark.ext.task.list.items.TaskListItemsExtension
import org.commonmark.node.AbstractVisitor
import org.commonmark.node.CustomNode
import org.commonmark.node.Link
import org.commonmark.node.Node
import org.commonmark.node.Text
import org.commonmark.parser.Parser
import org.commonmark.renderer.html.HtmlRenderer
import org.commonmark.renderer.markdown.MarkdownRenderer
import sims.michael.joplin2obsidian.TestConfig

In [16]:
import java.io.File

// Hard-coded sample paths under /tmp. The output file keeps the test file's
// position relative to the input directory, but rooted in the sibling "output" directory.
val inputDir = File("/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/input")
val testFile = File("/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/input/Main Notebook/UTC parking receipt, 2020-06-11.md")
val outputDir = inputDir.parentFile.resolve("output")
val relativeTo = testFile.relativeTo(inputDir)
val outputFile = outputDir.resolve(relativeTo)

// Show the four paths in a fixed order: input dir, test file, output dir, output file.
listOf(inputDir, testFile, outputDir, outputFile).forEach { path -> DISPLAY(path) }

/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/input

/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/input/Main Notebook/UTC parking receipt, 2020-06-11.md

/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/output

/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/output/Main Notebook/UTC parking receipt, 2020-06-11.md

In [None]:
/**
 * Displays [data] between an upper-cased [label] heading and a trailing newline.
 *
 * Three separate DISPLAY calls are made, in this order: heading, data, newline.
 */
fun displayWithLabel(label: String, data: Any) {
    val heading = "${label.uppercase()}:\n"
    listOf(heading, data, "\n").forEach { item -> DISPLAY(item) }
}

In [None]:
// Markdown extensions shared by the parser and the Markdown renderer built below.
val extensions: List<Extension> = listOf(
    TablesExtension.create(),
    StrikethroughExtension.create(),
    FootnotesExtension.create(),
)

// Parser and renderer are configured from the same extension list.
val parser: Parser = Parser.builder().extensions(extensions).build()
val builder = MarkdownRenderer.builder()
val renderer: MarkdownRenderer = builder.extensions(extensions).build()

# List all Markdown Files

In [None]:
// Root directory used by the later cells, taken from the project's TestConfig.
val workingDir = TestConfig.workingDirOverrideFile

In [None]:
// All regular files under workingDir whose extension is "md".
// walk() also yields directories, so isFile keeps a directory named "*.md" out of the
// list; later cells call readText() on every entry, which would fail on a directory.
val markdownFiles = workingDir.walk().filter { it.isFile && it.extension == "md" }.toList()

In [None]:
/** One row of the Markdown file listing: the file's [name] and the [path] of its parent directory. */
data class MdFile(
    val name: String,
    val path: String,
)

// Tabulate every Markdown file as (name, parent directory).
markdownFiles
    .map { file -> MdFile(name = file.name, path = file.parent) }
    .toDataFrame()

# List of all attachments in resources

In [None]:
/** One file found directly inside `_resources`: its [name], absolute [path], and [extension]. */
data class Attachment(
    val name: String,
    val path: String,
    val extension: String,
)

// listFiles() returns null when the directory cannot be listed; orEmpty() turns that into no rows.
val attachments = workingDir
    .resolve("_resources")
    .listFiles()
    .orEmpty()
    .map { file -> Attachment(name = file.name, path = file.absolutePath, extension = file.extension) }
    .toDataFrame()

In [None]:
// Bare expression, so the cell's result is the attachments data frame.
attachments

# Queries and explorations

In [None]:
/**
 * Percent-decodes this string as UTF-8.
 *
 * [URLDecoder] performs form decoding, in which a literal `+` stands for a space.
 * Every literal `+` is therefore rewritten to `%2B` before decoding so that it comes
 * out as `+` again; only `%XX` escapes are actually decoded.
 *
 * @return the decoded string
 */
fun String.urlDecodeSpacesAndParens(): String = URLDecoder.decode(replace("+", "%2B"), Charsets.UTF_8)
/**
 * Encodes this string with [URLEncoder] as UTF-8, then rewrites every `+` in the
 * encoded result to `%20`.
 *
 * @return the encoded string
 */
fun String.urlEncodeSpacesAndParens(): String {
    val formEncoded = URLEncoder.encode(this, Charsets.UTF_8)
    return formEncoded.replace("+", "%20")
}

In [None]:
/**
 * One Markdown link, flattened into a row for tabular inspection.
 *
 * @property name literal text of the link, or null when none was captured
 * @property destination the link destination as written
 * @property baseDestination the base (last path segment) destination value stored for the row
 * @property nameAndBaseDestinationMatch whether the name and the base destination were found equal
 * @property originalDestinationLooksUrlEncoded whether the destination appeared to be URL-encoded
 */
data class LinkRow(
    val name: String?,
    val destination: String,
    val baseDestination: String,
    val nameAndBaseDestinationMatch: Boolean,
    val originalDestinationLooksUrlEncoded: Boolean,
)

In [None]:
/**
 * Walks this node with a visitor and returns every [Link] the visitor is handed,
 * in visiting order.
 *
 * The visitor does not descend into a link's own children.
 */
fun Node.collectLinks(): List<Link> {
    val found = mutableListOf<Link>()
    val linkCollector = object : AbstractVisitor() {
        override fun visit(link: Link) {
            found.add(link)
        }
    }
    accept(linkCollector)
    return found
}

/**
 * Flattens this link into a [LinkRow].
 *
 * The base destination is the last path segment of the destination, URL-decoded.
 * The name is the literal of the first child when that child is a [Text] node, else null.
 * The match flag compares the name with the decoded base destination, i.e. the same value
 * that is stored in the row, so the flag always agrees with the row's own columns.
 *
 * @return the row describing this link
 */
fun Link.toLinkRow(): LinkRow {
    val name = (firstChild as? Text)?.literal
    val baseDestination = File(destination).name.urlDecodeSpacesAndParens()
    return LinkRow(
        name = name,
        destination = destination,
        baseDestination = baseDestination,
        // Compare against the decoded value; the raw segment may still carry %XX escapes.
        nameAndBaseDestinationMatch = name == baseDestination,
        originalDestinationLooksUrlEncoded = destination.contains("%"),
    )
}

## All links

In [None]:
// Parse each Markdown file, collect its links, and tabulate them as LinkRow rows.
val allLinks = markdownFiles
    .flatMap { file ->
        val document = parser.parse(file.readText())
        document.collectLinks().map { link -> link.toLinkRow() }
    }
    .toDataFrame()
allLinks

In [None]:
// Keep only the links whose destination starts with "../_resources/".
val resourcesLinks = allLinks.filter { destination.startsWith("../_resources/") }
resourcesLinks

## Resource links with no extension

In [None]:
// Resource links whose destination, as written, has no file extension.
resourcesLinks.filter { File(destination).extension.isEmpty() }

In [None]:
/**
 * Produces a file-name-friendly copy of this string.
 *
 * Each of the characters `\ / : * ? " < > |` becomes `_`; afterwards every run of
 * whitespace is collapsed into a single `_`.
 *
 * @return the sanitized string
 */
fun String.toSafeFileName(): String {
    val forbidden = setOf('\\', '/', ':', '*', '?', '"', '<', '>', '|')
    val sanitized = buildString {
        for (ch in this@toSafeFileName) {
            append(if (ch in forbidden) '_' else ch)
        }
    }
    return "\\s+".toRegex().replace(sanitized, "_")
}

## Resource links whose names and base destinations don't match

In [None]:
// Resource links whose nameAndBaseDestinationMatch flag is false.
resourcesLinks.filter { !nameAndBaseDestinationMatch }

In [None]:
// Names of the files currently in the attachments frame, and the base destinations of all
// resource links. Neither is used by the expression below.
val currentNames = attachments.toListOf<Attachment>().map { it.name }
val baseDestinations = resourcesLinks.toListOf<LinkRow>().map { it.baseDestination }

// Proposed renames (old name to new name), one candidate per distinct base destination.
// The new name keeps the old base name and takes the first non-empty extension found in the
// link text, falling back to the one on the current name. Rows where no extension can be
// found are skipped instead of throwing, as are rows where the name would not change.
resourcesLinks
    .toListOf<LinkRow>()
    .distinctBy { it.baseDestination }
    .mapNotNull { row ->
        val currentName = row.baseDestination
        listOfNotNull(row.name, currentName)
            .map { candidate -> File(candidate).extension }
            .firstOrNull { it.isNotEmpty() }
            ?.let { extension -> currentName to "${File(currentName).nameWithoutExtension}.$extension" }
            ?.takeIf { (old, new) -> old != new }
    }
    .toDataFrame()

## Number of unique base destinations and number of unique new names

In [None]:
// One-row frame holding the number of distinct base destinations next to the number of
// distinct new names.
// NOTE(review): `newNames` (with `baseDestination` and `newName` columns) is not defined in
// any cell visible here — confirm it exists in the kernel before running this cell.
dataFrameOf(
    "unique base destinations" to listOf(newNames.countDistinct { baseDestination }),
    "unique new names" to listOf(newNames.countDistinct { newName }),
)

# Last chance

## Unique 3-character strings starting with % found in destinations

In [None]:
// Every distinct match of "%" followed by any two characters across all resource-link
// destinations; each match is reported as its list of group values.
resourcesLinks
    .toListOf<LinkRow>()
    .flatMap { row ->
        "%..".toRegex().findAll(row.destination).map { match -> match.groupValues }
    }
    .distinct()

## Compare Whirlpool manual note - original and rerender

In [None]:
// Read the one note with this exact file name; single() throws unless exactly one file matches.
val whirlpoolText = markdownFiles
    .single { file -> file.name == "Whirlpool Refrigerator Model _GSF26C4EXY03 manual.md" }
    .readText()

// Parse the note and render it back to Markdown with the shared parser and renderer.
val node = parser.parse(whirlpoolText)
val rendered = renderer.render(node)

// Show the source text and the re-rendered text one after the other for comparison.
displayWithLabel(label = "original", data = whirlpoolText)
displayWithLabel(label = "rendered", data = rendered)

## Front matter parsing example

In [None]:
// Read the one note with this exact file name; single() throws unless exactly one file matches.
val whirlpoolText = markdownFiles.single { it.name == "Whirlpool Refrigerator Model _GSF26C4EXY03 manual.md" }.readText()

// Parse with only the YAML front matter extension enabled.
val extensions = setOf<Extension>(YamlFrontMatterExtension.create())
val node = Parser
    .builder()
    .extensions(extensions)
    .build()
    .parse(whirlpoolText)

// The stock visitor is used directly: the previous anonymous subclass only forwarded
// visit(CustomNode) to super, so it added nothing.
val visitor = YamlFrontMatterVisitor().also(node::accept)

// Render the parsed document back to Markdown using the same extension set.
val rendered = MarkdownRenderer.builder().extensions(extensions).build().render(node)

displayWithLabel("Front matter", visitor.data)
displayWithLabel("Original text", whirlpoolText)
displayWithLabel("Re-rendered text", rendered)


## Compare original to rendered

In [None]:
/**
 * Result of round-tripping one Markdown file through the parser and renderer.
 *
 * @property file name of the file
 * @property original text as read from disk
 * @property rendered text produced by rendering the parsed document
 * @property match whether [original] and [rendered] are equal
 */
data class Comparison(
    val file: String,
    val original: String,
    val rendered: String,
    val match: Boolean,
)

// Round-trip every Markdown file and record whether the text survived unchanged.
val comparisons = markdownFiles.map { file ->
    val sourceText = file.readText()
    val roundTripped = renderer.render(parser.parse(sourceText))
    Comparison(
        file = file.name,
        original = sourceText,
        rendered = roundTripped,
        match = sourceText == roundTripped,
    )
}

// Show only the files whose original text contains "- [ ]".
comparisons.toDataFrame()
    .filter { it[Comparison::original].contains("- [ ]") }

## Prove to myself that url encode/decode works fine

In [None]:
// Sample inputs for the encode/decode round-trip check: a plain word, a string with spaces,
// a string with brackets and punctuation, and a string with literal pluses.
val what = dataFrameOf("original string")(
    "simple",
    "has spaces",
    "has (all) [kinds] `of` *special* characters !@#$%^&*()",
    "has + pluses + ok?"
)
what


In [None]:
// Round-trip check: add an "encoded" column produced by urlEncodeSpacesAndParens, derive a
// "decoded" column by running urlDecodeSpacesAndParens over it, then add a "same?" column
// comparing each original string with its decoded value.
what.add("encoded") {
//    URLEncoder.encode(`original string`, Charsets.UTF_8)
    `original string`.urlEncodeSpacesAndParens()
}
    .select {
        // Keep the original and encoded columns, plus the decoded column computed from "encoded".
        `original string` and
                col("encoded") and
                col<String>("encoded")
                    .map {
//                        URLDecoder.decode(it, Charsets.UTF_8)
                        it.urlDecodeSpacesAndParens()
                    }
                    .named("decoded")
    }.add("same?") { `original string` == it.get("decoded") }