# Imports and functions

In [76]:
%use dataframe

import java.net.URLDecoder
import java.net.URLEncoder
import org.commonmark.Extension
import org.commonmark.ext.footnotes.FootnotesExtension
import org.commonmark.ext.front.matter.YamlFrontMatterExtension
import org.commonmark.ext.front.matter.YamlFrontMatterVisitor
import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension
import org.commonmark.ext.gfm.tables.TablesExtension
import org.commonmark.ext.task.list.items.TaskListItemsExtension
import org.commonmark.node.AbstractVisitor
import org.commonmark.node.CustomNode
import org.commonmark.node.Link
import org.commonmark.node.Node
import org.commonmark.node.Text
import org.commonmark.parser.Parser
import org.commonmark.renderer.html.HtmlRenderer
import org.commonmark.renderer.markdown.MarkdownRenderer
import sims.michael.joplin2obsidian.TestConfig

In [77]:
import java.io.File

// Sample input tree and one note inside it, used to try out the input -> output path mapping.
val inputDir = File("/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/input")
val testFile = File("/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/input/Main Notebook/UTC parking receipt, 2020-06-11.md")

// The output tree is a sibling of the input directory; the note keeps its relative location.
val outputDir = File(inputDir.parentFile, "output")
val relativeTo = testFile.relativeTo(inputDir)
val outputFile = outputDir.resolve(relativeTo)

// Display order: input dir, test file, output dir, output file.
for (location in listOf(inputDir, testFile, outputDir, outputFile)) {
    DISPLAY(location)
}

/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/input

/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/input/Main Notebook/UTC parking receipt, 2020-06-11.md

/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/output

/tmp/sims.michael.joplin2obsidian.MainCommand11316666218871478507/output/Main Notebook/UTC parking receipt, 2020-06-11.md

In [78]:
/**
 * Displays [data] beneath a heading made of the upper-cased [label] followed by a colon,
 * then displays a trailing newline.
 *
 * @param label heading text; it is upper-cased before display
 * @param data value handed to the notebook's DISPLAY function as-is
 */
fun displayWithLabel(label: String, data: Any) {
    val heading = label.uppercase() + ":\n"
    for (item in listOf(heading, data, "\n")) {
        DISPLAY(item)
    }
}

In [79]:
// CommonMark extensions (tables, strikethrough, footnotes) shared by the parser and the
// renderer below, so both are configured with the same set.
val extensions = listOf<Extension>(
    TablesExtension.create(),
    StrikethroughExtension.create(),
    FootnotesExtension.create(),
)
// Parser used by later cells to turn Markdown text into a node tree.
val parser = Parser.builder().extensions(extensions).build()
// The Markdown renderer builder is kept in its own val; the renderer is built from it.
val builder = MarkdownRenderer.builder()
// Renderer used by later cells to turn a parsed node tree back into Markdown text.
val renderer = builder.extensions(extensions).build()

# List all Markdown Files

In [80]:
// Root directory for everything below: it is walked for Markdown files and its
// "_resources" child is listed for attachments. The value comes from the project's TestConfig.
val workingDir = TestConfig.workingDirOverrideFile

In [81]:
// Every regular file under workingDir whose extension is exactly "md".
// walk() yields directories as well as files, and later cells call readText() on each entry,
// so the isFile guard keeps a directory that happens to be named like "foo.md" out of the list.
val markdownFiles = workingDir.walk().filter { it.isFile && it.extension == "md" }.toList()

In [82]:
/** One Markdown file: its file name and the path of the directory that contains it. */
data class MdFile(
    val name: String,
    val path: String,
)

// Cell result: a data frame with one row per Markdown file.
markdownFiles
    .map { file -> MdFile(name = file.name, path = file.parent) }
    .toDataFrame()

name,path


# List of all attachments in resources

In [83]:
/** One file found directly inside the "_resources" directory. */
data class Attachment(
    val name: String,
    val path: String,
    val extension: String,
)

// listFiles() may return null (e.g. when the directory is missing); orEmpty() turns that
// into an empty array so the result is simply an empty data frame.
val attachments = workingDir.resolve("_resources")
    .listFiles()
    .orEmpty()
    .map { file -> Attachment(name = file.name, path = file.absolutePath, extension = file.extension) }
    .toDataFrame()

In [84]:
// Cell result: show the attachments data frame built in the previous cell.
attachments

name,path,extension


# Queries and explorations

In [85]:
/**
 * URL-decodes this string as UTF-8 using [URLDecoder].
 *
 * Despite the name, every escape sequence the decoder understands is decoded,
 * not only spaces and parentheses.
 */
fun String.urlDecodeSpacesAndParens(): String {
    val encoded = this
    return URLDecoder.decode(encoded, Charsets.UTF_8)
}
/**
 * URL-encodes this string as UTF-8 using [URLEncoder], then rewrites every "+" in the
 * encoder's output as "%20".
 *
 * Despite the name, every character the encoder escapes is escaped,
 * not only spaces and parentheses.
 */
fun String.urlEncodeSpacesAndParens(): String {
    val formEncoded = URLEncoder.encode(this, Charsets.UTF_8)
    return formEncoded.replace("+", "%20")
}

In [86]:
/**
 * One Markdown link flattened into a data frame row.
 *
 * @property name the link's label text, or null when none was extracted
 * @property destination the link destination exactly as written in the Markdown
 * @property baseDestination the last path segment of the destination
 * @property nameAndBaseDestinationMatch whether the label and the base destination were found equal
 * @property originalDestinationLooksUrlEncoded whether the written destination contains a '%'
 */
data class LinkRow(
    val name: String?,
    val destination: String,
    val baseDestination: String,
    val nameAndBaseDestinationMatch: Boolean,
    val originalDestinationLooksUrlEncoded: Boolean,
)

In [87]:
/**
 * Returns every [Link] that a visitor started at this node reports, in visiting order.
 *
 * The override only records the link; it does not itself continue into the link's children.
 */
fun Node.collectLinks(): List<Link> {
    val found = mutableListOf<Link>()
    val collector = object : AbstractVisitor() {
        override fun visit(link: Link) {
            found += link
        }
    }
    accept(collector)
    return found
}

/**
 * Flattens this link into a [LinkRow].
 *
 * The base destination is the last path segment of the destination, URL-decoded. The
 * "match" flag compares the label with that same decoded value, so the flag always agrees
 * with the `name` and `baseDestination` columns shown next to it. Previously the comparison
 * used the still-encoded segment, so a label "my file.pdf" pointing at "my%20file.pdf" was
 * reported as a mismatch even though both columns displayed "my file.pdf".
 *
 * @return a row whose `name` is null when the link's first child is not a plain [Text] node
 */
fun Link.toLinkRow(): LinkRow {
    val rawBaseDestination = File(destination).name
    val baseDestination = rawBaseDestination.urlDecodeSpacesAndParens()
    // Only a link whose first child is plain text yields a label.
    val name = (firstChild as? Text)?.literal
    return LinkRow(
        name = name,
        destination = destination,
        baseDestination = baseDestination,
        nameAndBaseDestinationMatch = name == baseDestination,
        // A '%' anywhere in the written destination is taken as a hint of URL encoding.
        originalDestinationLooksUrlEncoded = destination.contains("%"),
    )
}

## All links

In [88]:
// Parse every Markdown file, gather the links of each document, and flatten them into rows.
val allLinks = markdownFiles
    .map { file -> parser.parse(file.readText()) }
    .flatMap { document -> document.collectLinks() }
    .map { it.toLinkRow() }
    .toDataFrame()
allLinks

name,destination,baseDestination,nameAndBaseDestinationMatch,originalDestinationLooksUrlEncoded


In [89]:
// Keep only links whose written destination starts with "../_resources/".
val resourcesLinks = allLinks.filter { destination.startsWith("../_resources/") }
// Cell result: show the filtered rows.
resourcesLinks

name,destination,baseDestination,nameAndBaseDestinationMatch,originalDestinationLooksUrlEncoded


## Resource links with no extension

In [90]:
// Resource links whose written destination has no file extension.
resourcesLinks.filter { File(destination).extension.isEmpty() }

name,destination,baseDestination,nameAndBaseDestinationMatch,originalDestinationLooksUrlEncoded


In [91]:
/**
 * Returns a copy of this string suitable for use as a file name.
 *
 * Each of the characters `\ / : * ? " < > |` becomes an underscore, and afterwards every
 * run of whitespace is collapsed into a single underscore.
 */
fun String.toSafeFileName(): String {
    val invalidChars = setOf('\\', '/', ':', '*', '?', '"', '<', '>', '|')
    val sanitized = buildString {
        for (ch in this@toSafeFileName) {
            append(if (ch in invalidChars) '_' else ch)
        }
    }
    return Regex("\\s+").replace(sanitized, "_")
}

## Resource links whose names and base destinations don't match

In [92]:
// Resource links whose nameAndBaseDestinationMatch flag is false.
resourcesLinks.filter { !nameAndBaseDestinationMatch }

name,destination,baseDestination,nameAndBaseDestinationMatch,originalDestinationLooksUrlEncoded


In [93]:
// Name lists kept as notebook-level values; they are not used by the pipeline in this cell.
val currentNames = attachments.toListOf<Attachment>().map { it.name }
val baseDestinations = resourcesLinks.toListOf<LinkRow>().map { it.baseDestination }

// Proposed renames (old name to new name), one per distinct base destination.
// The extension is taken from the link label when it has one, otherwise from the current
// file name. A row where neither has an extension gives nothing to rename to and is skipped;
// the earlier `first { ... }` threw NoSuchElementException on such a row. Rows whose name
// would not change are dropped too.
resourcesLinks
    .toListOf<LinkRow>()
    .distinctBy { it.baseDestination }
    .mapNotNull { row ->
        val currentName = row.baseDestination
        val extension = listOfNotNull(row.name, currentName)
            .map { candidate -> File(candidate).extension }
            .firstOrNull { it.isNotEmpty() }
            ?: return@mapNotNull null
        val newName = "${File(currentName).nameWithoutExtension}.$extension"
        (currentName to newName).takeIf { newName != currentName }
    }
    .toDataFrame()

first,second


# Last chance

## Unique 3-character strings starting with % found in destinations

In [94]:
// Distinct three-character substrings ("%" plus the two characters after it) found in the
// written destinations of resource links. Each match is mapped to its matched text, so the
// result is a flat list of strings rather than a list of one-element group lists.
// The regex is compiled once instead of once per row.
val percentTriplet = "%..".toRegex()
resourcesLinks
    .toListOf<LinkRow>()
    .flatMap { row -> percentTriplet.findAll(row.destination).map { match -> match.value } }
    .distinct()

[]

## Compare original to rendered

In [95]:
/** Original text of a Markdown file next to its parse-then-render round trip. */
data class Comparison(
    val file: String,
    val original: String,
    val rendered: String,
    val match: Boolean,
)

// Round-trip every Markdown file through the parser and renderer and note whether the text survived unchanged.
val comparisons = markdownFiles.map { file ->
    val original = file.readText()
    val rendered = renderer.render(parser.parse(original))
    Comparison(
        file = file.name,
        original = original,
        rendered = rendered,
        match = original == rendered,
    )
}

// Cell result: only the files whose original text contains an unchecked task-list marker.
comparisons.toDataFrame()
    .filter { row -> row[Comparison::original].contains("- [ ]") }

file,original,rendered,match


## Prove to myself that url encode/decode works fine

In [96]:
// Single-column data frame of sample strings for the encode/decode round trip below:
// a plain word, spaces, assorted punctuation, and literal plus signs.
val what = dataFrameOf("original string")(
    "simple",
    "has spaces",
    "has (all) [kinds] `of` *special* characters !@#$%^&*()",
    "has + pluses + ok?"
)
// Cell result: show the samples.
what


original string
simple
has spaces
has (all) [kinds] `of` *special* char...
has + pluses + ok?


In [97]:
// Round-trip check: add an "encoded" column using urlEncodeSpacesAndParens, derive a
// "decoded" column from it using urlDecodeSpacesAndParens, then add "same?" telling whether
// the decoded text equals the original string.
// The commented-out lines are the plain URLEncoder/URLDecoder calls the helpers stand in for.
what.add("encoded") {
//    URLEncoder.encode(`original string`, Charsets.UTF_8)
    `original string`.urlEncodeSpacesAndParens()
}
    .select {
        `original string` and
                col("encoded") and
                col<String>("encoded")
                    .map {
//                        URLDecoder.decode(it, Charsets.UTF_8)
                        it.urlDecodeSpacesAndParens()
                    }
                    .named("decoded")
    }.add("same?") { `original string` == it.get("decoded") }

original string,encoded,decoded,same?
simple,simple,simple,True
has spaces,has%20spaces,has spaces,True
has (all) [kinds] `of` *special* char...,has%20%28all%29%20%5Bkinds%5D%20%60of...,has (all) [kinds] `of` *special* char...,True
has + pluses + ok?,has%20%2B%20pluses%20%2B%20ok%3F,has + pluses + ok?,True
