/
R2rmlUtils.scala
241 lines (201 loc) · 9.37 KB
/
R2rmlUtils.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
package net.sansa_stack.rdf.common.partition.r2rml
import net.sansa_stack.rdf.common.partition.core.{RdfPartitionStateDefault, RdfPartitioner}
import org.aksw.r2rml.jena.arq.lib.R2rmlLib
import org.aksw.r2rml.jena.domain.api._
import org.aksw.r2rml.jena.vocab.RR
import org.aksw.r2rmlx.domain.api.TermMapX
import org.aksw.sparqlify.core.sql.common.serialization.{SqlEscaper, SqlEscaperBacktick}
import org.apache.jena.graph.NodeFactory
import org.apache.jena.rdf.model.{Model, ModelFactory, ResourceFactory}
import org.apache.jena.sparql.core.Var
import org.apache.jena.sparql.expr.ExprVar
import scala.reflect.runtime.universe.MethodSymbol
object R2rmlUtils {

  /** Wraps a variable name into a SPARQL expression variable. */
  implicit def newExprVar(varName: String): ExprVar = new ExprVar(Var.alloc(varName))

  /** Creates an expression variable named "_&lt;varId&gt;" (delegates to the String overload above). */
  implicit def newExprVar(varId: Int): ExprVar = "_" + varId

  /** Creates an expression variable from the i-th attribute name (via the implicit String conversion). */
  def newExprVar(i: Int, attrNames: List[String]): ExprVar = {
    val attrName = attrNames(i)
    attrName
  }

  /**
   * Creates R2RML mappings for a single partition state using the default
   * table naming scheme ([[createDefaultTableName]]) and backtick SQL escaping.
   *
   * @param partitioner the partitioner that produced the partition state
   * @param partition the partition state to map
   * @param model the model the generated triples maps are added to
   * @param explodeLanguageTags if true, one mapping per language tag is generated
   * @param escapeIdentifiers if all SQL identifiers have to be escaped
   * @return the set of [[TriplesMap]]s added to the model
   */
  def createR2rmlMappings(partitioner: RdfPartitioner[RdfPartitionStateDefault],
                          partition: RdfPartitionStateDefault,
                          model: Model,
                          explodeLanguageTags: Boolean,
                          escapeIdentifiers: Boolean): Seq[TriplesMap] = {
    createR2rmlMappings(
      partitioner,
      partition,
      x => createDefaultTableName(x), // Map the partition to a name
      new SqlEscaperBacktick,
      model,
      explodeLanguageTags,
      escapeIdentifiers)
  }

  /**
   * Creates R2RML mappings for a sequence of partition states using the default
   * table naming scheme and backtick SQL escaping.
   *
   * @param partitioner the partitioner that produced the partition states
   * @param partitions the partition states to map
   * @param model the model the generated triples maps are added to
   * @param explodeLanguageTags if true, one mapping per language tag is generated
   * @param escapeIdentifiers if all SQL identifiers have to be escaped
   * @return the set of [[TriplesMap]]s added to the model
   */
  def createR2rmlMappings(partitioner: RdfPartitioner[RdfPartitionStateDefault],
                          partitions: Seq[RdfPartitionStateDefault],
                          model: Model,
                          explodeLanguageTags: Boolean,
                          escapeIdentifiers: Boolean): Seq[TriplesMap] = {
    partitions
      .flatMap(p => createR2rmlMappings(
        partitioner,
        p,
        model,
        explodeLanguageTags,
        escapeIdentifiers))
  }

  /**
   * Creates R2RML mappings for a sequence of partition states with an explicit
   * table naming function and SQL escaping policy.
   *
   * @param partitioner the partitioner that produced the partition states
   * @param partitions the partition states to map
   * @param extractTableName a function to obtain a table name from a partition state
   * @param sqlEscaper SQL escaping policies for table names, column names, string literals and aliases
   * @param model the model the generated triples maps are added to
   * @param explodeLanguageTags if true, one mapping per language tag is generated
   * @param escapeIdentifiers if all SQL identifiers have to be escaped
   * @return the set of [[TriplesMap]]s added to the model
   */
  def createR2rmlMappings(partitioner: RdfPartitioner[RdfPartitionStateDefault],
                          partitions: Seq[RdfPartitionStateDefault],
                          extractTableName: RdfPartitionStateDefault => String,
                          sqlEscaper: SqlEscaper,
                          model: Model,
                          explodeLanguageTags: Boolean,
                          escapeIdentifiers: Boolean): Seq[TriplesMap] = {
    partitions
      .flatMap(p => createR2rmlMappings(
        partitioner,
        p,
        extractTableName,
        sqlEscaper,
        model,
        explodeLanguageTags,
        escapeIdentifiers))
  }

  /**
   * Transform a RdfPartitionStateDefault into a sequence of R2RML mappings.
   * If the language handling strategy demands a dedicated column for language tags then the
   * resulting R2RML contains the non-standard 'rr:langColumn' property.
   *
   * FIXME Creating mappings per language tag needs yet to be implemented
   *
   * @param partitioner The partitioner
   * @param partitionState The partition state generated by the partitioner
   * @param extractTableName A function to obtain a table name from the partition state
   * @param sqlEscaper SQL escaping policies for table names, column names, string literals and aliases
   * @param outModel The output model
   * @param explodeLanguageTags If true then a mapping is generated for each language tag listed in the partition state.
   *                            Otherwise a generic language column is introduced
   * @param escapeIdentifiers if all SQL identifiers have to be escaped
   * @return The set of {@link TriplesMap}s added to the output model
   */
  def createR2rmlMappings(partitioner: RdfPartitioner[RdfPartitionStateDefault],
                          partitionState: RdfPartitionStateDefault,
                          extractTableName: RdfPartitionStateDefault => String,
                          sqlEscaper: SqlEscaper,
                          outModel: Model,
                          explodeLanguageTags: Boolean,
                          escapeIdentifiers: Boolean): Seq[TriplesMap] = {
    val p = partitionState // Shorthand

    // Derive the column names from the case-class layout of the partition
    // (case accessors sorted by declaration order yield the table schema).
    val t = partitioner.determineLayout(partitionState).schema
    var attrNames = t.members.sorted.collect({ case m: MethodSymbol if m.isCaseAccessor => m.name.toString })
    if (escapeIdentifiers) {
      attrNames = attrNames.map(sqlEscaper.escapeColumnName)
    }

    val predicateIri: String = partitionState.predicate

    var tableName = extractTableName(partitionState)
    if (escapeIdentifiers) {
      tableName = sqlEscaper.escapeTableName(tableName)
    }

    // Three columns means subject, object and a language-tag column
    if (explodeLanguageTags && attrNames.length == 3) {
      // Avoid double-escaping: attrNames are already escaped when escapeIdentifiers is set
      val escapedColumns = if (escapeIdentifiers) attrNames else attrNames.map(sqlEscaper.escapeColumnName)
      val projectedColumns = escapedColumns.slice(0, 2)
      val columnsSql = projectedColumns.mkString(", ")
      val langColSql = escapedColumns(2)

      // if there is only one language tag, we can omit the SQL query with the FILTER on the lang column
      if (p.languages.size == 1) {
        // TODO put to outer if-else and just add rr:language attribute
        // TODO for this case we wouldn't even need a table with a lang column, as long as the mapping keeps track of the language
        val tm: TriplesMap = outModel.createResource.as(classOf[TriplesMap])
        val pom: PredicateObjectMap = tm.addNewPredicateObjectMap()
        pom.addPredicate(predicateIri)

        // create subject map
        val sm: SubjectMap = tm.getOrSetSubjectMap()
        setTermMapForNode(sm, 0, attrNames, p.subjectType, "", false)

        // and the object map
        val om: ObjectMap = pom.addNewObjectMap()
        om.setColumn(escapedColumns(1))
        // blank language tags denote plain literals; only set non-empty tags
        if (p.languages.head.trim.nonEmpty) om.setLanguage(p.languages.head)

        tm.getOrSetLogicalTable().asBaseTableOrView().setTableName(tableName)
        Seq(tm)
      } else {
        // One triples map per language tag, each backed by an R2RML view that
        // filters the table on the language column.
        p.languages.map(lang => {
          val tableNameSql = if (escapeIdentifiers) tableName else sqlEscaper.escapeTableName(tableName)
          val langSql = sqlEscaper.escapeStringLiteral(lang)

          val tm: TriplesMap = outModel.createResource.as(classOf[TriplesMap])

          // create subject map
          val sm: SubjectMap = tm.getOrSetSubjectMap()
          setTermMapForNode(sm, 0, escapedColumns, p.subjectType, "", false)

          val pom: PredicateObjectMap = tm.addNewPredicateObjectMap()
          pom.addPredicate(predicateIri)

          val om: ObjectMap = pom.addNewObjectMap()
          om.setColumn(escapedColumns(1))
          if (lang.trim.nonEmpty) om.setLanguage(lang)

          tm.getOrSetLogicalTable().asR2rmlView().setSqlQuery(s"SELECT $columnsSql FROM $tableNameSql WHERE $langColSql = $langSql")
          tm
        }).toSeq
      }
    } else {
      // Generic case: a single triples map over the base table; language tags
      // (if present) are handled via the non-standard rr:langColumn property.
      val tm: TriplesMap = outModel.createResource.as(classOf[TriplesMap])
      val pom: PredicateObjectMap = tm.addNewPredicateObjectMap()
      pom.addPredicate(predicateIri)
      val sm: SubjectMap = tm.getOrSetSubjectMap()
      val om: ObjectMap = pom.addNewObjectMap()

      // create subject map
      setTermMapForNode(sm, 0, attrNames, p.subjectType, "", false)
      setTermMapForNode(om, 1, attrNames, p.objectType, p.datatype, p.langTagPresent)

      tm.getOrSetLogicalTable().asBaseTableOrView().setTableName(tableName)
      Seq(tm)
    }
  }

  /**
   * Configures a term map (subject or object map) for the column at the given offset.
   *
   * @param target the term map to configure
   * @param offset index into attrNames of the column backing this term
   * @param attrNames the (possibly escaped) column names
   * @param termType 0 = blank node, 1 = IRI, 2 = literal; anything else raises
   * @param datatype datatype IRI for typed literals (ignored when langTagPresent)
   * @param langTagPresent if true, the following column holds the language tag
   *                       and is recorded via the non-standard rr:langColumn
   * @return the configured term map (same instance as `target`)
   * @throws RuntimeException for an unhandled term type
   */
  def setTermMapForNode(target: TermMap, offset: Int, attrNames: List[String], termType: Byte, datatype: String, langTagPresent: Boolean): TermMap = {
    // val o = offset + 1
    val o = offset

    termType match {
      // TODO The RR.IRI.inModel(...) is a workaround right now
      case 0 => target.setColumn(attrNames(o)).setDatatype(RR.BlankNode.inModel(target.getModel))
      case 1 => target.setColumn(attrNames(o)).setDatatype(RR.IRI.inModel(target.getModel))
      case 2 =>
        target.setColumn(attrNames(o))
        if (langTagPresent) {
          target.as(classOf[TermMapX]).setLangColumn(attrNames(o + 1))
        } else {
          target.setDatatype(ResourceFactory.createProperty(datatype))
        }
      // case 2 if(!Option(datatype).getOrElse("").isEmpty) => E_RdfTerm.createTypedLiteral(o, o + 1)
      case _ => throw new RuntimeException("Unhandled case")
    }

    target
  }

  /**
   * Creates a SQL table name for a partition.
   *
   * The name is the full predicate IRI, suffixed with the datatype's local
   * name, a "_lang" marker when a language column is present, and "sbn"/"obn"
   * markers for blank-node subjects/objects.
   *
   * FIXME Consolidate with SQLUtils in the spark module?
   *
   * @param p the RDF partition
   * @return the derived table name
   */
  def createDefaultTableName(p: RdfPartitionStateDefault): String = {
    // For now let's just use the full predicate as the uri
    // val predPart = pred.substring(pred.lastIndexOf("/") + 1)
    val predPart = p.predicate
    val dt = p.datatype
    val dtPart = if (dt != null && dt.nonEmpty) "_" + dt.substring(dt.lastIndexOf("/") + 1) else ""
    val langPart = if (p.langTagPresent) "_lang" else ""
    // subject/object type 0 denotes a blank node
    val sTermTypePart = if (p.subjectType == 0) "sbn" else ""
    val oTermTypePart = if (p.objectType == 0) "obn" else ""

    val tableName = predPart + dtPart + langPart + sTermTypePart + oTermTypePart
    tableName
  }

  /**
   * Imports the RDF partition states as `TriplesMap` from the given RDF data model.
   *
   * @param model the model
   * @return the RDF partition states as `TriplesMap`
   */
  def streamTriplesMaps(model: Model): Iterator[TriplesMap] = {
    import collection.JavaConverters._
    R2rmlLib.streamTriplesMaps(model).iterator().asScala.toIterator
  }
}