In [None]:
import $ivy.`com.github.tototoshi::scala-csv:1.3.6`
import com.github.tototoshi.csv._

# Methods

## Random tensors
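This method creates a random tensor and writes it to a CSV file in `sample_tensors/`, given the number of dimensions, their size (all dimensions have the same size), and the sparsity of the tensor. Here, "sparsity" is the fraction of the `size^nbDimensions` possible entries that are written (e.g. `1e-3` means one entry in a thousand), so a smaller value gives a sparser tensor. Every index of every dimension appears in at least one entry.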

In [None]:
/**
 * Generates a random sparse tensor and writes it as a CSV file named
 * `sample_tensors/tensor_<nbDimensions>_<size>_<sparsity>.csv`.
 *
 * The file starts with a header row `d0, d1, ..., val`. Then, for every dimension,
 * one row is written per index of that dimension (so each index of each dimension
 * appears at least once), and finally fully random rows are appended until the
 * number of rows reaches `size^nbDimensions * sparsity`. Coordinates are drawn
 * independently, so the same coordinates may appear in more than one row.
 *
 * @param nbDimensions number of dimensions of the tensor
 * @param size         number of indices in each dimension (indices are in `[0, size)`)
 * @param sparsity     fraction of the `size^nbDimensions` possible entries to write
 * @return `true` once the file has been written, `false` if the requested number of
 *         entries is smaller than the `size * nbDimensions` rows that are always written
 */
def createTensor(nbDimensions: Int, size: Long, sparsity: Double): Boolean = {
    // Requested number of rows (kept as a Double, exactly as it is compared in the loop below).
    val targetElements = math.pow(size, nbDimensions) * sparsity
    // One row per index of each dimension is always written.
    val minimumElements = size * nbDimensions

    // floorMod keeps the index in [0, size) for every Long, whereas
    // math.abs(Long.MinValue) is still negative and would produce a negative index.
    def randomIndex(): Long = Math.floorMod(scala.util.Random.nextLong(), size)
    // nextDouble() is in [0, 1), so the value is already non-negative.
    def randomValue(): Double = scala.util.Random.nextDouble() * 100

    if (minimumElements > targetElements) {
        println("Not enough values to create tensor, " +
        s"asked ${targetElements.toLong}, minimum : ${minimumElements}.")
        false
    } else {
        println(s"Create tensor of $nbDimensions dimensions of size $size with ${targetElements.toLong} elements.")
        val f = new java.io.File(s"sample_tensors/tensor_${nbDimensions}_${size}_${sparsity}.csv")
        // Opening the writer fails if the output directory is missing, so create it first.
        Option(f.getParentFile).foreach(_.mkdirs())
        val writer = CSVWriter.open(f)
        try {
            writer.writeRow((0 until nbDimensions).map(i => s"d$i").toList :+ "val")

            // At least one row for each index of each dimension.
            for (i <- 0 until nbDimensions) {
                val rows = (0L until size).map { j =>
                    List.tabulate(nbDimensions)(k => if (k == i) j else randomIndex()) :+ randomValue()
                }.toList
                writer.writeAll(rows)
            }

            // Fully random rows until the requested number of rows is reached.
            var nbElements = minimumElements
            while (nbElements < targetElements) {
                writer.writeRow(List.fill(nbDimensions)(randomIndex()) :+ randomValue())
                nbElements += 1
            }
            true
        } finally {
            // Always release the file handle, even if a write fails.
            writer.close()
        }
    }
}

## Random tensors with clusters
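This method creates a random tensor and writes it to a CSV file in `sample_tensors/`, given the number of dimensions, their size (all dimensions have the same size), and the sparsity of the tensor (the fraction of the `size^nbDimensions` possible entries that are written). Some clusters are added to the tensor: a cluster is a set of `clusterSize` distinct indices of the first dimension that share `nbValuesInCluster` entries with the same coordinates in the other dimensions and nearly equal values (they differ by less than 0.1). Indices that belong to a cluster do not appear in any other entry of the first dimension.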

In [None]:
/**
 * Generates a random sparse tensor containing clusters and writes it as a CSV file named
 * `sample_tensors/tensor_<nbDimensions>_<size>_<sparsity>_<nbClusters>clusters<clusterSize>.csv`.
 *
 * The file starts with a header row `d0, d1, ..., val`, followed by:
 *  1. the cluster rows: each cluster is a set of `clusterSize` distinct indices of dimension 0;
 *     for each of `nbValuesInCluster` random coordinates in the other dimensions, every index
 *     of the cluster gets a row with a shared base value plus a noise term in `[0, 0.1)`;
 *  2. one row per index of each dimension (cluster indices are skipped for dimension 0);
 *  3. random rows until the number of rows reaches `size^nbDimensions * sparsity`.
 * Outside of step 1, no row uses a cluster index as its dimension-0 coordinate.
 *
 * @param nbDimensions      number of dimensions of the tensor
 * @param size              number of indices in each dimension (indices are in `[0, size)`)
 * @param sparsity          fraction of the `size^nbDimensions` possible entries to write
 * @param nbClusters        number of clusters to create
 * @param clusterSize       number of dimension-0 indices in each cluster
 * @param nbValuesInCluster number of shared coordinates written for each cluster
 * @return `true` once the file has been written, `false` if the requested number of entries
 *         is smaller than `size * nbDimensions`, or if the clusters would use every index of
 *         dimension 0
 */
def createTensorWithCluster(nbDimensions: Int, size: Long, sparsity: Double,
                            nbClusters: Int, clusterSize: Int, nbValuesInCluster: Int): Boolean = {
    // Requested number of rows (kept as a Double, exactly as it is compared in the loop below).
    val targetElements = math.pow(size, nbDimensions) * sparsity
    val minimumElements = size * nbDimensions
    // Number of distinct dimension-0 indices reserved for clusters.
    val nbClusteredIndices = nbClusters.toLong * clusterSize

    // floorMod keeps the index in [0, size) for every Long, whereas
    // math.abs(Long.MinValue) is still negative and would produce a negative index.
    def randomIndex(): Long = Math.floorMod(scala.util.Random.nextLong(), size)
    // nextDouble() is in [0, 1), so the value is already non-negative.
    def randomValue(): Double = scala.util.Random.nextDouble() * 100
    // Rejection sampling: draw indices until one is not in `excluded`.
    def randomIndexOutside(excluded: Set[Long]): Long = {
        var candidate = randomIndex()
        while (excluded.contains(candidate)) {
            candidate = randomIndex()
        }
        candidate
    }

    if (minimumElements > targetElements) {
        println("Not enough values to create tensor, " +
        s"asked ${targetElements.toLong}, minimum : ${minimumElements}.")
        false
    } else if (nbClusteredIndices > 0 && nbClusteredIndices >= size) {
        // Without at least one free dimension-0 index, the rejection sampling never terminates.
        println("Not enough indices to create clusters, " +
        s"asked ${nbClusteredIndices}, maximum : ${size - 1}.")
        false
    } else {
        println(s"Create tensor of $nbDimensions dimensions of size $size with ${targetElements.toLong} elements.")

        val f = new java.io.File(s"sample_tensors/tensor_${nbDimensions}_${size}_${sparsity}_${nbClusters}clusters${clusterSize}.csv")
        // Opening the writer fails if the output directory is missing, so create it first.
        Option(f.getParentFile).foreach(_.mkdirs())
        val writer = CSVWriter.open(f)
        try {
            writer.writeRow((0 until nbDimensions).map(i => s"d$i").toList :+ "val")

            // Rows actually written so far; counted as we go so the final total matches the target.
            var nbElements = 0L
            // Dimension-0 indices already used by a cluster (a Set for constant-time lookups).
            var excluded = Set.empty[Long]

            // Clusters
            for (_ <- 0 until nbClusters) {
                var cluster = Vector.empty[Long]
                for (_ <- 0 until clusterSize) {
                    val member = randomIndexOutside(excluded)
                    cluster :+= member
                    excluded += member
                }
                println("New cluster : " + cluster.mkString(", "))
                for (_ <- 0 until nbValuesInCluster) {
                    // Coordinates in dimensions 1..n-1 and base value shared by the whole cluster.
                    val entry = (1 until nbDimensions).map(_ => randomIndex()).toList
                    val value = randomValue()
                    for (m <- cluster) {
                        // Small per-member noise in [0, 0.1).
                        val epsilon = scala.util.Random.nextDouble() % 0.1
                        writer.writeRow((m +: entry) :+ (value + epsilon))
                        nbElements += 1
                    }
                }
            }

            // At least 1 value for each dimension's value
            for (i <- 0 until nbDimensions) {
                // Cluster indices already have rows, so they are skipped for dimension 0.
                val indices = (0L until size).filter(j => i != 0 || !excluded.contains(j))
                val rows = indices.map { j =>
                    List.tabulate(nbDimensions) { k =>
                        if (k == i) j
                        else if (k == 0) randomIndexOutside(excluded)
                        else randomIndex()
                    } :+ randomValue()
                }.toList
                writer.writeAll(rows)
                nbElements += rows.size
            }

            // Add elements until sparsity reached
            while (nbElements < targetElements) {
                val coordinates = List.fill(nbDimensions)(randomIndex())
                // Discard rows that would land on a cluster index in dimension 0.
                if (coordinates.headOption.forall(first => !excluded.contains(first))) {
                    writer.writeRow(coordinates :+ randomValue())
                    nbElements += 1
                }
            }
            true
        } finally {
            // Always release the file handle, even if a write fails.
            writer.close()
        }
    }
}

# Tensors creation
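Create one tensor for each combination of the numbers of dimensions, sizes and sparsities given. A combination is skipped when its file already exists or when it would contain more than 1,000,000,000 elements.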

In [None]:
// Walk through every (dimension, size, sparsity) combination, in that nesting order.
// A combination is generated only if it stays under the element cap and its CSV file
// is not already on disk; otherwise the reason for skipping it is printed.
for {
    dimension <- 3 to 5
    size <- List(1000, 10000, 100000)
    sparsity <- List(1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9, 1e-10)
} {
    val expectedElements = math.pow(size, dimension) * sparsity
    val path = s"sample_tensors/tensor_${dimension}_${size}_${sparsity}.csv"
    if (expectedElements > 1000000000) {
        println(s"Too much elements (${expectedElements})")
    } else if (new java.io.File(path).exists) {
        println(s"File $path already exists.")
    } else {
        createTensor(dimension, size, sparsity)
    }
}