Skip to content

Commit

Permalink
even more changes to make this work with immutable
Browse files Browse the repository at this point in the history
  • Loading branch information
RayanRal committed Jun 9, 2023
1 parent 99f2c21 commit 433da22
Showing 1 changed file with 8 additions and 14 deletions.
22 changes: 8 additions & 14 deletions src/main/scala/com/gmail/rayanral/index/IndexGenerator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,21 @@ import com.gmail.rayanral.index.util.StringUtils
import com.gmail.rayanral.index.util.StringUtils.StringExt
import org.apache.logging.log4j.scala.Logging

import java.util.StringTokenizer

class IndexGenerator(filesToIndex: List[String]) extends Logging {

private val index = InvertedIndex()

/**
 * Entry point that logs "Indexer started" and produces an InvertedIndex from the files
 * listed in `filesToIndex`.
 *
 * NOTE(review): this listing looks like a rendered commit diff whose +/- markers were lost,
 * so the body below appears to interleave two versions of the method (the braces do not
 * balance as a single definition) — confirm against the repository before relying on it.
 */
def generateIndex(): InvertedIndex = {
logger.info("Indexer started")
// Presumably the removed (pre-commit) lines: read each file in turn and hand the document,
// together with the class-level `index` value, to processDocument.
filesToIndex.foreach { fileName =>
val doc = readFile(fileName)
processDocument(index, doc)
// Presumably the added (post-commit) lines: read every file, then fold the documents into an
// index that starts as InvertedIndex(), carrying the value returned by processDocument forward.
filesToIndex.map(readFile).foldLeft(InvertedIndex()) { case (idx, doc) =>
val updIdx = processDocument(idx, doc)
updIdx
}
// Presumably a removed (pre-commit) line: the class-level `index` value was the method result.
index
}

/**
 * Tokenizes `document.text` with a `java.util.StringTokenizer` (default delimiters) and, for
 * every token that `processToken` accepts, calls `index.add` with the token and the
 * document's file name. The value returned by `index.add` is discarded and the method
 * itself returns `Unit`.
 *
 * NOTE(review): this appears to be the removed (pre-commit) side of the diff; a definition
 * with the same name that returns InvertedIndex follows it — confirm against the repository.
 *
 * @param index    index that receives the tokens
 * @param document document whose `text` is tokenized and whose `fileName` is recorded
 */
private def processDocument(index: InvertedIndex, document: GenericDocument): Unit = {
val st = new StringTokenizer(document.text)
while (st.hasMoreTokens) {
// processToken yields an Option; foreach only calls add when a token was produced.
processToken(st.nextToken).foreach(t => index.add(t, document.fileName))
}
}
/**
 * Adds every accepted token of `document` to `index` and returns the resulting index.
 *
 * The text is split on runs of whitespace (spaces, tabs, line breaks) rather than on a
 * single space character, and empty fragments are dropped. Splitting on `" "` alone would
 * leave words separated by tabs or newlines glued together and would emit empty tokens for
 * consecutive spaces; `java.util.StringTokenizer` with its default delimiters does neither.
 *
 * Each fragment is passed through `processToken`; fragments for which it returns `None`
 * are skipped.
 *
 * @param index    starting index; each `add` call's result is carried into the next one
 * @param document document whose `text` is tokenized and whose `fileName` is recorded per token
 * @return the index produced by the last `add` call, or `index` itself when no token is accepted
 */
private def processDocument(index: InvertedIndex, document: GenericDocument): InvertedIndex =
  document.text
    .split("\\s+")
    // A leading whitespace run (or an empty text) makes split return an empty first fragment.
    .filter(_.nonEmpty)
    .flatMap(processToken)
    .foldLeft(index)(_.add(_, document.fileName))

private def processToken(token: String): Option[String] = {
val tokenLower = token.toLowerCase
Expand Down

0 comments on commit 433da22

Please sign in to comment.