Skip to content

Commit

Permalink
Add (but don't use) question and comment scanning tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
Zomis committed Feb 24, 2021
1 parent 7c0f496 commit d26c85e
Show file tree
Hide file tree
Showing 8 changed files with 494 additions and 4 deletions.
4 changes: 3 additions & 1 deletion duga-ktor/build.gradle.kts
Expand Up @@ -20,16 +20,18 @@ application {
// Repositories declared for resolving this build's dependencies, listed in declaration order.
// NOTE(review): JCenter and the Bintray-hosted Kotlin repositories have been announced as
// end-of-life upstream — confirm these URLs still resolve before relying on them.
repositories {
mavenLocal()
jcenter()
// Presumably hosts the net.zomis artifacts declared in the dependencies block — TODO confirm.
maven { url = uri("https://www.zomis.net/maven") }
maven { url = uri("https://kotlin.bintray.com/ktor") }
maven { url = uri("https://kotlin.bintray.com/kotlinx") }
}

dependencies {
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version")
implementation("io.ktor:ktor-server-netty:$ktor_version")
implementation("net.zomis:machlearn:0.1.0-SNAPSHOT")
implementation("org.apache.commons:commons-text:1.9")
implementation("org.apache.logging.log4j:log4j-core:2.14.0")
implementation("org.apache.logging.log4j:log4j-slf4j-impl:2.14.0")

implementation("io.ktor:ktor-client-core:$ktor_version")
implementation("io.ktor:ktor-client-core-jvm:$ktor_version")
implementation("io.ktor:ktor-client-apache:$ktor_version")
Expand Down
4 changes: 1 addition & 3 deletions duga-ktor/src/DugaMain.kt
Expand Up @@ -13,10 +13,7 @@ import net.zomis.duga.utils.stats.DugaStatsNoOp
import org.slf4j.LoggerFactory
import java.io.File
import java.time.DayOfWeek
import java.time.LocalTime
import java.time.ZonedDateTime
import java.time.temporal.ChronoUnit
import java.time.temporal.TemporalUnit

object DugaMain {
private val logger = LoggerFactory.getLogger(DugaMain::class.java)
Expand All @@ -36,6 +33,7 @@ object DugaMain {
val gitHubApi = GitHubApi(client.client, readSecret("github"))
val stackExchangeApi = StackExchangeApi(client.client, readSecret("stackexchange"))
val hookString = HookString(DugaStatsNoOp())
val dugaTasks = DugaTasks(poster, stackExchangeApi)

// Instance-specific instructions
runBlocking {
Expand Down
26 changes: 26 additions & 0 deletions duga-ktor/src/net/zomis/duga/DugaTasks.kt
@@ -0,0 +1,26 @@
package net.zomis.duga

import net.zomis.duga.chat.DugaPoster
import net.zomis.duga.utils.stackexchange.StackExchangeApi
import net.zomis.duga.utils.stackexchange.CommentsScanTask
import net.zomis.duga.utils.stackexchange.ProgrammersClassification
import net.zomis.duga.utils.stackexchange.QuestionScanTask
import net.zomis.machlearn.text.TextClassification

/**
 * Bundles the periodic Stack Exchange scanning tasks so callers can trigger them
 * through a single object.
 *
 * Construction builds the question scan task for the "codereview" site, then creates the
 * text classifier from the `trainingset-programmers-comments.txt` classpath resource, and
 * finally the comment scan task that uses that classifier.
 *
 * @param poster chat poster handed to both scan tasks
 * @param stackApi Stack Exchange API client handed to both scan tasks
 */
class DugaTasks(private val poster: DugaPoster, private val stackApi: StackExchangeApi) {
    private val questionScanTask = QuestionScanTask(poster, stackApi, "codereview")

    // The resource lookup yields null when the file is not on the classpath; the (possibly
    // null) list of lines is passed to the classifier factory unchanged.
    private val programmersClassification: TextClassification = ProgrammersClassification.machineLearning(
        this::class.java.classLoader
            .getResource("trainingset-programmers-comments.txt")
            ?.readText()
            ?.split("\n")
    )

    private val commentsScanTask: CommentsScanTask =
        CommentsScanTask(stackApi, programmersClassification, poster)

    /** Runs one pass of the comment scan task. */
    suspend fun commentScan() = commentsScanTask.run()

    /** Runs one pass of the question scan task. */
    suspend fun answerInvalidation() = questionScanTask.run()

}
98 changes: 98 additions & 0 deletions duga-ktor/src/utils/stackexchange/AnswerInvalidationCheck.kt
@@ -0,0 +1,98 @@
package net.zomis.duga.utils.stackexchange

import com.fasterxml.jackson.databind.JsonNode
import net.zomis.duga.chat.DugaPoster
import net.zomis.duga.utils.stackexchange.StackExchangeApi
import org.apache.commons.text.StringEscapeUtils
import org.slf4j.LoggerFactory
import java.time.Instant

/**
 * Looks for recent edits to questions that changed the code of a question which already
 * has answers, and reports them to chat as possible answer invalidations.
 */
object AnswerInvalidationCheck {

    private val logger = LoggerFactory.getLogger(AnswerInvalidationCheck::class.java)

    /** Chat room that receives the "edits fetched" diagnostic message. */
    private const val DEBUG_ROOM = "20298"

    /** Chat room that receives the possible-invalidation reports. */
    private const val REPORT_ROOM = "8595"

    private const val CODE_OPEN = "<code>"
    private const val CODE_CLOSE = "</code>"

    // Hoisted so the patterns are compiled once instead of on every call.
    private val horizontalWhitespace = Regex("[\\t ]")
    private val questionPath = Regex("/questions/.*")

    /**
     * Inspects every question in [result] and, for each one that was edited at or after
     * [lastCheck] and has at least one answer, fetches its revisions and posts a chat
     * message when any of those revisions changed the question's code.
     *
     * @param poster used to post diagnostic and report messages
     * @param result API response containing an `items` array of questions; a null value
     *   or a response without `items` is logged and ignored
     * @param lastCheck only edits made at or after this instant are considered
     * @param stackExchangeAPI client used to fetch the revisions of an edited question
     * @throws IllegalStateException if the revisions of an edited question cannot be fetched
     */
    suspend fun perform(poster: DugaPoster, result: JsonNode?, lastCheck: Instant, stackExchangeAPI: StackExchangeApi) {
        if (result == null) {
            logger.error("No questions gathered for Answer Invalidation")
            return
        }
        logger.debug("Answer invalidation check")
        val questions = result.get("items")
        if (questions == null) {
            logger.error("Answer invalidation response contains no items")
            return
        }
        for (question in questions) {
            val edited = question.get("last_edit_date")?.asLong() ?: 0L
            if (edited < lastCheck.epochSecond || question.get("answer_count").asLong() <= 0) {
                continue
            }
            // Only dereference the remaining fields for questions that are actually inspected.
            val questionId = question.get("question_id").asLong()
            logger.info("edited: $questionId")
            val edits = stackExchangeAPI.apiCall(editCall(questionId), "codereview", "!9YdnS7lAD")
                ?: error("Unable to get edits for $questionId")
            poster.postMessage(
                DEBUG_ROOM,
                "Edits fetched for $questionId: ${edits.get("items").size()}. " +
                    "quota remaining ${edits.get("quota_remaining")}"
            )
            val possibleInvalidations = codeChanges(edits, lastCheck)
            if (possibleInvalidations.isEmpty()) {
                continue
            }
            val op = formatDisplayName(question.get("owner").get("display_name").asText())
            val link = question.get("link").asText().replace(questionPath, "/posts/$questionId/revisions")
            val editor = possibleInvalidations.joinToString(", ") {
                formatDisplayName(it.get("user").get("display_name").asText())
            }
            poster.postMessage(REPORT_ROOM, "*possible answer invalidation by $editor on question by $op:* $link")
        }
    }

    /** Decodes HTML entities in a display name. */
    fun formatDisplayName(displayName: String): String {
        return StringEscapeUtils.unescapeHtml4(displayName)
    }

    /** Returns true when [codeChanges] finds at least one code-changing revision. */
    fun codeChanged(edits: JsonNode, lastCheck: Instant): Boolean {
        return codeChanges(edits, lastCheck).isNotEmpty()
    }

    /**
     * Returns the revisions in `edits.items` that changed the code of the post.
     *
     * A revision is skipped when it has no `last_body`, was created before [lastCheck],
     * or is a rollback. The remaining revisions are kept when the output of
     * [stripNonCode] differs between `body` and `last_body`.
     */
    fun codeChanges(edits: JsonNode, lastCheck: Instant): List<JsonNode> =
        edits.get("items").filter { edit ->
            val lastBody = edit.get("last_body")
            lastBody != null && !lastBody.isNull &&
                edit.get("creation_date").asLong() >= lastCheck.epochSecond &&
                !edit.get("is_rollback").asBoolean() &&
                stripNonCode(edit.get("body").asText()) != stripNonCode(lastBody.asText())
        }

    /** Builds the API path for fetching the revisions of post [id]. */
    fun editCall(id: Long) = "posts/$id/revisions"

    /**
     * Reduces an HTML post body to the concatenated contents of its multi-line
     * `<code>` elements, with all tabs and spaces removed.
     *
     * A `<code>` element counts as multi-line when its content contains either a line
     * feed or the two-character sequence backslash-n; other `<code>` elements and all
     * text outside `<code>` elements are dropped.
     *
     * The body is scanned once from left to right, and each closing tag is searched for
     * after its opening tag, so a stray `</code>` earlier in the text is ignored.
     *
     * @throws IllegalStateException if a `<code>` tag has no closing `</code>` after it
     */
    fun stripNonCode(original: String): String {
        val post = original.replace(horizontalWhitespace, "")
        val kept = StringBuilder()
        var searchFrom = 0
        while (true) {
            val openIndex = post.indexOf(CODE_OPEN, searchFrom)
            if (openIndex < 0) {
                break
            }
            val contentStart = openIndex + CODE_OPEN.length
            val closeIndex = post.indexOf(CODE_CLOSE, contentStart)
            check(closeIndex >= 0) { "Unterminated <code> element in post body" }
            val code = post.substring(contentStart, closeIndex)
            if (code.contains("\\n") || code.contains('\n')) {
                kept.append(code)
            }
            searchFrom = closeIndex + CODE_CLOSE.length
        }
        return kept.toString()
    }

}
94 changes: 94 additions & 0 deletions duga-ktor/src/utils/stackexchange/CommentClassification.kt
@@ -0,0 +1,94 @@
package net.zomis.duga.utils.stackexchange

import com.fasterxml.jackson.databind.JsonNode
import org.slf4j.LoggerFactory
import java.util.regex.Pattern

/**
 * Keyword-based heuristics that score how likely a comment is to be recommending
 * another Stack Exchange site (Software Engineering / "Programmers", or Software
 * Recommendations).
 */
object CommentClassification {

    private val logger = LoggerFactory.getLogger(CommentClassification::class.java)

    /** Score threshold for the machine-learning classification. */
    val ML_THRESHOLD = 0.3f

    /** Score threshold for the keyword-based classification. */
    val REAL = 0.49f

    /** Score threshold for debug reporting. */
    val DEBUG = 0f

    /**
     * Matches an HTML anchor whose target is the Software Engineering site root
     * (with or without trailing slash) or anything under `/help/`, over http or https.
     */
    val PROG_LINK = Pattern.compile(
        Pattern.quote("<a href=\"http")
            + "s?" +
            Pattern.quote("://softwareengineering.stackexchange.com") + "(/|/help/.*)?" + Pattern.quote("\">")
    )

    /** Phrases and weights added to the base score in [calcInterestingLevelProgrammers], in evaluation order. */
    private val programmersPhrases = listOf(
        0.3f to "better fit",
        0.3f to "better suited",
        0.3f to "better place",

        0.01f to "close",
        0.05f to "off-topic",
        0.05f to "design",
        0.05f to "whiteboard",
        0.05f to "this question",
        0.15f to "this site",
        0.2f to "programmers.se",
        0.07f to "help at",
        0.07f to "place to ask",
        0.15f to "migrate",
        0.1f to "belong",
        0.02f to "instead",
        0.03f to "the place for",

        0.03f to "try programmers",
        0.03f to "for programmers",
        0.03f to "on programmers",
        0.03f to "at programmers",
        0.03f to "to programmers",
    )

    /** Substrings that mark a comment as a known canned message rather than a real recommendation. */
    private val programmersIgnoreMarkers = arrayOf(
        "please stop using programmers.se as your toilet bowl",
        "/7265", // http://meta.programmers.stackexchange.com/questions/7265/when-is-a-software-licensing-question-on-topic
        "/7182" // http://meta.programmers.stackexchange.com/questions/7182/what-goes-on-programmers-se-a-guide-for-stack-overflow
    )

    /** Returns true when [body] contains a link matched by [PROG_LINK]. */
    fun bodyContainsProgrammersLink(body: String): Boolean {
        return PROG_LINK.matcher(body).find()
    }

    /**
     * Scores a comment node: 1.0 if its `body` contains a [PROG_LINK] match, plus the
     * keyword score of its `body_markdown`.
     */
    fun calcInterestingLevelProgrammers(comment: JsonNode): Float {
        val matchPattern = if (bodyContainsProgrammersLink(comment.get("body").asText())) 1.0f else 0f
        return matchPattern + calcInterestingLevelProgrammers(comment.get("body_markdown").asText())
    }

    /** Scores the `body_markdown` of a comment node with [calcInterestingLevelSoftwareRecs]. */
    fun calcInterestingLevelSoftwareRecs(comment: JsonNode) = calcInterestingLevelSoftwareRecs(comment.get("body_markdown").asText())

    /**
     * Keyword score for Software Recommendations mentions.
     *
     * Starts at 0.4, adds 0.3 for each of "software recommendations" and "softwarerecs",
     * and subtracts 0.55 when the text contains "/336". Matching is case-insensitive:
     * the text is lower-cased first, as in [calcInterestingLevelProgrammers], because
     * all phrases are written in lower case.
     */
    fun calcInterestingLevelSoftwareRecs(comment: String): Float {
        val text = comment.toLowerCase()
        var points = 0.4f
        points += score(0.3f, text, "software recommendations")
        points += score(0.3f, text, "softwarerecs")
        // points -= score(0.25f, comment, "meta.softwarerecs.stackexchange.com/questions/336/");
        // points -= score(0.25f, comment, "meta.softwarerecs.stackexchange.com/q/336/");
        points -= score(0.55f, text, "/336")
        return points
    }

    /**
     * Keyword score for Software Engineering ("Programmers") mentions.
     *
     * Returns 0 when the lower-cased text does not mention "programmers", a fixed 0.42
     * when it contains one of the ignore markers, and otherwise 0.4 plus the weight of
     * every phrase from the phrase table that occurs in the text.
     */
    fun calcInterestingLevelProgrammers(commentOriginal: String): Float {
        val comment = commentOriginal.toLowerCase()
        if (!comment.contains("programmers")) {
            return 0f
        }
        if (programmersIgnore(comment)) {
            return 0.42f
        }
        // Folding in table order keeps the float summation order (and log order) stable.
        return programmersPhrases.fold(0.4f) { points, (weight, phrase) ->
            points + score(weight, comment, phrase)
        }
    }

    private fun programmersIgnore(comment: String) = programmersIgnoreMarkers.any { comment.contains(it) }

    /** Returns [f] (and logs the hit) when [comment] contains [string], otherwise 0. */
    private fun score(f: Float, comment: String, string: String): Float {
        if (comment.contains(string)) {
            logger.info("$string --- $comment --- $f")
            return f
        }
        return 0f
    }

}
123 changes: 123 additions & 0 deletions duga-ktor/src/utils/stackexchange/CommentsScanTask.kt
@@ -0,0 +1,123 @@
package net.zomis.duga.utils.stackexchange

import com.fasterxml.jackson.databind.JsonNode
import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.launch

import java.time.Instant;

import net.zomis.duga.chat.DugaPoster
import net.zomis.machlearn.text.TextClassification
import org.slf4j.LoggerFactory

/**
 * Polls Stack Overflow comments and forwards the ones that mention Code Review,
 * Software Engineering ("Programmers") or Software Recommendations to chat rooms.
 *
 * The task keeps the highest comment id and creation date it has seen, so each call to
 * [run] only handles comments newer than the previous call, and it honours the API's
 * `backoff` field by postponing the next fetch.
 *
 * @param stackAPI client used to fetch comments
 * @param programmersClassification classifier used by [programmersMLscore]
 * @param poster source of the chat rooms that messages are posted to
 */
class CommentsScanTask(val stackAPI: StackExchangeApi, val programmersClassification: TextClassification, val poster: DugaPoster) {
    private val logger = LoggerFactory.getLogger(CommentsScanTask::class.java)

    private var nextFetch = Instant.now()
    private var lastComment: Long = 0
    private var fromDate: Long = 0
    private var remainingQuota: Long = 0

    private val codeReview = poster.room("8595")
    private val debug = poster.room("20298")
    private val programmers = poster.room("21")
    private val softwareRecs = poster.room("20298")

    /** Returns true when the comment's markdown mentions "code review" or "codereview" (case-insensitive). */
    fun isInterestingCommentCR(comment: JsonNode): Boolean {
        val commentText = comment.get("body_markdown").asText().toLowerCase()
        return commentText.contains("code review") || commentText.contains("codereview")
    }

    /**
     * Fetches comments created since the last run and dispatches each new one.
     *
     * Does nothing while a previously requested backoff period is still active. Failures
     * are logged and reported to the debug room instead of being propagated, except for
     * coroutine cancellation, which is rethrown so the caller can actually be cancelled.
     */
    suspend fun run() {
        if (!Instant.now().isAfter(nextFetch)) {
            return
        }

        try {
            val comments = stackAPI.fetchComments("stackoverflow", fromDate)
            if (comments == null) {
                logger.error("Unable to get comments from $fromDate")
                return
            }
            val currentQuota = comments.get("quota_remaining").asLong()
            // A quota that grew since the previous fetch means the API quota was reset.
            if (currentQuota > remainingQuota && fromDate != 0L) {
                debug.post("${Instant.now()} Quota has been reset. Was $remainingQuota is now $currentQuota")
            }
            remainingQuota = currentQuota

            val backoff = comments.get("backoff")
            if (backoff != null) {
                nextFetch = Instant.now().plusSeconds(backoff.asLong() + 10)
                debug.post("${Instant.now()} Next fetch: $nextFetch because of backoff $backoff")
                return
            }

            val items = comments.get("items") ?: return
            if (items.size() >= 100) {
                debug.post(Instant.now().toString() + " Warning: Retrieved 100 comments. Might have missed some.")
            }

            // Compare against the id from before this batch: lastComment moves while iterating.
            val previousLastComment = lastComment
            for (comment in items.reversed()) {
                val commentId = comment.get("comment_id").asLong()
                if (commentId <= previousLastComment) {
                    continue
                }
                lastComment = maxOf(commentId, lastComment)
                fromDate = maxOf(comment.get("creation_date").asLong(), fromDate)
                dispatch(comment)
            }
        } catch (e: java.util.concurrent.CancellationException) {
            // Coroutine cancellation is signalled with this exception type; never swallow it.
            throw e
        } catch (e: Exception) {
            logger.error("Error retrieving comments", e)
            debug.post(Instant.now().toString() + " Exception in comment task " + e)
        }
    }

    /** Runs all three classifications on a single new comment and posts the matches. */
    private suspend fun dispatch(comment: JsonNode) {
        if (isInterestingCommentCR(comment)) {
            logComment(comment, "Code Review")
            codeReview.post(comment.get("link").asText())
        }

        classifyProgrammers(comment)

        val softwareCertainty = CommentClassification.calcInterestingLevelSoftwareRecs(comment)
        if (softwareCertainty >= CommentClassification.REAL) {
            softwareRecs.post(comment.get("link").asText())
        }
    }

    /**
     * Logs a one-line summary of [comment]: id, post type and id, author name and
     * reputation, and the markdown body. Fields missing from the node are logged as
     * empty text.
     */
    fun logComment(comment: JsonNode, site: String) {
        val owner = comment.path("owner")
        logger.info("$site comment ${comment.path("comment_id").asText()} " +
            "on ${comment.path("post_type").asText()} ${comment.path("post_id").asText()} " +
            "posted by ${owner.path("display_name").asText()} " +
            "with ${owner.path("reputation").asText()} reputation: ${comment.path("body_markdown").asText()}")
    }

    /**
     * Scores [comment] for Software Engineering relevance and posts its link to the
     * programmers room when the machine-learning score reaches
     * [CommentClassification.ML_THRESHOLD]. Scores at or above
     * [CommentClassification.DEBUG] are additionally logged and reported to the debug room.
     */
    fun classifyProgrammers(comment: JsonNode) {
        val oldClassification = CommentClassification.calcInterestingLevelProgrammers(comment)
        val programmersMLscore = programmersMLscore(comment)

        if (programmersMLscore >= CommentClassification.ML_THRESHOLD) {
            programmers.postAsync(comment.get("link").asText())
        }

        if (programmersMLscore >= CommentClassification.DEBUG) {
            logComment(comment, "Software Engineering (ML $programmersMLscore old $oldClassification)")
            val certaintyLevelMessage =
                "ML Classification " + programmersMLscore +
                    " (Old classification " + oldClassification + ")"
            // NOTE(review): fire-and-forget on GlobalScope because this function is not
            // suspending; failures and cancellation of these posts are not observed here.
            GlobalScope.launch {
                debug.post(certaintyLevelMessage)
                debug.post(comment.get("link").asText())
            }
        }
    }

    /**
     * Returns the classifier's score for the comment's markdown, or -1.0 without
     * consulting the classifier when the text mentions none of "programmers",
     * "softwareeng" or "software eng" (case-insensitive).
     */
    fun programmersMLscore(comment: JsonNode): Double {
        val text = comment.get("body_markdown").asText()
        val lowered = text.toLowerCase()
        val mentionsSite = lowered.contains("programmers") ||
            lowered.contains("softwareeng") ||
            lowered.contains("software eng")
        if (!mentionsSite) {
            // No need to check with the Machine Learning system in this case
            return -1.0
        }
        return programmersClassification.score(text)
    }
}

0 comments on commit d26c85e

Please sign in to comment.