-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
134 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import kotlinx.serialization.Serializable | ||
|
||
/**
 * Serializable record of the fields collected for one planet.
 *
 * Every property carries a default (an empty string or an empty list), so an
 * instance can be created with any subset of the fields supplied.
 */
@Serializable
data class PlanetWikiData(
    // Scalar attributes, all free-form text.
    val name: String = "",
    val type: String = "",
    val temperature: String = "",
    val atmosphere: String = "",
    val magnetosphere: String = "",
    val fauna: String = "",
    val flora: String = "",
    val water: String = "",
    // Multi-valued attributes.
    val resources: List<String> = emptyList(),
    val traits: List<String> = emptyList(),
)
|
||
/**
 * Serializable record of the fields collected for one mission.
 *
 * [name] and [id] default to the empty string; [type] defaults to
 * [MissionType.OTHER] when no type is given.
 */
@Serializable
data class MissionWikiData(
    val name: String = "",
    val id: String = "",
    val type: MissionType = MissionType.OTHER,
)
|
||
/**
 * Serializable record of the fields collected for one creature.
 *
 * Every property has a default, so an instance can be created (or decoded from
 * JSON) with any subset of the fields present. [resource] previously had no
 * default, which made it the only mandatory field: decoding an entry that lacked
 * it failed instead of falling back to an empty value like every other property.
 *
 * @property name creature name; `main` in the fauna scraper uses it as the key when merging entries.
 * @property temperament defaults to [Temperament.UNKNOWN] when not supplied.
 * @property planets names of planets associated with the creature.
 * @property biomes names of biomes associated with the creature.
 * @property resource resource associated with the creature; empty when not supplied.
 * @property drops names of items the creature drops.
 * @property abilities names of the creature's abilities.
 * @property other any additional key/value pairs that have no dedicated property.
 */
@Serializable
data class FaunaWikiData(
    val name: String = "",
    val temperament: Temperament = Temperament.UNKNOWN,
    val planets: List<String> = listOf(),
    val biomes: List<String> = listOf(),
    val resource: String = "",
    val drops: List<String> = listOf(),
    val abilities: List<String> = listOf(),
    val other: Map<String, String> = mapOf(),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[ | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package wikiScraper | ||
|
||
import FaunaWikiData | ||
import MissionWikiData | ||
import jsonMapper | ||
import kotlinx.serialization.decodeFromString | ||
import kotlinx.serialization.encodeToString | ||
import org.jsoup.Jsoup | ||
import toMissionType | ||
import java.io.File | ||
|
||
// Run-control switches for this scraper; edit and re-run to change what gets crawled.

// When true, main() processes only the first URL; the flag is also forwarded to crawl().
private const val onlyOne: Boolean = true

// Number of leading URLs main() skips; only applied when onlyOne is false.
private const val start: Int = 0

// Upper bound on the number of URLs processed; values <= 0 mean "no bound".
private const val limit: Int = 0

// Number of URLs grouped into one progress-logged batch.
private const val chunkSize: Int = 100
|
||
/**
 * Entry point: refreshes `src/jsMain/resources/fauna-wiki-data.json` from the wiki
 * pages listed in `raw-data/fauna-pages.txt`.
 *
 * Steps:
 * 1. Make sure the URL list file exists and, if it has no lines, populate it via
 *    [fetchFaunaPagesIfEmpty].
 * 2. Load any previously written fauna entries, keyed by name.
 * 3. Select the URLs to process according to `onlyOne`, `start` and `limit`
 *    (note: when `onlyOne` is true, `start` is ignored), then fetch and parse
 *    them in batches of `chunkSize`.
 * 4. Overwrite same-named entries with the freshly parsed ones and write the
 *    merged collection back out.
 */
fun main() {
    val faunaUrlFile = File("raw-data/fauna-pages.txt")
    if (!faunaUrlFile.exists()) {
        // writeText does not create missing directories, so create `raw-data/` first.
        faunaUrlFile.parentFile?.mkdirs()
        faunaUrlFile.writeText("")
    }
    fetchFaunaPagesIfEmpty(faunaUrlFile)

    val output = File("src/jsMain/resources/fauna-wiki-data.json")
    val existing = (if (output.exists()) {
        jsonMapper.decodeFromString<List<FaunaWikiData>>(output.readText())
    } else listOf()).associateBy { it.name }.toMutableMap()

    println("Reading Fauna")
    faunaUrlFile.readLines()
        // A blank line is not a URL; skip it rather than hand it to the page fetcher.
        .filter { it.isNotBlank() }
        .also { println("Found a total of ${it.size} urls") }
        .let { if (onlyOne) it.take(1) else it.drop(start) }
        .let { if (limit > 0) it.take(limit) else it }
        .also { println("Crawling ${it.size} urls") }
        .chunked(chunkSize).flatMap { chunk ->
            println("Processing next $chunkSize, starting with ${chunk.first()}")
            chunk.flatMap { fetchAndParseFauna(it) }
        }
        .forEach { fauna ->
            // Freshly parsed data replaces any stored entry with the same name.
            existing[fauna.name] = fauna
        }

    // Same reasoning as above: the resources directory may not exist on a fresh checkout.
    output.parentFile?.mkdirs()
    output.writeText(jsonMapper.encodeToString(existing.values))
}
|
||
/**
 * Populates [fauna] with crawled page URLs, but only when the file currently
 * contains no lines; a file that already has content is left untouched.
 *
 * The URLs come from `crawl` on the creature category page (with `onlyOne`
 * forwarded as its second argument), are de-duplicated, and are written one
 * per line.
 */
private fun fetchFaunaPagesIfEmpty(fauna: File) {
    // Already populated: nothing to do.
    if (fauna.readLines().isNotEmpty()) return

    val categoryUrl = "https://starfieldwiki.net/wiki/Category:Starfield-Creatures-All"
    val uniqueUrls = crawl(categoryUrl, onlyOne).toSet()
    fauna.writeText(uniqueUrls.joinToString(separator = "\n"))
}
|
||
/**
 * Fetches the wiki page at [url] and returns the fauna entries found on it.
 *
 * NOTE: parsing is not implemented yet. The page is downloaded, but the result
 * is always an empty list, so callers currently never receive any entries.
 *
 * TODO: extract entries from the page's `.wikitable` elements. The earlier draft
 * selected `.wikitable` twice (once as "variant tables", once as "single table")
 * with the identical selector and used neither result; the two cases need
 * distinct selectors or post-filtering once parsing is written.
 *
 * @param url address of the creature page to fetch.
 * @return the parsed entries; currently always empty.
 */
private fun fetchAndParseFauna(url: String): List<FaunaWikiData> {
    // Keep the request so an unreachable or invalid URL still fails here, as before.
    Jsoup.connect(url).get()
    return listOf()
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters