From d0907cc0e55f995b72d19eeb67fbd270b8c7decd Mon Sep 17 00:00:00 2001
From: Clara Martins
Date: Tue, 4 Jan 2022 09:59:04 +0000
Subject: [PATCH] Added schema to flatten the 2 cores into 1

---
 Review notes (free text between the three-dash line and the diffstat is not
 part of the commit message):
 * What the patch does: adds the `solr-multicore` / `clean-solr-multicore`
   Makefile targets, a step in prepare.py that tags every game and review
   with a `type` and writes them to data/games_and_reviews.json, and the
   combined Solr schema for the `games_and_reviews` core.
 * Restored one patch record per line; indentation inside the hunks was
   re-created (tabs for Makefile recipes, 4 spaces for Python, 2 for JSON).
 * prepare.py: the three files are opened with `with`, so each handle is
   closed even if json.load / json.dump raises. The hunk header and the
   diffstat counts were adjusted accordingly (25 to 22 added lines).
 * solr/schema.json now ends with a newline.
 * The blob ids on the `index` lines are those of the original patch and do
   not describe the edited prepare.py / solr/schema.json contents.
 * NOTE(review): `author_steamid` is declared `pint`; confirm the ids in
   data/reviews.json fit a 32-bit integer, otherwise use `plong`.

 Makefile         |  17 +++-
 prepare.py       |  22 ++++
 solr/schema.json | 258 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 2 deletions(-)
 create mode 100644 solr/schema.json

diff --git a/Makefile b/Makefile
index 17cc4b6..c43ca6c 100644
--- a/Makefile
+++ b/Makefile
@@ -66,7 +66,17 @@ solr-reviews : solr/reviews_schema.json data/reviews.json
 	docker cp data/reviews.json $(cn):/reviews.json
 	docker exec $(cn) bin/post -c reviews /reviews.json
 
-solr : solr-games solr-reviews
+solr-multicore: solr/schema.json data/games_and_reviews.json
+	docker exec $(cn) bin/solr create_core -c games_and_reviews
+	docker cp solr/enums_config.xml $(cn):/var/solr/data/games_and_reviews/enums_config.xml
+	docker cp solr/synonyms.txt $(cn):/var/solr/data/games_and_reviews/conf/synonyms.txt
+	curl -X POST -H 'Content-Type:application/json' \
+		--data-binary @solr/schema.json \
+		http://localhost:8983/solr/games_and_reviews/schema
+	docker cp data/games_and_reviews.json $(cn):/games_and_reviews.json
+	docker exec $(cn) bin/post -c games_and_reviews /games_and_reviews.json
+
+solr : solr-games solr-reviews solr-multicore
 
 clean-solr-games :
 	docker exec $(cn) bin/solr delete -c games
@@ -74,7 +84,10 @@ clean-solr-games :
 clean-solr-reviews :
 	docker exec $(cn) bin/solr delete -c reviews
 
-clean-solr : clean-solr-games clean-solr-reviews
+clean-solr-multicore :
+	docker exec $(cn) bin/solr delete -c games_and_reviews
+
+clean-solr : clean-solr-games clean-solr-reviews clean-solr-multicore
 
 query: /usr/bin/python3
 	python3 solr/query.py
\ No newline at end of file
diff --git a/prepare.py b/prepare.py
index b49a2b4..d8447dd 100644
--- a/prepare.py
+++ b/prepare.py
@@ -182,5 +182,27 @@ def main():
 
     print(Fore.GREEN + '\nDone.\n' + Style.RESET_ALL)
 
+    print(Fore.MAGENTA + Style.BRIGHT + '\n--- Flattening Data into a Single Index ---\n')
+
+    print(Fore.CYAN + '- Reading Games File...')
+    with open('data/steam.json') as games_file:
+        games = json.load(games_file)
+
+    print(Fore.CYAN + '- Reading Reviews File...')
+    with open('data/reviews.json') as reviews_file:
+        reviews = json.load(reviews_file)
+
+    print(Fore.CYAN + '- Adding the type...')
+    for game in games:
+        game['type'] = 'game'
+    for review in reviews:
+        review['type'] = 'review'
+
+    print(Fore.CYAN + '- Writing Games and Reviews File...')
+    with open('data/games_and_reviews.json', 'w') as merged_file:
+        json.dump(games + reviews, merged_file, separators=(',', ':'))
+
+    print(Fore.GREEN + '\nDone.\n' + Style.RESET_ALL)
+
 if __name__ == '__main__':
     main()
diff --git a/solr/schema.json b/solr/schema.json
new file mode 100644
index 0000000..9636fae
--- /dev/null
+++ b/solr/schema.json
@@ -0,0 +1,258 @@
+{
+  "add-field-type": [
+    {
+      "name": "text_name",
+      "class": "solr.TextField",
+      "indexAnalyzer": {
+        "tokenizer": {
+          "class": "solr.StandardTokenizerFactory"
+        },
+        "filters": [
+          {
+            "class": "solr.ASCIIFoldingFilterFactory",
+            "preserveOriginal": true
+          },
+          {
+            "class": "solr.LowercaseFilterFactory"
+          }
+        ]
+      },
+      "queryAnalyzer": {
+        "tokenizer": {
+          "class": "solr.StandardTokenizerFactory"
+        },
+        "filters": [
+          {
+            "class": "solr.ASCIIFoldingFilterFactory",
+            "preserveOriginal": true
+          },
+          {
+            "class": "solr.LowercaseFilterFactory"
+          },
+          {
+            "class": "solr.SynonymGraphFilterFactory",
+            "synonyms": "synonyms.txt",
+            "ignoreCase": true
+          }
+        ]
+      }
+    },
+    {
+      "name": "enum_owners",
+      "class": "solr.EnumFieldType",
+      "indexed": true,
+      "docValues": true,
+      "enumsConfig": "enums_config.xml",
+      "enumName": "owners"
+    },
+    {
+      "name": "enum_platform",
+      "class": "solr.EnumFieldType",
+      "indexed": true,
+      "docValues": true,
+      "enumsConfig": "enums_config.xml",
+      "enumName": "platform"
+    },
+    {
+      "name": "enum_protondb_tier",
+      "class": "solr.EnumFieldType",
+      "indexed": true,
+      "docValues": true,
+      "enumsConfig": "enums_config.xml",
+      "enumName": "protondb_tier"
+    }
+  ],
+  "add-field": [
+    {
+      "name": "type",
+      "type": "text_name",
+      "required": true
+    },
+    {
+      "name": "appid",
+      "type": "pint",
+      "required": true,
+      "indexed": false
+    },
+    {
+      "name": "name",
+      "type": "text_name",
+      "required": true
+    },
+    {
+      "name": "release_date",
+      "type": "pdate"
+    },
+    {
+      "name": "english",
+      "type": "boolean"
+    },
+    {
+      "name": "developer",
+      "type": "text_name",
+      "required": true,
+      "multiValued": true
+    },
+    {
+      "name": "publisher",
+      "type": "text_name",
+      "multiValued": true
+    },
+    {
+      "name": "platforms",
+      "type": "enum_platform",
+      "multiValued": true
+    },
+    {
+      "name": "required_age",
+      "type": "pint"
+    },
+    {
+      "name": "categories",
+      "type": "text_name",
+      "multiValued": true
+    },
+    {
+      "name": "genres",
+      "type": "text_name",
+      "multiValued": true
+    },
+    {
+      "name": "steamspy_tags",
+      "type": "text_name",
+      "required": true,
+      "multiValued": true
+    },
+    {
+      "name": "achievements",
+      "type": "pint"
+    },
+    {
+      "name": "positive_ratings",
+      "type": "pint"
+    },
+    {
+      "name": "negative_ratings",
+      "type": "pint"
+    },
+    {
+      "name": "total_ratings",
+      "type": "pint"
+    },
+    {
+      "name": "review_score",
+      "type": "pfloat"
+    },
+    {
+      "name": "weighted_score",
+      "type": "pfloat"
+    },
+    {
+      "name": "average_playtime",
+      "type": "pint"
+    },
+    {
+      "name": "median_playtime",
+      "type": "pint",
+      "required": true
+    },
+    {
+      "name": "price",
+      "type": "pfloat"
+    },
+    {
+      "name": "owners",
+      "type": "enum_owners"
+    },
+    {
+      "name": "detailed_description",
+      "type": "text_en"
+    },
+    {
+      "name": "short_description",
+      "type": "text_en"
+    },
+    {
+      "name": "about_the_game",
+      "type": "text_en"
+    },
+    {
+      "name": "main_time",
+      "type": "pint"
+    },
+    {
+      "name": "main_reports",
+      "type": "pint"
+    },
+    {
+      "name": "extras_time",
+      "type": "pint"
+    },
+    {
+      "name": "extras_reports",
+      "type": "pint"
+    },
+    {
+      "name": "completionist_time",
+      "type": "pint"
+    },
+    {
+      "name": "completionist_reports",
+      "type": "pint"
+    },
+    {
+      "name": "protondb_reports",
+      "type": "pint"
+    },
+    {
+      "name": "protondb_tier",
+      "type": "enum_protondb_tier"
+    },
+
+    {
+      "name": "author_steamid",
+      "type": "pint",
+      "indexed": false
+    },
+    {
+      "name": "playtime_at_review",
+      "type": "pint"
+    },
+    {
+      "name": "review",
+      "type": "text_en"
+    },
+    {
+      "name": "created",
+      "type": "pdate"
+    },
+    {
+      "name": "updated",
+      "type": "pdate"
+    },
+    {
+      "name": "recommended",
+      "type": "boolean"
+    },
+    {
+      "name": "votes_up",
+      "type": "pint"
+    },
+    {
+      "name": "votes_funny",
+      "type": "pint"
+    },
+    {
+      "name": "vote_score",
+      "type": "pfloat"
+    },
+    {
+      "name": "steam_purchase",
+      "type": "boolean"
+    },
+    {
+      "name": "received_for_free",
+      "type": "boolean"
+    }
+  ]
+}