This repository has been archived by the owner on Jun 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial crawler commit * Added readme * Bugfixes * removed Junk * Bugfix * bugfixes, added player data, sample data * [CODE] Duplicate entries removed * [FEATURE] Added Batting and bowling crawlers to webcrawler (fixes #42,fixes #43) Co-authored-by: Bastian Große <bastiangrosse@pop-os.localdomain> Co-authored-by: scientes <scientes12@gmail.com> Co-authored-by: Royston E Tauro <54945757+lucasace@users.noreply.github.com>
- Loading branch information
1 parent
97f6e1a
commit 6272197
Showing
15 changed files
with
49,537 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,130 @@ | ||
__pycache__/*.pyc | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
pip-wheel-metadata/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
.vscode/settings.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Installation: | ||
|
||
`pip3 install scrapy python-dateutil` | ||
|
||
# Usage: | ||
|
||
`cd crawler/cricketcrawler` | ||
`scrapy crawl howstat` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import os | ||
|
||
# Bootstrap the crawler's output tree: one folder per match format plus the
# empty CSV files created alongside them.  All paths are relative to the
# current working directory.
#
# Every step is idempotent, so a partially created tree (for example the root
# folder exists but a sub-folder or CSV file is missing) is completed instead
# of being skipped, and files that already exist are never truncated.
for _format_dir in (
    "../../data_crawler/ODI",
    "../../data_crawler/T20",
    "../../data_crawler/TEST",
):
    os.makedirs(_format_dir, exist_ok=True)

for _csv_path in (
    "../../data_crawler/id_names.csv",
    "../../data_crawler/ODI/match_ids.csv",
    "../../data_crawler/T20/match_ids.csv",
    "../../data_crawler/TEST/match_ids.csv",
):
    # Append mode creates a missing file but leaves existing content alone.
    with open(_csv_path, "a"):
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from scrapy.exporters import CsvItemExporter | ||
|
||
class CsvItemExporter_M(CsvItemExporter):
    """CSV item exporter that closes its output stream when exporting ends.

    The original author describes this override as a workaround for a bug
    in Scrapy; which bug is meant is not recorded here.
    """

    def finish_exporting(self):
        """Close ``self.stream`` once the export is finished."""
        stream = self.stream
        stream.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
""" | ||
This modules defines all the fantasy leagues that | ||
the project supports | ||
""" | ||
|
||
class Scorer:
    """
    Per-format scoring tables for one category of fantasy points.

    :param scoring_dict: A dictionary containing scoring metrics.
        Each value is a sequence where index 0 holds the `T20` points,
        index 1 the `ODI` points and index 2 the `TEST` points.

    The constructor splits that dictionary into three attributes,
    ``t20``, ``odi`` and ``test``, each mapping a metric name to the
    points awarded for it in that format.
    """

    def __init__(self, scoring_dict):
        self.t20 = {key: scoring_dict[key][0] for key in scoring_dict}
        self.odi = {key: scoring_dict[key][1] for key in scoring_dict}
        self.test = {key: scoring_dict[key][2] for key in scoring_dict}

    def get_score(self, stats, playing_format):
        """
        Sum the points earned for ``stats`` in the given format.

        :param stats: Mapping of metric name to the value achieved for it.
            Every key must be a metric known to this scorer.
        :param playing_format: One of `ODI`,`T20`,`TEST`
        :return: The sum of ``points * value`` over all metrics in
            ``stats`` (0 when ``stats`` is empty).
        :raises KeyError: If ``playing_format`` is not supported or a key
            of ``stats`` is not a known metric.
        """
        type_map = {"ODI": self.odi, "T20": self.t20, "TEST": self.test}
        # Validate the format before iterating: a lookup done lazily inside
        # the sum would let an unsupported format pass unnoticed whenever
        # ``stats`` is empty.
        if playing_format not in type_map:
            raise KeyError(playing_format)
        points = type_map[playing_format]
        return sum(points[key] * stats[key] for key in stats)
|
||
|
||
class Dream11:
    """Scoring tables for the Dream11 league.

    Supported formats:
    * ODI
    * T20
    * TEST

    Each table below is a ``Scorer`` built from a mapping of metric name
    to a three-element list of point values.
    """

    name = "Dream11"

    # Points for batting events.
    batting_dict = Scorer({
        "runs": [1, 1, 1],
        "boundaries": [1, 1, 1],
        "sixes": [2, 2, 2],
        "50": [8, 4, 4],
        "100": [16, 8, 8],
        "duck": [-2, -3, -4],
    })

    # Points for bowling events.
    bowling_dict = Scorer({
        "wicket": [25, 25, 16],
        "4-wicket-haul": [8, 4, 4],
        "5-wicket-haul": [16, 8, 8],
        "Maiden": [4, 8, 0],
    })

    # Points for wicket-keeping events.
    wk_dict = Scorer({
        "Catch": [8, 8, 8],
        "Stump": [12, 12, 12],
    })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Define here the models for your scraped items | ||
# | ||
# See documentation in: | ||
# https://docs.scrapy.org/en/latest/topics/items.html | ||
|
||
import scrapy | ||
|
||
|
||
class Baseitem(scrapy.Item):
    """Common base class for the crawler's items.

    Subclasses override ``file`` and, where needed, ``unique``; both are
    plain class attributes, not Scrapy fields.
    """

    # Base name associated with the item type; empty on the base class.
    file = ""
    folder = scrapy.Field()
    # Names of the fields whose values are combined by ``get_key``.
    unique = []

    def get_key(self):
        """Return a key built from ``file`` and the ``unique`` field values.

        The key is ``self.file`` followed by the values of the fields named
        in ``self.unique``, concatenated in that order.  Values are passed
        through ``str`` so non-string field values do not break the join.

        :return: The key string.
        :raises KeyError: If a field named in ``unique`` has no value set.
        """
        values = dict(self)
        # The key was previously computed but never returned, so callers
        # always received ``None``.
        return self.file + "".join(str(values[field]) for field in self.unique)
|
||
|
||
class MatchidItem(Baseitem):
    """Item carrying a name, a match id and a date."""

    # Plain class attributes (not fields).
    file = "match_ids"
    unique = ["name", "matchid"]

    # Scrapy fields.
    name = scrapy.Field()
    matchid = scrapy.Field()
    date = scrapy.Field()
|
||
|
||
class PlayerItem(Baseitem):
    """Item carrying a long name, a name, a game type and a retired flag."""

    # Plain class attributes (not fields).
    file = "id_names"
    unique = ["name", "gametype"]

    # Scrapy fields.
    longname = scrapy.Field()
    name = scrapy.Field()
    gametype = scrapy.Field()
    retired = scrapy.Field()
|
||
|
||
class BowlingItem(Baseitem):
    """Item carrying bowling figures (overs, score, economy) for a match."""

    # Plain class attribute (not a field).
    file = "bowling"

    # Scrapy fields.
    matchid = scrapy.Field()
    overs = scrapy.Field()
    score = scrapy.Field()
    economy = scrapy.Field()
    name = scrapy.Field()
    # Re-declares the field that Baseitem already defines.
    folder = scrapy.Field()
|
||
|
||
class BattingItem(Baseitem):
    """Item carrying batting figures (score, strike rate) for a match."""

    # Plain class attribute (not a field).
    file = "batting"

    # Scrapy fields.
    matchid = scrapy.Field()
    score = scrapy.Field()
    strike_rate = scrapy.Field()
    name = scrapy.Field()
    # Re-declares the field that Baseitem already defines.
    folder = scrapy.Field()
|
||
class wicketkeepingItem(Baseitem):
    """Item carrying a wicket-keeping score for a match."""

    # Plain class attribute (not a field).
    file = "wicketkeeping"

    # Scrapy fields.
    matchid = scrapy.Field()
    score = scrapy.Field()
    name = scrapy.Field()
    # Re-declares the field that Baseitem already defines.
    folder = scrapy.Field()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Define your item pipelines here | ||
# | ||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting | ||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html | ||
|
||
|
||
# useful for handling different item types with a single interface | ||
from itemadapter import ItemAdapter | ||
from scrapy.exceptions import DropItem | ||
from scrapy.exporters import CsvItemExporter | ||
|
||
|
||
class CricketcrawlerPipeline:
    """Item pipeline that writes each item to a CSV file.

    The target file is ``../../data_crawler/<item["folder"]>/<item.file>.csv``
    (relative to the current working directory).  One ``CsvItemExporter`` is
    created per target file the first time an item for it arrives and is
    kept open until the spider closes.
    """

    def open_spider(self, spider):
        """Reset the per-crawl state when the spider starts."""
        # Maps output path -> (exporter, open file object).
        self.name_to_exporter = {}
        # Not read anywhere in this class; kept so the attribute still exists.
        self.seen = []

    def close_spider(self, spider):
        """Finish every exporter and close every output file.

        All files are closed even if an exporter fails to finish; the first
        such failure is re-raised once the clean-up is complete.
        """
        first_error = None
        for exporter, fp in self.name_to_exporter.values():
            try:
                exporter.finish_exporting()
            except Exception as exc:
                # Remember the failure but keep closing the remaining files.
                if first_error is None:
                    first_error = exc
            finally:
                fp.close()
        if first_error is not None:
            raise first_error

    def process_item(self, item, spider):
        """Export ``item`` to its CSV file and return it.

        The ``folder`` value only selects the output file; it is deleted from
        the item before export so it does not appear as a CSV column.  The
        returned item therefore no longer carries ``folder``.

        :raises KeyError: If the item has no ``folder`` value.
        """
        exporter = self._exporter_for_item(item)
        del item["folder"]
        exporter.export_item(dict(item))
        return item

    def _exporter_for_item(self, item):
        """Return the exporter for the item's target file, creating it on first use."""
        import os

        name = f'../../data_crawler/{item["folder"]}/{item.file}.csv'
        if name not in self.name_to_exporter:
            # Make sure the target folder exists so opening the file cannot
            # fail for a folder that was not created beforehand.
            os.makedirs(os.path.dirname(name), exist_ok=True)
            # "wb" starts the file from scratch for this crawl.
            f = open(name, "wb")
            try:
                exporter = CsvItemExporter(f)
                exporter.start_exporting()
            except Exception:
                # Do not leak the handle when the exporter cannot be set up.
                f.close()
                raise
            self.name_to_exporter[name] = exporter, f
        return self.name_to_exporter[name][0]
Oops, something went wrong.