In [1]:
import scrapy
from scrapy.crawler import CrawlerProcess

from researchgate.researchgate.spiders.paper import PaperSpider

In [2]:
# Run the ResearchGate paper spider in-process and export every scraped item
# to the relative path items.json.
process = CrawlerProcess(
    settings={
        "FEEDS": {
            "items.json": {
                "format": "json",
                # Local-file feeds append by default, and two JSON arrays
                # written back to back cannot be parsed by json.load();
                # start from a clean file on every crawl instead.
                "overwrite": True,
            }
        },
        # Keep Scrapy's logging out of the cell output.
        "LOG_ENABLED": False,
        #     "LOG_LEVEL": 'INFO',
    }
)
process.crawl(PaperSpider)
process.start()

In [48]:
import json

# Load the scraped items written by the crawl. The encoding is pinned because
# open() otherwise falls back to the platform's locale encoding, which is not
# guaranteed to be UTF-8.
with open("items.json", encoding="utf-8") as f:
    items = json.load(f)

In [49]:
# Peek at the first scraped item to see which fields the export contains.
items[0]

{'id': '323694313',
 'title': 'The Lottery Ticket Hypothesis: Training Pruned Neural Networks',
 'abstract': 'Recent work on neural network pruning indicates that, at training time, neural networks need to be significantly larger in size than is necessary to represent the eventual functions that they learn. This paper articulates a new hypothesis to explain this phenomenon. This conjecture, which we term the "lottery ticket hypothesis," proposes that successful training depends on lucky random initialization of a smaller subcomponent of the network. Larger networks have more of these "lottery tickets," meaning they are more likely to luck out with a subcomponent initialized in a configuration amenable to successful optimization. This paper conducts a series of experiments with XOR and MNIST that support the lottery ticket hypothesis. In particular, we identify these fortuitously-initialized subcomponents by pruning low-magnitude weights from trained networks. We then demonstrate that t

In [115]:
from elasticsearch import Elasticsearch

In [116]:
# Client for the Elasticsearch instance at localhost:9200; every later cell
# talks to the cluster through this object.
es = Elasticsearch(hosts=[{"host": "localhost", "port": 9200}])

In [117]:
# Drop any previous copy of the index so the following cells start from a
# clean slate; ignore=404 keeps this from raising when the index is absent.
# The index name is passed by keyword, matching the create() call and the
# keyword-only signatures of newer elasticsearch-py releases.
es.indices.delete(index="paper-index", ignore=404)

{'acknowledged': True}

In [118]:
# Create the index without explicit settings or mappings. ignore=400 tells
# the client not to raise on an HTTP 400 response — presumably the
# "index already exists" case, so the cell can be re-run; confirm.
es.indices.create(index="paper-index", ignore=400)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'paper-index'}

In [14]:
# Index the first item through the low-level client, using the item's own id
# as the document id, so re-running the cell updates the same document.
es.index(index="paper-index", id=items[0]["id"], body=items[0])

{'_index': 'paper-index',
 '_type': '_doc',
 '_id': '323694313',
 '_version': 5,
 'result': 'updated',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 4,
 '_primary_term': 2}

In [119]:
from elasticsearch_dsl import (
    Document,
    Date,
    Nested,
    Boolean,
    analyzer,
    InnerDoc,
    Completion,
    Keyword,
    Text,
)

In [120]:
class Paper(Document):
    """Elasticsearch document mapping for one scraped paper.

    Every declared field is a full-text ``Text`` field; ``title``
    additionally carries a ``raw`` ``Keyword`` sub-field.
    """

    title = Text(fields={"raw": Keyword()})
    # NOTE(review): mapped as Text even though Date is imported above —
    # confirm whether the scraped dates should be a real date field.
    date = Text()
    abstract = Text()
    # NOTE(review): presumably lists of author names / reference strings; the
    # item schema is not visible here — verify against the spider.
    authors = Text()
    references = Text()

    class Index:
        # Name of the index this document type is stored in.
        name = "paper-index"

In [121]:
# Create the mappings declared on Paper in Elasticsearch, going through the
# explicitly passed `es` client.
Paper.init(using=es)

In [122]:
# Store every scraped item as a Paper document, reusing the item's own id as
# the document id.
for item in items:
    doc_meta = {"id": item["id"]}
    paper = Paper(meta=doc_meta, **item)
    paper.save(using=es)

In [127]:
from elasticsearch_dsl import Search

In [128]:
# Build a full-text "match" query on the title field of paper-index.
# NOTE(review): in the notebook as saved, `s` is reassigned by the next cell
# before this query is executed.
s = Search(using=es, index="paper-index").query("match", title="the lottery")

In [132]:
# Match on the `id` field stored in the document body (a field query, not a
# lookup by the `_id` metadata).
s = Search(using=es, index="paper-index").query("match", id="323694313")

In [133]:
# Send the search held in `s` to Elasticsearch and keep the response.
response = s.execute()

In [134]:
# One line per hit: relevance score followed by the paper title.
for hit in response:
    print(f"{hit.meta.score} {hit.title}")

2.7725885 The Lottery Ticket Hypothesis: Training Pruned Neural Networks


In [135]:
# Paper.search() hands back a standard Search object that is already limited
# to the index and doc_type of the document, so only the query is added here.
s = Paper.search(using=es).query("match", title="the lottery")

In [137]:
# Run the title query built in the previous cell and keep the response.
response = s.execute()

In [138]:
# One line per hit: relevance score followed by the paper title.
for hit in response:
    print(f"{hit.meta.score} {hit.title}")

5.3779216 The Lottery Ticket Hypothesis: Training Pruned Neural Networks


In [139]:
# Fetch a single document from paper-index directly by its document id,
# using the low-level client rather than a search query.
res = es.get(index="paper-index", id="323694313")

In [141]:
# Display the raw get() response; the stored document is under "_source".
res

{'_index': 'paper-index',
 '_type': '_doc',
 '_id': '323694313',
 '_version': 1,
 '_seq_no': 0,
 '_primary_term': 1,
 'found': True,
 '_source': {'id': '323694313',
  'title': 'The Lottery Ticket Hypothesis: Training Pruned Neural Networks',
  'abstract': 'Recent work on neural network pruning indicates that, at training time, neural networks need to be significantly larger in size than is necessary to represent the eventual functions that they learn. This paper articulates a new hypothesis to explain this phenomenon. This conjecture, which we term the "lottery ticket hypothesis," proposes that successful training depends on lucky random initialization of a smaller subcomponent of the network. Larger networks have more of these "lottery tickets," meaning they are more likely to luck out with a subcomponent initialized in a configuration amenable to successful optimization. This paper conducts a series of experiments with XOR and MNIST that support the lottery ticket hypothesis. In part