In [1]:
# Bootstrap Django inside the notebook so project code can be imported.
# Start Jupyter from the project root with: jupyter-lab

import os
import sys
from pathlib import Path

# The project directory is taken to be the parent of the kernel's current
# working directory. Resolve it to an absolute path before putting it on
# sys.path: a relative '..' entry is re-interpreted against whatever the
# working directory is at import time, so a later chdir would silently break
# imports that happen lazily.
django_project_dir = Path('../').resolve()
sys.path.insert(0, str(django_project_dir))
# setdefault keeps any DJANGO_SETTINGS_MODULE already set in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ratom_api.settings.local")

# Imported only after sys.path and the settings module are configured.
import django
django.setup()

# Start elasticsearch

In [3]:
# Bring up the elasticsearch and kibana compose services in detached mode (-d).
! docker-compose up -d elasticsearch kibana

Starting kibana ... 
Starting elasticsearch ... 
Starting kibana        ... done

In [4]:
# List the compose services with their state and published ports;
# both services are expected to show State "Up".
! docker-compose ps

    Name                  Command             State             Ports           
--------------------------------------------------------------------------------
elasticsearch   /usr/local/bin/docker-entr    Up      0.0.0.0:9200->9200/tcp,   
                ...                                   0.0.0.0:9300->9300/tcp    
kibana          /usr/local/bin/dumb-init -    Up      0.0.0.0:5601->5601/tcp    
                ...                                                             


Elasticsearch should now be running at http://localhost:9200/ (Kibana is published on port 5601).

In [None]:
# If Elasticsearch does not start, inspect the container logs for the cause:

! docker-compose logs elasticsearch

## Re-build index

See the django-elasticsearch-dsl quickstart section on populating the index: https://django-elasticsearch-dsl.readthedocs.io/en/latest/quickstart.html#populate

In [140]:
# Rebuild the search index through the project's manage.py, timing the run
# with the shell's `time`. Flags passed: -f, --rebuild, --parallel.
# NOTE(review): the output saved with this cell is a traceback ending in
# KeyError: <class '...jsonb.JSONField'> raised in django_elasticsearch_dsl's
# to_field, i.e. the command did not complete on its last recorded run.
# Presumably a JSONField is listed among a Document's auto-mapped fields;
# confirm in the project's documents module before trusting the index.
! time python ../manage.py search_index -f --rebuild --parallel

Traceback (most recent call last):
  File "/Users/copelco/.pyenv/versions/ratom-api/lib/python3.7/site-packages/django_elasticsearch_dsl/documents.py", line 139, in to_field
    model_field.__class__](attr=field_name)
KeyError: <class 'django.contrib.postgres.fields.jsonb.JSONField'>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "../manage.py", line 18, in <module>
    execute_from_command_line(sys.argv)
  File "/Users/copelco/.pyenv/versions/ratom-api/lib/python3.7/site-packages/django/core/management/__init__.py", line 381, in execute_from_command_line
    utility.execute()
  File "/Users/copelco/.pyenv/versions/ratom-api/lib/python3.7/site-packages/django/core/management/__init__.py", line 357, in execute
    django.setup()
  File "/Users/copelco/.pyenv/versions/ratom-api/lib/python3.7/site-packages/django/__init__.py", line 24, in setup
    apps.populate(settings.INSTALLED_APPS)
  File "/Users/copelco/.pyenv/versions

# Match

In [108]:
import pandas as pd

from ratom.documents import MessageDocument

In [121]:
# Search API reference:
# https://django-elasticsearch-dsl.readthedocs.io/en/latest/quickstart.html#search

# Describe a search over MessageDocument restricted to messages whose
# msg_subject matches "help".
search = (
    MessageDocument.search()
    .filter("match", msg_subject="help")
)

## Count

In [122]:
# Number of documents matching the search; per the recorded log this is
# answered by a GET to /message/_count.
search.count()

2019-11-28 10:32:53,412 elasticsearch        INFO     GET http://localhost:9200/message/_count [status:200 request:0.005s]


91

In [111]:
# Run the search (GET /message/_search in the recorded log) and keep the
# response object for the cells that follow.
response = search.execute()

2019-11-28 10:26:46,848 elasticsearch        INFO     GET http://localhost:9200/message/_search [status:200 request:0.013s]


In [112]:
# Total number of hits as reported inside the search response.
# How this differs from search.count(): in the recorded logs, count() sent its
# own GET /message/_count request, whereas this value is read from the
# response of the /message/_search request already made by search.execute().
# Both report 91 here, but this one is a mapping with 'value' and 'relation'
# keys rather than a bare integer.
# NOTE(review): presumably 'relation' can be something other than 'eq' when the
# reported total is not exact — confirm against the Elasticsearch docs.
response.hits.total

{'value': 91, 'relation': 'eq'}

In [113]:
# Timing figure carried in the search response — presumably the milliseconds
# Elasticsearch spent executing the query; confirm against the search API docs.
response.took

9

## Response

In [114]:
# Walk the first ten hits of the already-fetched response and print the
# subject line of each one.
top_hits = response[:10]
for hit in top_hits:
    print(hit.msg_subject)

Help with statistical analysis
Thanks for your help!!
FW: HELP!!! I'VE FAINTED AND I CAN'T COME TO!!!!!
Help with statistical analysis
Re: Info help.
Re: Info help.
Re: Info help.
Thanks for your help!!
Re: Help - Missing Profile Books!
Help on cluster analysis


In [115]:
# Tabulate the first ten hits with pandas: each hit is converted to a plain
# dict and becomes one row. Slicing `search` (not `response`) triggers a new
# _search request when iterated, as the recorded log shows.
results_df = pd.DataFrame([hit.to_dict() for hit in search[:10]])
results_df

2019-11-28 10:26:52,697 elasticsearch        INFO     GET http://localhost:9200/message/_search [status:200 request:0.005s]


Unnamed: 0,collection,msg_from,msg_subject,msg_body,directory
0,"{'title': 'vkaminski', 'accession_date': '2019...","""Vince J Kaminski""",Help with statistical analysis,"Date: Thu, 20 Apr 2000 08:05:00 -0700 (PDT),Th...",/Top of Personal Folders/test/Vincent_Kaminski...
1,"{'title': 'vkaminski', 'accession_date': '2019...","""Bridget D'Silva""",Thanks for your help!!,"Date: Fri, 11 Feb 2000 15:05:00 -0800 (PST),Fr...",/Top of Personal Folders/test/Vincent_Kaminski...
2,"{'title': 'dana_davis', 'accession_date': '201...",Davis,FW: HELP!!! I'VE FAINTED AND I CAN'T COME TO!!!!!,"Date: Mon, 8 Oct 2001 21:57:25 -0700 (PDT),Mon...",/Top of Personal Folders/davis-d/DDAVIS (Non-P...
3,"{'title': 'vkaminski', 'accession_date': '2019...","""Vince J Kaminski""",Help with statistical analysis,"date: Thu, 20 Apr 2000 08:05:00 -0700 (PDT) Th...",/Top of Personal Folders/test/Vincent_Kaminski...
4,"{'title': 'vkaminski', 'accession_date': '2019...","""Michael SCHILMOELLER"" <Michael_Schilmoeller@p...",Re: Info help.,"date: Tue, 15 Aug 2000 09:08:00 -0700 (PDT) Tu...",/Top of Personal Folders/test/Vincent_Kaminski...
5,"{'title': 'vkaminski', 'accession_date': '2019...","""Vince J Kaminski""",Re: Info help.,"date: Tue, 15 Aug 2000 17:51:00 -0700 (PDT) Tu...",/Top of Personal Folders/test/Vincent_Kaminski...
6,"{'title': 'vkaminski', 'accession_date': '2019...","""Vince J Kaminski""",Re: Info help.,"date: Tue, 15 Aug 2000 17:51:00 -0700 (PDT) Tu...",/Top of Personal Folders/test/Vincent_Kaminski...
7,"{'title': 'vkaminski', 'accession_date': '2019...","""Bridget D'Silva""",Thanks for your help!!,"date: Fri, 11 Feb 2000 15:05:00 -0800 (PST) Fr...",/Top of Personal Folders/test/Vincent_Kaminski...
8,"{'title': 'sally_beck', 'accession_date': '201...","""Sally Beck""",Re: Help - Missing Profile Books!,Body-Type: plain-text\r\n\r\nI did walk out of...,/Top of Personal Folders/beck-s/Sally_Beck_Jun...
9,"{'title': 'vkaminski', 'accession_date': '2019...","""Lance Cunningham""",Help on cluster analysis,"date: Thu, 22 Mar 2001 14:45:00 -0800 (PST) Th...",/Top of Personal Folders/kaminski-v/Vincent_Ka...


## Multi-match

See the elasticsearch-dsl documentation on building and combining queries: https://elasticsearch-dsl.readthedocs.io/en/latest/search_dsl.html#queries

In [128]:
from elasticsearch_dsl import Q

In [131]:
# Require both conditions at once: subject matches "help" AND the message
# belongs to the "kate_symes" collection. Written as an explicit bool/must
# query, which is the form the `&` operator on two match queries produces.
q = Q(
    "bool",
    must=[
        Q("match", msg_subject='help'),
        Q("match", collection__title="kate_symes"),
    ],
)
q

Bool(must=[Match(msg_subject='help'), Match(collection__title='kate_symes')])

In [138]:
# Run the combined query `q` against MessageDocument.
# NOTE: this rebinds `search` and `response` from the earlier single-match
# cells, so every later cell operates on the combined query.
search = MessageDocument.search().query(q)
response = search.execute()
# Number of matching documents. The recorded log for this cell shows only the
# /message/_search request — NOTE(review): count() appears to reuse the total
# from the response fetched on the previous line instead of sending a separate
# _count request; confirm against the elasticsearch-dsl docs.
search.count()

2019-11-28 10:42:21,354 elasticsearch        INFO     GET http://localhost:9200/message/_search [status:200 request:0.020s]


9

In [139]:
# Tabulate up to ten hits of the current `search`, one row per hit, by
# converting each hit to a plain dict before handing the list to pandas.
results_df = pd.DataFrame([hit.to_dict() for hit in search[:10]])
results_df

2019-11-28 10:42:31,723 elasticsearch        INFO     GET http://localhost:9200/message/_search [status:200 request:0.007s]


Unnamed: 0,collection,msg_from,msg_subject,msg_body,directory
0,"{'title': 'kate_symes', 'accession_date': '201...","""Kate Symes""",Re: Help!,Body-Type: plain-text\r\n\r\nWhatever she's ma...,/Top of Personal Folders/symes-k/kate symes 6-...
1,"{'title': 'kate_symes', 'accession_date': '201...","""Kate Symes""",Re: Help!,Body-Type: plain-text\r\n\r\nWhatever she's ma...,/Top of Personal Folders/symes-k/kate symes 6-...
2,"{'title': 'kate_symes', 'accession_date': '201...","""Kate Symes""",Re: Help!,Body-Type: plain-text\r\n\r\nWhatever she's ma...,/Top of Personal Folders/symes-k/kate symes 6-...
3,"{'title': 'kate_symes', 'accession_date': '201...","""Mark Confer""",Can You Help,"Body-Type: plain-text\r\n\r\nKate, could you h...",/Top of Personal Folders/symes-k/kate symes 6-...
4,"{'title': 'kate_symes', 'accession_date': '201...","""Mark Confer""",Can You Help,"Body-Type: plain-text\r\n\r\nKate, could you h...",/Top of Personal Folders/symes-k/kate symes 6-...
5,"{'title': 'kate_symes', 'accession_date': '201...","""Mark Confer""",Can You Help,"Body-Type: plain-text\r\n\r\nKate, could you h...",/Top of Personal Folders/symes-k/kate symes 6-...
6,"{'title': 'kate_symes', 'accession_date': '201...","""Mary Jane Symes"" <marys@tvapdx.com>",Help Save the Arctic Refuge,Body-Type: plain-text\r\n\r\nI just signed thi...,/Top of Personal Folders/symes-k/kate symes 6-...
7,"{'title': 'kate_symes', 'accession_date': '201...","""Mary Jane Symes"" <marys@tvapdx.com>",Help Save the Arctic Refuge,Body-Type: plain-text\r\n\r\nI just signed thi...,/Top of Personal Folders/symes-k/kate symes 6-...
8,"{'title': 'kate_symes', 'accession_date': '201...","""Mary Jane Symes"" <marys@tvapdx.com>",Help Save the Arctic Refuge,Body-Type: plain-text\r\n\r\nI just signed thi...,/Top of Personal Folders/symes-k/kate symes 6-...
