In [6]:
from elasticsearch import Elasticsearch, helpers
from json_lineage import load
from dotenv import load_dotenv
import os
# Load connection settings written by the local Elasticsearch setup into the
# process environment, then read the two values the client needs.
load_dotenv("elastic-start-local/.env")
ES_LOCAL_API_KEY = os.environ.get("ES_LOCAL_API_KEY")
ES_LOCAL_URL = os.environ.get("ES_LOCAL_URL")

# Fail fast with a readable message: os.environ.get returns None for a missing
# key, which would otherwise only surface later when the client is built/used.
_missing_settings = [
    name
    for name, value in (
        ("ES_LOCAL_API_KEY", ES_LOCAL_API_KEY),
        ("ES_LOCAL_URL", ES_LOCAL_URL),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing Elasticsearch settings: "
        + ", ".join(_missing_settings)
        + " (expected in the environment or in elastic-start-local/.env)"
    )

In [7]:
# Single shared client for every cell below; authenticates with the API key
# read from the environment.
es = Elasticsearch(
    hosts=[ES_LOCAL_URL],
    api_key=ES_LOCAL_API_KEY,
)

In [5]:
# Create the `posts` index with a custom analyzer for the HTML `Body` field.
#
# The analysis definition is an index *setting*, so it is passed through the
# top-level `settings` argument (same shape as the `html_posts` index created
# further down). `mappings` only carries the field definitions.
es.indices.create(
    index='posts',
    settings={
        "analysis": {
            "analyzer": {
                # Strips HTML markup, then emits the remaining text as ONE
                # token (keyword tokenizer) -- see the _analyze output below.
                "html_analyzer": {
                    "tokenizer": "keyword",
                    "char_filter": [
                        "html_strip"
                    ]
                }
            }
        }
    },
    mappings={
        "properties": {
            "Body": {
                "type": "text",
                "analyzer": "html_analyzer"
            },
            "CommentCount": {
                "type": "integer"
            },
            "CreationDate": {
                "type": "date",
                # Matches source values such as '2008-07-31 21:42:52.667'.
                "format": "yyyy-MM-dd HH:mm:ss.SSS"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'posts'})

In [15]:
# Bulk-index every post from the JSON dump into the `posts` index.
data = load('StackOverflowMini_dbo_Posts.json')
print("data loaded")

# Generator instead of a list: helpers.bulk accepts any iterable of actions,
# so the ~1.5M action dicts are produced on demand rather than all being held
# in memory before the first request is sent.
actions = (
    {
        "_index": "posts",
        "_source": doc
    }
    for doc in data
)
print("data prepared for bulk insert")

# Last expression so the (success_count, errors) result is shown as cell output.
helpers.bulk(es, actions)

(1565425, [])

In [3]:
# Sanity check: no query given, so this just returns the default first page
# of hits from `posts`.
es.search(
    index="posts",
)

ObjectApiResponse({'took': 1, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 10000, 'relation': 'gte'}, 'max_score': 1.0, 'hits': [{'_index': 'posts', '_id': '38KbT5MBd1tDSIJ77JnM', '_score': 1.0, '_source': {'Body': "<p>I want to use a track-bar to change a form's opacity.</p>\n\n<p>This is my code:</p>\n\n<pre><code>decimal trans = trackBar1.Value / 5000;\nthis.Opacity = trans;\n</code></pre>\n\n<p>When I build the application, it gives the following error:</p>\n\n<blockquote>\n  <p>Cannot implicitly convert type <code>'decimal'</code> to <code>'double'</code>.</p>\n</blockquote>\n\n<p>I tried using <code>trans</code> and <code>double</code> but then the control doesn't work. This code worked fine in a past VB.NET project.</p>\n", 'CommentCount': 1, 'CreationDate': '2008-07-31 21:42:52.667'}}, {'_index': 'posts', '_id': '4MKbT5MBd1tDSIJ77JnM', '_score': 1.0, '_source': {'Body': '<p>I have an absolutely positioned <

In [4]:
# Try the tokenizer / char-filter combination on a small HTML snippet before
# committing it to an index definition.
resp = es.indices.analyze(
    text="I&apos;m so happy</b>!</p>",
    char_filter=["html_strip"],
    tokenizer="keyword",
)
print(resp)

{'tokens': [{'token': "I'm so happy!\n", 'start_offset': 0, 'end_offset': 26, 'type': 'word', 'position': 0}]}


In [11]:

# Index-level analysis: `html_analyzer` strips HTML and keeps the remaining
# text as a single keyword token.
html_posts_settings = {
    "analysis": {
        "analyzer": {
            "html_analyzer": {
                "tokenizer": "keyword",
                "char_filter": ["html_strip"],
            },
        },
    },
}

# Field definitions for the post documents.
html_posts_mappings = {
    "properties": {
        "Body": {"type": "text", "analyzer": "html_analyzer"},
        "CommentCount": {"type": "integer"},
        "CreationDate": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss.SSS"},
    },
}

# Create the small `html_posts` index used to check the analyzer end to end.
es.indices.create(
    index="html_posts",
    settings=html_posts_settings,
    mappings=html_posts_mappings,
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'html_posts'})

In [14]:
# Index the small sample file into `html_posts`.
data_mini = load('mini.json')

# One bulk "index" action per source record.
actions = [{"_index": "html_posts", "_source": record} for record in data_mini]
print("data prepared for bulk insert")

# Last expression so the (success_count, errors) result is shown as cell output.
helpers.bulk(es, actions)

data prepared for bulk insert


(10, [])

In [19]:
# Run a real post body through the analyzer registered on `html_posts` to see
# exactly which tokens would be indexed for it.
es.indices.analyze(
    text="<p>I want to use a track-bar to change a form's opacity.</p>\n\n<p>This is my code:</p>\n\n<pre><code>decimal trans = trackBar1.Value / 5000;\nthis.Opacity = trans;\n</code></pre>\n\n<p>When I build the application, it gives the following error:</p>\n\n<blockquote>\n  <p>Cannot implicitly convert type <code>'decimal'</code> to <code>'double'</code>.</p>\n</blockquote>\n\n<p>I tried using <code>trans</code> and <code>double</code> but then the control doesn't work. This code worked fine in a past VB.NET project.</p>\n",
    analyzer="html_analyzer",
    index="html_posts",
)

ObjectApiResponse({'tokens': [{'token': "\nI want to use a track-bar to change a form's opacity.\n\n\n\nThis is my code:\n\n\n\ndecimal trans = trackBar1.Value / 5000;\nthis.Opacity = trans;\n\n\n\n\nWhen I build the application, it gives the following error:\n\n\n\n\n  \nCannot implicitly convert type 'decimal' to 'double'.\n\n\n\n\n\nI tried using trans and double but then the control doesn't work. This code worked fine in a past VB.NET project.\n\n", 'start_offset': 0, 'end_offset': 507, 'type': 'word', 'position': 0}]})

In [22]:
# Sanity check: no query given, so this just returns the default first page
# of hits from `html_posts`.
es.search(
    index="html_posts",
)

ObjectApiResponse({'took': 0, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 10, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'html_posts', '_id': '0NroT5MBd1tDSIJ7Hnwo', '_score': 1.0, '_source': {'Body': "<p>I want to use a track-bar to change a form's opacity.</p>\n\n<p>This is my code:</p>\n\n<pre><code>decimal trans = trackBar1.Value / 5000;\nthis.Opacity = trans;\n</code></pre>\n\n<p>When I build the application, it gives the following error:</p>\n\n<blockquote>\n  <p>Cannot implicitly convert type <code>'decimal'</code> to <code>'double'</code>.</p>\n</blockquote>\n\n<p>I tried using <code>trans</code> and <code>double</code> but then the control doesn't work. This code worked fine in a past VB.NET project.</p>\n", 'CommentCount': 1, 'CreationDate': '2008-07-31 21:42:52.667'}}, {'_index': 'html_posts', '_id': '0droT5MBd1tDSIJ7Hnwo', '_score': 1.0, '_source': {'Body': '<p>I have an absolutely positi