# Elastic Search Playground
First, load the credentials needed to connect to the Elastic Search client and the PostgreSQL database.

In [1]:
import os

# Workaround: run the notebook from the project root so that the `src` package
# is importable and relative paths such as `auth/` and `config/` resolve.
# Only step up when `src` is not visible from the current directory; an
# unconditional chdir would climb one level higher every time the cell is re-run.
if not os.path.isdir("src"):
    os.chdir("../")
print(os.getcwd())

/home/jsonpy/Projects/Practical/twitter-query-expansion


In [10]:
import psycopg2
import sys
import json

from src.utils import get_project_root, es_connect
from elasticsearch import Elasticsearch
import configparser

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns only the
# names it actually parsed. Fail here with a clear message instead of hitting a
# KeyError on config['ELASTIC'] in a later cell.
loaded_files = config.read('auth/es-credentials.ini')
if not loaded_files:
    raise FileNotFoundError(
        f"Could not read 'auth/es-credentials.ini' (cwd: {os.getcwd()})"
    )
loaded_files

['auth/es-credentials.ini']

Check whether the Elastic Search instance is running by using the Elastic Search Python library.

In [11]:
# Open the client connection with the [ELASTIC] section of the credentials
# file, then request the cluster info to confirm the instance responds.
elastic_credentials = config['ELASTIC']
es_client = es_connect(credentials=elastic_credentials)
es_client.info()

Connecting to Elastic Search...
Successfully connected to https://localhost:9200


ObjectApiResponse({'name': 'f6240d32ea65', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'YIiFu2p-QOWJhSPb-Zcavw', 'version': {'number': '8.5.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'a846182fa16b4ebfcc89aa3c11a11fd5adf3de04', 'build_date': '2022-11-17T18:56:17.538630285Z', 'build_snapshot': False, 'lucene_version': '9.4.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

---
## Create Index

In [6]:
# Creating an index that already exists raises an error, and the "test" index
# is created again further down in this notebook, so it may still be present
# when the notebook is re-run. Create it only when it is missing.
create_response = None
if not es_client.indices.exists(index="test"):
    create_response = es_client.indices.create(index="test")
create_response

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'test'})

## Delete Index

In [7]:
# ignore_unavailable=True keeps the cell re-runnable: deleting an index that is
# already gone is treated as a no-op instead of raising a not-found error.
es_client.indices.delete(index="test", ignore_unavailable=True)

ObjectApiResponse({'acknowledged': True})

---
## Using Analyzer and Settings

In [5]:
# Read the index settings/mappings definition. The context manager closes the
# file handle deterministically, and the explicit encoding avoids depending on
# the platform's locale default (JSON is UTF-8 by specification).
with open('config/es-config.conf', encoding='utf-8') as config_file:
    es_config = json.load(config_file)

In [None]:
# Settings and mappings are supplied at index creation here, so drop any
# leftover "test" index first. Without this the cell fails on re-run because
# the index already exists.
# NOTE: this discards whatever is stored in the scratch index "test".
es_client.indices.delete(index="test", ignore_unavailable=True)
es_client.indices.create(
    index="test",
    settings=es_config["settings"],
    mappings=es_config["mappings"],
)

In [32]:
# Sample tweet text for the analyzer: retweet marker, user mention, numbers,
# a currency amount, HTML tags, a possessive, hashtags glued together, a time
# of day and several inflected word forms.
txt = (
    " RT @PeterPan 500 Millionen und 17,30€ 'bin' "
    "<b>Merkel's</b> als #Leben#SPD_VM #liebte#um 19:30 Uhr "
    "Millionen  möchte liebten liebte lieb"
)

In [33]:
# Run the sample text through the custom analyzer of the given index.
res = es_client.indices.analyze(
    index="tweets_30",
    analyzer="tweet_analyzer",
    text=txt,
)

# Print one token per line. The loop index was never used, so iterate over the
# tokens directly and print the value without an f-string wrapper.
for token_info in res["tokens"]:
    print(token_info["token"])

_retweet_
_user_peterpan
500
million
17
30€
merkel
_hashtag_leben
_hashtag_spd_vm
_hashtag_liebte
_hashtag_um
19:30
uhr
mocht
liebt
lieb


Now we want to feed data from the Twitter PostgreSQL database into Elastic Search. To do so, use the script provided in the `src` folder.

---
## Search data

In [54]:
# Full-text `match` query against the `txt` field.
search_terms = "Maß Bier"
es_query = {"match": {"txt": search_terms}}


In [55]:
# Limit the response to two hits and display the raw hit documents.
res = es_client.search(
    index="tweets_with_first_analyzer",
    query=es_query,
    size=2,
)
res["hits"]["hits"]

[{'_index': 'tweets_with_first_analyzer',
  '_id': 'wghVAoUBeMc1F03S-U3A',
  '_score': 17.26957,
  '_source': {'id': 1443707088392409095,
   'conversation_id': 1443707088392409095,
   'author_id': 235703405,
   'retweet_count': 6,
   'reply_count': 5,
   'like_count': 82,
   'created_at': '2021-10-01T00:39:27+02:00',
   'txt': 'Ich finde, CSU’ler wie #Ramsauer wären auf dem Oktoberfest bei einem Schweinshaxen und einer Maß Bier besser aufgehoben als bei einer Talkshow von Markus #Lanz!'}},
 {'_index': 'tweets_with_first_analyzer',
  '_id': 'DQpXAoUBeMc1F03SA2es',
  '_score': 9.787772,
  '_source': {'id': 1423614844863979527,
   'conversation_id': 1423614844863979527,
   'author_id': 940691491835564033,
   'retweet_count': 1,
   'reply_count': 0,
   'like_count': 9,
   'created_at': '2021-08-06T14:00:03+02:00',
   'txt': 'Heute ist der Internationale Tag des #Bieres, d.h.:\n1️⃣ #Freunde treffen, um gemeinsam Bier zu genießen.\n2️⃣Die Männer und Frauen zu ehren, welche das Bier brauen un