# Elastic Search Playground
First, load the credentials needed to connect the Elasticsearch client and the PostgreSQL database.

In [1]:
import os

# Workaround: the relative paths used in this notebook (auth/, config/, src/)
# are resolved from the project root, so the working directory is moved there.
# The step up is only taken while the `src` package is not visible from the
# current directory; this keeps the cell idempotent, i.e. re-running it does
# not climb one directory higher each time.
if not os.path.isdir("src"):
    os.chdir("../")
print(os.getcwd())

/home/jsonpy/Projects/Practical/twitter-query-expansion


In [2]:
import psycopg2
import sys
import json

from src.utils import get_project_root, es_connect
from elasticsearch import Elasticsearch
import configparser

# Parse the credentials file. `ConfigParser.read` returns the list of files it
# was able to load; leaving the call as the last expression shows that list as
# the cell output.
ES_CREDENTIALS_FILE = 'auth/es-credentials.ini'
config = configparser.ConfigParser()
config.read(ES_CREDENTIALS_FILE)

['auth/es-credentials.ini']

Check that the Elasticsearch instance is running by using the Elasticsearch Python client library.

In [3]:
# Open a client connection using the [ELASTIC] section of the credentials
# file, then request the cluster info to confirm the instance is reachable.
elastic_credentials = config['ELASTIC']
es_client = es_connect(credentials=elastic_credentials)
es_client.info()

Connecting to Elastic Search...
Successfully connected to https://localhost:9200


ObjectApiResponse({'name': 'f6240d32ea65', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'YIiFu2p-QOWJhSPb-Zcavw', 'version': {'number': '8.5.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'a846182fa16b4ebfcc89aa3c11a11fd5adf3de04', 'build_date': '2022-11-17T18:56:17.538630285Z', 'build_snapshot': False, 'lucene_version': '9.4.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

---
## Create Index

In [6]:
# Create the index only when it does not exist yet: creating an index that is
# already present raises a `resource_already_exists_exception`, which would
# break re-running this cell or the whole notebook.
# The response is displayed when the index was created; nothing is shown when
# it already existed.
create_response = None
if not es_client.indices.exists(index="test"):
    create_response = es_client.indices.create(index="test")
create_response

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'test'})

## Delete Index

In [7]:
# `ignore_unavailable=True` makes the delete a no-op instead of a 404 error
# when the index is already gone, so the cell can be re-run safely.
es_client.indices.delete(index="test", ignore_unavailable=True)

ObjectApiResponse({'acknowledged': True})

---
## Using Analyzer and Settings

In [4]:
# Load the index configuration (the "settings" and "mappings" keys are used
# when creating the index). The `with` block closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('config/es-config.conf', encoding='utf-8') as config_file:
    es_config = json.load(config_file)

In [None]:
# Create the index with the settings and mappings from the configuration file.
# The existence check keeps the cell re-runnable: creating an index that is
# already present raises a `resource_already_exists_exception`.
# NOTE(review): an existing "test" index is left untouched, so it keeps
# whatever settings it was created with — delete it first to apply changes.
create_response = None
if not es_client.indices.exists(index="test"):
    create_response = es_client.indices.create(
        index="test",
        settings=es_config["settings"],
        mappings=es_config["mappings"],
    )
create_response

In [5]:
# Sample tweet text used to exercise the analyzer: it mixes a retweet marker,
# a user mention, numbers and a currency amount, an HTML tag, hashtags
# (including adjacent ones), a time of day and several inflected German words.
# The literal is split across two lines purely for readability; adjacent
# string literals are concatenated into the same single string.
txt = (
    " RT @PeterPan 500 Millionen und 17,30€ 'bin' <b>Merkel's</b> als "
    "#Leben#SPD_VM #liebte#um 19:30 Uhr Millionen  möchte liebten liebte lieb"
)

In [6]:
# Run the sample text through the `tweet_analyzer` of the `tweets_35` index
# and print the resulting tokens, one per line.
res = es_client.indices.analyze(index="tweets_35", analyzer="tweet_analyzer", text=txt)

# The loop index was never used and the f-string only wrapped a single value,
# so iterate over the tokens directly; the printed output is unchanged.
for token_info in res["tokens"]:
    print(token_info["token"])

_retweet_
_user_peterpan
500
million
17
30€
merkel
_hashtag_leben
_hashtag_spd_vm
_hashtag_liebte
_hashtag_um
19:30
uhr
mocht
liebt
lieb


Now we want to feed data from the Twitter PostgreSQL database into Elasticsearch. To do so, use the script provided in the `src` folder.

---
## Search data

In [58]:
# Load the search definition from the configuration file. The `with` block
# closes the file handle deterministically instead of leaving it to the
# garbage collector.
with open('config/es-query.conf', encoding='utf-8') as query_file:
    es_query = json.load(query_file)

# Display the loaded definition as the cell output.
es_query

{'query': {'bool': {'should': {'query_string': {'query': 'philipp CDU',
     'fields': ['txt', 'hashtags^5']}},
   'must': [{'term': {'hashtags': 'amthor'}}],
   'must_not': {'term': {'txt': '_retweet_'}},
   'filter': [{'range': {'created_at': {'lte': '2023-01-01'}}},
    {'range': {'created_at': {'gte': '2021-01-01'}}}]}},
 'collapse': {},
 'sort': {}}

In [61]:
# Send only the `query` clause of the loaded definition to the `tweets_35`
# index, requesting five hits (`size=5`).
res = es_client.search(index="tweets_35", query=es_query["query"], size=5)

# Report the total number of matches, then show the returned documents as the
# cell output.
hits = res["hits"]
print("Total Hits:", hits["total"]["value"])
hits["hits"]

Total Hits: 25


[{'_index': 'tweets_35',
  '_id': '1433695794431938589',
  '_score': 19.724907,
  '_source': {'retweet_count': 15,
   'reply_count': 12,
   'like_count': 142,
   'created_at': '2021-09-03T09:38:08+02:00',
   'txt': '"Philipp #Amthor,  droht gar aus dem Parlament zu fliegen. Das wäre durchaus denkbar, wenn SPD-Kandidat Erik von Malottki oder AfD Mann Enrico Kommning Amthor den Wahlkreis 16 in Vorpommern abnehmen, andere CDU-Leute ihren Wahlkreis aber direkt gewinnen." https://t.co/7v2qw6avRB',
   'hashtags': ['amthor'],
   'word_count': 39}},
 {'_index': 'tweets_35',
  '_id': '1430140941621514252',
  '_score': 19.271313,
  '_source': {'retweet_count': 14,
   'reply_count': 0,
   'like_count': 31,
   'created_at': '2021-08-24T14:12:25+02:00',
   'txt': 'Die Namensliste der Union liest sich noch prominenter. Im Kern gilt hier der Grundsatz, wer seinen \nWahlkreis in einem Flächenland nicht gewinnt, kommt nicht in den Bundestag: Philipp #Amthor (CDU), Tillman Kuban [...] ziehen \nnicht in 