In [30]:
import requests


def query_es(body, index_name, timeout=30):
    """
    Query an Elasticsearch index through the local Flask '/data-extract' endpoint.

    A POST request is sent to http://127.0.0.1:9090/data-extract with the JSON
    payload {"body": <body>, "index": <index_name>}. The Flask application
    behind that route is expected to forward the query to an Elasticsearch
    server and relay the results.

    Parameters:
    - body (dict): The Elasticsearch query, for example
          {"query": {"match": {"text": "search term"}}}
    - index_name (str): The name of the Elasticsearch index to be queried.
    - timeout (float | tuple | None): Passed straight to `requests.post`.
      Seconds to wait for the server before giving up. Defaults to 30 so an
      unresponsive endpoint cannot hang the notebook forever; pass None to
      wait indefinitely.

    Returns:
    - On HTTP 200: the decoded JSON response body (the outputs recorded in
      this notebook show a list of hit records).
    - str: On any other status code, a message of the form
      "Failed to fetch data: Status code <code>".

    Raises:
    - requests.exceptions.RequestException: if the request cannot be completed
      (connection failure, timeout, ...). These are not converted into the
      failure string.
    """
    # route url: /data-extract --method POST
    url = "http://127.0.0.1:9090/data-extract"
    payload = {"body": body, "index": index_name}
    response = requests.post(url, json=payload, timeout=timeout)

    if response.status_code == 200:
        return response.json()  # Parse the response as JSON
    return f"Failed to fetch data: Status code {response.status_code}"

In [31]:
# Query the 'twitter' index for documents whose created_at lies in a date window.
# Change the "gte"/"lte" values to select different dates, and "size" to change
# the number of hits requested.
index_name = 'twitter'
twitter_query = {
    "query": {"range": {"created_at": {
        "gte": "2022-05-11T00:00:00Z",
        "lte": "2022-07-31T23:59:59Z",
        "format": "strict_date_optional_time",
    }}},
    "size": 1000,
}

twitter_data = query_es(twitter_query, index_name)
print(twitter_data)

[{'_id': '1524258746012106753', '_index': 'twitter', '_score': 1.0, '_source': {'created_at': '2022-05-11T05:22:59Z', 'geo': {'latitude': -34.207856922252674, 'longitude': 146.95075598694146}, 'id': '1524258746012106753', 'sentiment': -0.08108108108108109}}, {'_id': '1524328442337976320', '_index': 'twitter', '_score': 1.0, '_source': {'created_at': '2022-05-11T09:59:55Z', 'geo': {'latitude': -34.45227933748771, 'longitude': 142.2903605244629}, 'id': '1524328442337976320', 'sentiment': 0.05}}, {'_id': '1524337742389256192', '_index': 'twitter', '_score': 1.0, '_source': {'created_at': '2022-05-11T10:36:53Z', 'geo': {'latitude': -36.0704108003365, 'longitude': 144.5966307141835}, 'id': '1524337742389256192', 'sentiment': -0.05714285714285714}}, {'_id': '1524258523672354816', '_index': 'twitter', '_score': 1.0, '_source': {'created_at': '2022-05-11T05:22:06Z', 'geo': {'latitude': -37.727357175507144, 'longitude': 145.50805348639156}, 'id': '1524258523672354816', 'sentiment': 0.4285714285

In [38]:
# Query the EPA index: match every document, requesting 100 hits.
# NOTE(review): in the output printed below, 'latitude' holds values around 145
# and 'longitude' values around -37, which looks swapped -- confirm against the
# ingestion code before using these coordinates.
index_name = 'epa-000001'
epa_query = {"query": {"match_all": {}}, "size": 100}

epa_data = query_es(epa_query, index_name)
print(epa_data)

[{'_id': 'hb08a48BAp0N_qlVRIya', '_index': 'epa-000001', '_score': 1.0, '_source': {'latitude': 145.1324, 'longitude': -37.8287277, 'parameters': [{'averageValue': 4.39, 'name': 'PM2.5', 'startDateTime': '2024-05-12T04:00:00Z', 'timeSeriesName': '1HR_AV', 'totalSample': 13, 'unit': '&micro;g/m&sup3;', 'untilDateTime': '2024-05-12T05:00:00Z'}, {'averageValue': 6.88, 'name': 'PM2.5', 'startDateTime': '2024-05-11T05:00:00Z', 'timeSeriesName': '24HR_AV', 'totalSample': 312, 'unit': '&micro;g/m&sup3;', 'untilDateTime': '2024-05-12T05:00:00Z'}], 'siteID': '77062cb7-3e3b-4984-b6d0-03dda76177f2', 'siteName': 'Box Hill'}}, {'_id': 'iL08a48BAp0N_qlVV4xG', '_index': 'epa-000001', '_score': 1.0, '_source': {'latitude': 146.5392, 'longitude': -38.29585, 'parameters': [{'averageValue': 4.65, 'name': 'Particles', 'startDateTime': '2024-05-12T04:00:00Z', 'timeSeriesName': '1HR_AV', 'totalSample': 12, 'unit': '&micro;g/m&sup3;', 'untilDateTime': '2024-05-12T05:00:00Z'}, {'averageValue': 5.33, 'name': '