Create the index

In [2]:
# Elasticsearch server details

import requests
from my_credentials import *
# Base URL of the Elasticsearch server that the index-creation request is sent to.
base_url = "http://localhost:9200"
# Name of the index that the following cells create and query.
index_name = "software_jobs"

In [3]:


# Index settings and mapping.
# One primary shard and zero replicas are requested for the index.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
    "mappings": {
        "properties": {
            # Every field is mapped as `keyword`.
            # NOTE(review): `description` presumably holds long free text; a
            # `text` mapping may suit full-text search better. Confirm with
            # the queries that use this index before changing it, because the
            # mapping type alters how the field can be searched.
            "job_id": {"type": "keyword"},
            "title": {"type": "keyword"},
            "company_name": {"type": "keyword"},
            "location": {"type": "keyword"},
            "via": {"type": "keyword"},
            "description": {"type": "keyword"},
        }
    },
}

# Create the index with authentication.
# The timeout keeps the cell from blocking forever when the server does not
# answer; requests raises requests.exceptions.Timeout instead.
response = requests.put(
    f"{base_url}/{index_name}",
    json=index_settings,
    auth=(username, password),  # Include authentication parameters
    timeout=30,
)

# Check the response.
if response.status_code == 200:
    print(f"Index '{index_name}' created successfully.")
elif (
    response.status_code == 400
    and "resource_already_exists_exception" in response.text
):
    # Re-running the cell against an existing index is reported as such
    # instead of as a generic creation error.
    print(f"Index '{index_name}' already exists; creation skipped.")
else:
    # response.text is the decoded body; response.content would print a
    # bytes repr (b'...').
    print(f"Error creating index: {response.text}")


Index 'software_jobs' created successfully.


Post the job offers to the index

In [4]:
import json

# Point at the server configured in `base_url` rather than a second
# hard-coded copy of the URL. The name `elasticsearch_url` is kept so that
# code reading it keeps working.
elasticsearch_url = base_url

# Read the file as UTF-8 explicitly: without `encoding`, open() falls back to
# the platform locale, which can fail or garble non-ASCII text.
# Presumably the file holds a JSON array with one object per job offer --
# verify against the file.
with open('jobs_jumble.json', encoding="utf-8") as f:
    cleaned = json.load(f)

# A session reuses the same HTTP connection for every document instead of
# opening a new one per request, and carries the credentials for all calls.
with requests.Session() as session:
    session.auth = (username, password)  # Include authentication parameters

    for job in cleaned:
        # POST to /<index>/_doc; the timeout prevents one stalled request
        # from blocking the whole upload.
        response = session.post(
            f"{elasticsearch_url}/{index_name}/_doc",
            json=job,
            timeout=30,
        )

        if response.status_code == 201:
            print(f"Job uploaded successfully: {response.json()}")
        else:
            # An error body is not guaranteed to be JSON, so print the raw
            # text rather than risk response.json() raising here.
            print(f"Error uploading job: {response.text}")

Job uploaded successfully: {'_index': 'software_jobs', '_id': 'z2-egI0BYlVpQ6ja1zhs', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Job uploaded successfully: {'_index': 'software_jobs', '_id': '0G-egI0BYlVpQ6ja1zil', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Job uploaded successfully: {'_index': 'software_jobs', '_id': '0W-egI0BYlVpQ6ja1zit', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}
Job uploaded successfully: {'_index': 'software_jobs', '_id': '0m-egI0BYlVpQ6ja1zi2', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1}
Job uploaded successfully: {'_index': 'software_jobs', '_id': '02-egI0BYlVpQ6ja1zjT', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successfu

Retrieve all data from the index

In [5]:
# Reuse the server URL and index name configured in the first cell rather
# than redefining them here. The name `elasticsearch_url` is kept so that
# code reading it keeps working.
elasticsearch_url = base_url

# Elasticsearch returns only 10 hits unless `size` is given. 10000 is the
# default upper bound for a single search page (index.max_result_window).
max_hits = 10000

# Define the search query to match all documents.
# `track_total_hits` makes the response carry the exact number of matches so
# that a truncated result can be detected below.
query = {
    "size": max_hits,
    "track_total_hits": True,
    "query": {
        "match_all": {}
    },
}

# Send the search request; the timeout prevents the cell from hanging when
# the server does not answer.
response = requests.get(
    f"{elasticsearch_url}/{index_name}/_search",
    json=query,
    auth=(username, password),  # Include authentication parameters
    timeout=30,
)

# Check the response.
if response.status_code == 200:
    data = response.json()
    hits_section = data.get("hits", {})
    hits = hits_section.get("hits", [])

    if hits:
        for hit in hits:
            print(f"Document ID: {hit['_id']}")
            print(f"Document Content: {hit['_source']}")

        # `total` is an object ({"value": N, ...}) on current Elasticsearch
        # versions and a plain integer on older ones; handle both.
        total = hits_section.get("total", 0)
        if isinstance(total, dict):
            total = total.get("value", 0)
        if total > len(hits):
            print(
                f"Warning: only {len(hits)} of {total} documents were returned; "
                "paginate with search_after to fetch the rest."
            )
    else:
        print("No documents found in the index.")
else:
    # response.text is the decoded body; response.content would print a
    # bytes repr (b'...').
    print(f"Error fetching data: {response.text}")


Document ID: z2-egI0BYlVpQ6ja1zhs
Document Content: {'job_id': 'eyJqb2JfdGl0bGUiOiJSZW1vdGUgU2VydmljZU5vdyBBZG1pbmlzdHJhdGlvbiB3aXRoIFB5dGhvbiwgUiwgQWR2YW5jZWQgU1FMLCBkYXRhYmFzZSwgZGF0YSBtYW5hZ2VtZW50LUFXIiwiaHRpZG9jaWQiOiJtbGp0amdnLWxFQUFOeTlMQUFBQUFBPT0iLCJobCI6ImZyIiwiZmMiOiJFdUlCQ3FJQlFVeFBibkJaVlZkM2RrbERhblJWU1dZMFNEUmxhMnR2WjFScmVuZE5PVmR1ZUVOc04zSTNSRU5HWldkVVJWZDRiRzlwWm1wSmNXeHpPRTAzWVhWdFRWZFVkelJZWnpadVEwMUtUV1UwT0hCalVXNVJka1JLY1RWamFIWnhkV1Z3VjFCeFdWTkRVMDFSVXpKcFN6ZGtabWRpZEd0d1FsQktSMDlOYTFsaWVVVnlNbEpPWW5JeWNtcGxkV1V4Wms5b2JGOXVWR05YU2xodmVXaG5Obk42YlZkM0VoZE1TRzk1V21WWVdVNWZhbkJ3ZEZGUU9UaGxZWE5CYnhvaVFVcDNheTA0Wkc4dFpITkdZbGh0TUc1ak1IbDVUa2d6VDJ0UmNtWm1WRmhrVVEiLCJmY3YiOiIzIiwiZmNfaWQiOiJmY18xIiwiYXBwbHlfbGluayI6eyJ0aXRsZSI6Ii5uRmcyZWJ7Zm9udC13ZWlnaHQ6NTAwfS5CaTZEZGN7Zm9udC13ZWlnaHQ6NTAwfVBvc3R1bGVyIHN1ciBTYWx1dGVNeUpvYiIsImxpbmsiOiJodHRwczovL3NhbHV0ZW15am9iLmNvbS9qb2JzL3JlbW90ZS1zZXJ2aWNlbm93LWFkbWluaXN0cmF0aW9uLXdpdGgtcHl0aG9uLXItYWR2YW5jZWQtc3FsLWRhdGFiYXNlLWRhdGEt

Optionally, delete the index

In [2]:
# Delete the index. The target is built from `base_url` and `index_name`
# rather than a hard-coded URL, so it always refers to the configured index.
# The timeout prevents the cell from hanging when the server does not answer.
response = requests.delete(
    f"{base_url}/{index_name}",
    auth=(username, password),
    timeout=30,
)
# Last expression of the cell: displays the raw response body.
response.text

'{"acknowledged":true}'