In [5]:
from elasticsearch import Elasticsearch
import pandas as pd

es = Elasticsearch(["http://localhost:9200"])

In [6]:
def create_collection(collection_name):
    es.indices.create(index=collection_name, ignore=400)


In [7]:
def index_data(collection_name, exclude_column):
    df = pd.read_csv('C:/Users/krish/OneDrive/Desktop/Hash Agile Task/Employee Sample Data 1.csv', encoding='ISO-8859-1')
    
    df = df.drop(columns=[exclude_column], errors='ignore') 

    
    df = df.dropna()  

    for _, row in df.iterrows():
        es.index(index=collection_name, document=row.to_dict())

In [8]:
def search_by_column(collection_name, column_name, column_value):
    query = {
        "query": {
            "match": {
                column_name: column_value
            }
        }
    }
    return es.search(index=collection_name, body=query)

In [9]:
def get_emp_count(collection_name):
    return es.count(index=collection_name)['count']

In [11]:
def del_emp_by_id(collection_name, employee_id):
    es.delete(index=collection_name, id=employee_id)

# 6. Get department facets
def get_dep_facet(collection_name):
    query = {
        "size": 0,
        "aggs": {
            "departments": {
                "terms": {
                    "field": "Department.keyword" 
                }
            }
        }
    }
    return es.search(index=collection_name, body=query)

In [24]:
def execute_tasks():
    v_nameCollection = 'hash_suhitha' 
    v_phoneCollection = '7506'  

    print("Creating Collections...")
    create_collection(v_nameCollection)
    create_collection(v_phoneCollection)

    print("Getting Employee Count...")
    emp_count = get_emp_count(v_nameCollection)
    print(f"Employee Count in {v_nameCollection}: {emp_count}")

    print("Indexing Data...")
    index_data(v_nameCollection, 'Department')
    index_data(v_phoneCollection, 'Gender')


    print("Getting Updated Employee Count...")
    updated_emp_count = get_emp_count(v_nameCollection)
    print(f"Updated Employee Count in {v_nameCollection}: {updated_emp_count}")

    print("Searching by Department 'IT'...")
    search_dept_it = search_by_column(v_nameCollection, 'Department', 'IT')
    print("Search Results for Department 'IT':")
    print(search_dept_it)

    print("Searching by Gender 'Male'...")
    search_gender_male = search_by_column(v_nameCollection, 'Gender', 'Male')
    print("Search Results for Gender 'Male':")
    print(search_gender_male)

    print("Searching Phone Collection by Department 'IT'...")
    search_dept_it_phone = search_by_column(v_phoneCollection, 'Department', 'IT')
    print("Search Results in Phone Collection for Department 'IT':")
    print(search_dept_it_phone)

    print("Getting Department Facets for Name Collection...")
    dep_facet_name = get_dep_facet(v_nameCollection)
    print("Department Facets for Name Collection:")
    print(dep_facet_name)

    print("Getting Department Facets for Phone Collection...")
    dep_facet_phone = get_dep_facet(v_phoneCollection)
    print("Department Facets for Phone Collection:")
    print(dep_facet_phone)

In [25]:
execute_tasks()

Creating Collections...
Getting Employee Count...
Employee Count in hash_yourname: 1101
Indexing Data...


  es.indices.create(index=collection_name, ignore=400)


Getting Updated Employee Count...
Updated Employee Count in hash_yourname: 1201
Searching by Department 'IT'...
Search Results for Department 'IT':
{'took': 5, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}}
Searching by Gender 'Male'...
Search Results for Gender 'Male':
{'took': 6, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 603, 'relation': 'eq'}, 'max_score': 0.68649703, 'hits': [{'_index': 'hash_yourname', '_id': 'sNJprJIBJahXiCRmpuT0', '_score': 0.68649703, '_source': {'Employee ID': 'E02002', 'Full Name': 'Kai Le', 'Job Title': 'Controls Engineer', 'Business Unit': 'Manufacturing', 'Gender': 'Male', 'Ethnicity': 'Asian', 'Age': 47.0, 'Hire Date': '2/5/2022', 'Annual Salary': '$92,368 ', 'Bonus %': '0%', 'Country': 'United States', 'City': 'Columbus', 'Exit Date': None}}, {'_index': 'hash