In [2]:
from elasticsearch import Elasticsearch

es = Elasticsearch(
    "http://84.16.230.94:9200", basic_auth=("elastic", "c*+Nbwk82gA7RQfFHkQ6")
)

In [3]:
es.indices.delete(index="department", ignore=404)

# Define the mapping with explicit fields for department and employee
create_index_body = {
    "mappings": {
        "properties": {
            # Join field to create parent-child relationships
            "join_field": {
                "type": "join",
                "relations": {
                    "company": [
                        "department",
                        "supplier",
                    ],  # Parent: company, Children: department and supplier
                    "department": "employee",  # Parent: department, Child: employee
                },
            },
            # Fields for Company
            "company": {
                "properties": {
                    "name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "registration_number": {"type": "keyword"},
                    "founded_date": {"type": "date"},
                    "location": {"type": "geo_point"},
                }
            },
            # Fields for Department
            "department": {
                "properties": {
                    "name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "budget": {"type": "float"},
                    "floor_number": {"type": "integer"},
                }
            },
            # Fields for Supplier
            "supplier": {
                "properties": {
                    "name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "contract_type": {"type": "keyword"},
                    "supply_categories": {"type": "keyword"},
                }
            },
            # Fields for Employee
            "employee": {
                "properties": {
                    "first_name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "last_name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "email": {"type": "keyword"},
                    "position": {"type": "text"},
                    "salary": {"type": "float"},
                    "hire_date": {"type": "date"},
                    "skills": {"type": "keyword"},
                }
            },
        }
    }
}

# Create the index
index_name = "department"
response = es.indices.create(index=index_name, body=create_index_body)
print(f"Index '{index_name}' created with response:", response)
# Sample data for departments
departments = [
    {"id": "1", "name": "Engineering", "location": "New York", "budget": 500000},
    {"id": "2", "name": "Marketing", "location": "San Francisco", "budget": 300000},
    {"id": "3", "name": "Human Resources", "location": "Chicago", "budget": 200000},
]

# Sample data for employees
employees = {
    "1": [  # Employees for Engineering
        {
            "name": "Alice Smith",
            "position": "Software Engineer",
            "salary": 80000,
            "date_of_joining": "2022-01-15",
        },
        {
            "name": "Bob Johnson",
            "position": "Data Scientist",
            "salary": 85000,
            "date_of_joining": "2022-05-10",
        },
        {
            "name": "Charlie Brown",
            "position": "DevOps Engineer",
            "salary": 78000,
            "date_of_joining": "2021-07-21",
        },
    ],
    "2": [  # Employees for Marketing
        {
            "name": "David Wilson",
            "position": "Marketing Manager",
            "salary": 70000,
            "date_of_joining": "2023-02-10",
        },
        {
            "name": "Eva Green",
            "position": "Content Creator",
            "salary": 55000,
            "date_of_joining": "2022-10-11",
        },
        {
            "name": "Frank White",
            "position": "SEO Specialist",
            "salary": 60000,
            "date_of_joining": "2021-11-05",
        },
    ],
    "3": [  # Employees for Human Resources
        {
            "name": "Grace Kim",
            "position": "HR Manager",
            "salary": 75000,
            "date_of_joining": "2020-04-12",
        },
        {
            "name": "Hank Lee",
            "position": "Recruiter",
            "salary": 50000,
            "date_of_joining": "2021-09-22",
        },
        {
            "name": "Ivy Brown",
            "position": "HR Assistant",
            "salary": 45000,
            "date_of_joining": "2023-01-14",
        },
    ],
}

  es.indices.delete(index="department", ignore=404)


Index 'department' created with response: {'acknowledged': True, 'shards_acknowledged': True, 'index': 'department'}


In [4]:
# Insert department documents
for department in departments:
    department_doc = {
        "department": {
            "name": department["name"],
            "location": department["location"],
            "budget": department["budget"],
        },
        "join_field": "department",
    }
    es.index(index=index_name, id=department["id"], body=department_doc)

# Insert employee documents for each department
for dept_id, dept_employees in employees.items():
    for employee in dept_employees:
        employee_doc = {
            "employee": {
                "name": employee["name"],
                "position": employee["position"],
                "salary": employee["salary"],
                "date_of_joining": employee["date_of_joining"],
            },
            "join_field": {
                "name": "employee",
                "parent": dept_id,  # Link employee to the department
            },
        }
        es.index(index=index_name, routing=dept_id, body=employee_doc)

print("Data inserted successfully.")
query = {"query": {"term": {"join_field": "department"}}}

response = es.search(index="department", body=query)
print("All Departments:", response)
query = {
    "query": {
        "bool": {
            "must": [
                {"term": {"join_field": "employee"}},
                {
                    "parent_id": {"type": "employee", "id": "1"}
                },  # Set parent_id to the department ID
            ]
        }
    }
}

response = es.search(index="department", routing="1", body=query)
print("Employees in Engineering Department:", response)

Data inserted successfully.
All Departments: {'took': 635, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 3, 'relation': 'eq'}, 'max_score': 1.4733057, 'hits': [{'_index': 'department', '_id': '1', '_score': 1.4733057, '_source': {'department': {'name': 'Engineering', 'location': 'New York', 'budget': 500000}, 'join_field': 'department'}}, {'_index': 'department', '_id': '2', '_score': 1.4733057, '_source': {'department': {'name': 'Marketing', 'location': 'San Francisco', 'budget': 300000}, 'join_field': 'department'}}, {'_index': 'department', '_id': '3', '_score': 1.4733057, '_source': {'department': {'name': 'Human Resources', 'location': 'Chicago', 'budget': 200000}, 'join_field': 'department'}}]}}
Employees in Engineering Department: {'took': 4, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 6, 'relation': 'eq'}, 'max_score': 1.4234339, 'hits'

In [6]:
es.indices.delete(index="testingjoins", ignore=404)

# Define the mapping with explicit fields for department and employee
create_index_body = {
    "mappings": {
        "properties": {
            # Join field to create parent-child relationships
            "join_field": {
                "type": "join",
                "relations": {
                    "company": [
                        "department",
                        "supplier",
                    ],  # Parent: company, Children: department and supplier
                    "department": "employee",  # Parent: department, Child: employee
                },
            },
            # Fields for Company
            "company": {
                "properties": {
                    "name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "registration_number": {"type": "keyword"},
                    "founded_date": {"type": "date"},
                    "location": {"type": "geo_point"},
                }
            },
            # Fields for Department
            "department": {
                "properties": {
                    "name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "budget": {"type": "float"},
                    "floor_number": {"type": "integer"},
                }
            },
            # Fields for Supplier
            "supplier": {
                "properties": {
                    "name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "contract_type": {"type": "keyword"},
                    "supply_categories": {"type": "keyword"},
                }
            },
            # Fields for Employee
            "employee": {
                "properties": {
                    "first_name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "last_name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "email": {"type": "keyword"},
                    "position": {"type": "text"},
                    "salary": {"type": "float"},
                    "hire_date": {"type": "date"},
                    "skills": {"type": "keyword"},
                }
            },
        }
    }
}
# Create the index
index_name = "testingjoins"
response = es.indices.create(index=index_name, body=create_index_body)

  es.indices.delete(index="testingjoins", ignore=404)


In [11]:
# Company Document
company = {
    "name": "Tech Innovations Inc.",
    "registration_number": "REG123456",
    "founded_date": "2005-06-15",
    "location": {"lat": 37.7749, "lon": -122.4194},
    "join_field": "company",
}

es.index(index=index_name, id="company_1", body=company)

ObjectApiResponse({'_index': 'testingjoins', '_id': 'company_1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1})

In [12]:
# Supplier Documents
supplier_1 = {
    "name": "Office Supplies Co.",
    "contract_type": "Annual",
    "supply_categories": ["Office Supplies", "Stationery"],
    "join_field": {"name": "supplier", "parent": "company_1"},
}

supplier_2 = {
    "name": "IT Solutions Ltd.",
    "contract_type": "Project-Based",
    "supply_categories": ["Software", "Hardware"],
    "join_field": {"name": "supplier", "parent": "company_1"},
}

es.index(index=index_name, id="supplier_1", body=supplier_1, routing="company_1")
es.index(index=index_name, id="supplier_2", body=supplier_2, routing="company_1")

ObjectApiResponse({'_index': 'testingjoins', '_id': 'supplier_2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 4, '_primary_term': 1})

In [13]:
# Department Documents
department_1 = {
    "name": "Research and Development",
    "budget": 500000.0,
    "floor_number": 5,
    "join_field": {"name": "department", "parent": "company_1"},
}

department_2 = {
    "name": "Sales",
    "budget": 300000.0,
    "floor_number": 3,
    "join_field": {"name": "department", "parent": "company_1"},
}

es.index(index=index_name, id="department_1", body=department_1, routing="company_1")
es.index(index=index_name, id="department_2", body=department_2, routing="company_1")

ObjectApiResponse({'_index': 'testingjoins', '_id': 'department_2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1})

In [14]:
# Employees for Department 1 (Research and Development)
employee_1_dep1 = {
    "first_name": "Alice",
    "last_name": "Smith",
    "email": "alice.smith@techinnovations.com",
    "position": "Senior Engineer",
    "salary": 85000.0,
    "hire_date": "2018-09-01",
    "skills": ["Python", "Machine Learning", "AI"],
    "join_field": {"name": "employee", "parent": "department_1"},
}

employee_2_dep1 = {
    "first_name": "Bob",
    "last_name": "Johnson",
    "email": "bob.johnson@techinnovations.com",
    "position": "Data Scientist",
    "salary": 90000.0,
    "hire_date": "2019-11-15",
    "skills": ["Data Analysis", "Statistics", "SQL"],
    "join_field": {"name": "employee", "parent": "department_1"},
}

es.index(
    index=index_name,
    id="employee_1_dep1",
    body=employee_1_dep1,
    routing="department_1",
)
es.index(
    index=index_name,
    id="employee_2_dep1",
    body=employee_2_dep1,
    routing="department_1",
)

# Employees for Department 2 (Sales)
employee_1_dep2 = {
    "first_name": "Carol",
    "last_name": "Williams",
    "email": "carol.williams@techinnovations.com",
    "position": "Sales Manager",
    "salary": 75000.0,
    "hire_date": "2020-02-10",
    "skills": ["Salesforce", "Negotiation", "Marketing"],
    "join_field": {"name": "employee", "parent": "department_2"},
}

employee_2_dep2 = {
    "first_name": "David",
    "last_name": "Brown",
    "email": "david.brown@techinnovations.com",
    "position": "Account Executive",
    "salary": 65000.0,
    "hire_date": "2021-05-21",
    "skills": ["Customer Service", "CRM", "Communication"],
    "join_field": {"name": "employee", "parent": "department_2"},
}

es.index(
    index=index_name,
    id="employee_1_dep2",
    body=employee_1_dep2,
    routing="department_2",
)
es.index(
    index=index_name,
    id="employee_2_dep2",
    body=employee_2_dep2,
    routing="department_2",
)

ObjectApiResponse({'_index': 'testingjoins', '_id': 'employee_2_dep2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 10, '_primary_term': 1})

In [21]:
# Search for companies with departments having an employee named "John Doe"
response = es.search(
    index=index_name,
    body={
        "query": {
            "has_child": {
                "type": "department",
                "query": {
                    "has_child": {
                        "type": "employee",
                        "query": {"term": {"first_name.keyword": "David"}},
                    }
                },
            }
        }
    },
)

# Print search results
for hit in response["hits"]["hits"]:
    print(hit["_source"])

{'name': 'Tech Innovations Inc.', 'registration_number': 'REG123456', 'founded_date': '2005-06-15', 'location': {'lat': 37.7749, 'lon': -122.4194}, 'join_field': 'company'}


In [25]:
response = es.search(
    index=index_name,
    body={
        "query": {
            "has_child": {
                "type": "employee",
                "query": {
                    "has_parent": {
                        "parent_type": "department",
                        "query": {"term": {"name.keyword": "Research and Development"}},
                    }
                },
            }
        }
    },
    "_source": ["first_name", "last_name"],
)

# Print search results
for hit in response["hits"]["hits"]:
    print(hit["_source"])

{'name': 'Research and Development', 'budget': 500000.0, 'floor_number': 5, 'join_field': {'name': 'department', 'parent': 'company_1'}}


In [64]:
response = es.search(
    index=index_name,
    body={
        "query": {
            "bool": {
                "must": [
                    {
                        "has_parent": {
                            "parent_type": "company",
                            "inner_hits": {},
                            "query": {"term": {"name.keyword": "Tech Innovations Inc."}},  # Changed to `match` for text field

                        }
                    },
                    {
                        "term": {
                            "join_field": "department"  # Ensure we only get departments
                        }
                    }
                ]
            }
        }
    }
)
response

ObjectApiResponse({'took': 2, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 2.568616, 'hits': [{'_index': 'testingjoins', '_id': 'department_1', '_score': 2.568616, '_routing': 'company_1', '_source': {'name': 'Research and Development', 'budget': 500000.0, 'floor_number': 5, 'join_field': {'name': 'department', 'parent': 'company_1'}}, 'inner_hits': {'company': {'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 1.3862942, 'hits': [{'_index': 'testingjoins', '_id': 'company_1', '_score': 1.3862942, '_source': {'name': 'Tech Innovations Inc.', 'registration_number': 'REG123456', 'founded_date': '2005-06-15', 'location': {'lat': 37.7749, 'lon': -122.4194}, 'join_field': 'company'}}]}}}}, {'_index': 'testingjoins', '_id': 'department_2', '_score': 2.568616, '_routing': 'company_1', '_source': {'name': 'Sales', 'budget': 300000.0, 'floor_number': 3, 'join_field': {'name': 

In [61]:
response

ObjectApiResponse({'took': 3, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 2.568616, 'hits': [{'_index': 'testingjoins', '_id': 'department_1', '_score': 2.568616, '_routing': 'company_1', '_source': {'name': 'Research and Development', 'budget': 500000.0, 'floor_number': 5, 'join_field': {'name': 'department', 'parent': 'company_1'}}, 'inner_hits': {'company': {'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 1.3042111, 'hits': [{'_index': 'testingjoins', '_id': 'company_1', '_score': 1.3042111, '_source': {'name': 'Tech Innovations Inc.', 'registration_number': 'REG123456', 'founded_date': '2005-06-15', 'location': {'lat': 37.7749, 'lon': -122.4194}, 'join_field': 'company'}}]}}}}, {'_index': 'testingjoins', '_id': 'department_2', '_score': 2.568616, '_routing': 'company_1', '_source': {'name': 'Sales', 'budget': 300000.0, 'floor_number': 3, 'join_field': {'name': 

In [56]:
# Perform has_parent query with inner_hits for department parent documents
response = es.search(
    index=index_name,
    body={
        "query": {
            "has_parent": {
                "parent_type": "department",
                "inner_hits": {},  # Enables inner hits
                "query": {"term": {"name.keyword": "Sales"}},
            }
        }
    },
)
response

ObjectApiResponse({'took': 2, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'testingjoins', '_id': 'employee_1_dep2', '_score': 1.0, '_routing': 'department_2', '_source': {'first_name': 'Carol', 'last_name': 'Williams', 'email': 'carol.williams@techinnovations.com', 'position': 'Sales Manager', 'salary': 75000.0, 'hire_date': '2020-02-10', 'skills': ['Salesforce', 'Negotiation', 'Marketing'], 'join_field': {'name': 'employee', 'parent': 'department_2'}}, 'inner_hits': {'department': {'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 1.3862942, 'hits': [{'_index': 'testingjoins', '_id': 'department_2', '_score': 1.3862942, '_routing': 'company_1', '_source': {'name': 'Sales', 'budget': 300000.0, 'floor_number': 3, 'join_field': {'name': 'department', 'parent': 'company_1'}}}]}}}}, {'_index': 'testingjoins', '_id': 'employee_2_dep2', '_score': 1

In [55]:
response = es.search(
    index=index_name,
    body={
        "query": {
            "has_parent": {
                "parent_type": "department",
                "inner_hits": {},  # Enables inner hits
                "query": {"match": {"name": "Sales"}},  # Changed to `match` for text field
            }
        }
    },
)
response

ObjectApiResponse({'took': 3, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': 'testingjoins', '_id': 'employee_1_dep2', '_score': 1.0, '_routing': 'department_2', '_source': {'first_name': 'Carol', 'last_name': 'Williams', 'email': 'carol.williams@techinnovations.com', 'position': 'Sales Manager', 'salary': 75000.0, 'hire_date': '2020-02-10', 'skills': ['Salesforce', 'Negotiation', 'Marketing'], 'join_field': {'name': 'employee', 'parent': 'department_2'}}, 'inner_hits': {'department': {'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 1.8527111, 'hits': [{'_index': 'testingjoins', '_id': 'department_2', '_score': 1.8527111, '_routing': 'company_1', '_source': {'name': 'Sales', 'budget': 300000.0, 'floor_number': 3, 'join_field': {'name': 'department', 'parent': 'company_1'}}}]}}}}, {'_index': 'testingjoins', '_id': 'employee_2_dep2', '_score': 1