diff --git a/.github/workflows/cogstack-es_main.yml b/.github/workflows/cogstack-es_main.yml
new file mode 100644
index 000000000..91ab1b854
--- /dev/null
+++ b/.github/workflows/cogstack-es_main.yml
@@ -0,0 +1,42 @@
+name: cogstack-es - Test
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    paths:
+      - 'cogstack-es/**'
+      - '.github/workflows/cogstack-es**'
+
+defaults:
+  run:
+    working-directory: ./cogstack-es
+
+jobs:
+  types-lint-tests:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [ '3.9', '3.10', '3.11', '3.12' ]
+      max-parallel: 4
+
+    steps:
+      - uses: actions/checkout@v5
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -r requirements-dev.txt
+      - name: Check types
+        run: |
+          python -m mypy --follow-imports=normal *.py
+      - name: Lint
+        run: |
+          ruff check *.py
+      - name: Test
+        run: |
+          pytest tests
diff --git a/cogstack-es/ReadMe.md b/cogstack-es/ReadMe.md
new file mode 100644
index 000000000..7f63dbf9c
--- /dev/null
+++ b/cogstack-es/ReadMe.md
@@ -0,0 +1,27 @@
+
+# Login and search
+This project is responsible for logging in and performing a search.
+
+## Login details
+1. Create a [credentials.py](credentials.py)
+2. Populate it with your cogstack instance and login details
+An example template can be seen below:
+```
+hosts = []  # This is a list of your cogstack elasticsearch instances.
+
+# These are your login details (either via http_auth or API)
+username = None
+password = None
+```
+
+__Note__: If these fields are left blank then the user will be prompted to enter the details themselves.
+
+If you are unsure about the above information please contact your CogStack system administrator.
+
+## How to build a Search query
+
+A core component of cogstack is Elasticsearch which is a search engine built on top of Apache Lucene.
+
+Lucene has a custom query syntax for querying its indexes (Lucene Query Syntax). This query syntax allows for features such as Keyword matching, Wildcard matching, Regular expression, Proximity matching, Range searches.
+
+Full documentation for this syntax is available as part of Elasticsearch [query string syntax](https://www.elastic.co/guide/en/elasticsearch/reference/8.5/query-dsl-query-string-query.html#query-string-syntax).
\ No newline at end of file
diff --git a/cogstack-es/cogstack.py b/cogstack-es/cogstack.py
new file mode 100644
index 000000000..37965194f
--- /dev/null
+++ b/cogstack-es/cogstack.py
@@ -0,0 +1,930 @@
+from collections.abc import Mapping
+import getpass
+import traceback
+from typing import Any, Optional, Iterable, Sequence, Union, Protocol
+from typing import cast, TYPE_CHECKING
+import warnings
+
+
+if TYPE_CHECKING:
+    from elasticsearch import Elasticsearch as _Elasticsearch
+    # from opensearchpy import OpenSearch as _OpenSearch
+    import elasticsearch.helpers
+    # import opensearchpy.helpers
+    # ElasticClient = Union[_Elasticsearch, _OpenSearch]
+    ElasticClient = _Elasticsearch
+    es_cls = _Elasticsearch
+    es_helpers = elasticsearch.helpers
+else:
+    try:
+        from elasticsearch import Elasticsearch as ElasticClient
+        import elasticsearch.helpers
+        es_helpers = elasticsearch.helpers
+    except ImportError:
+        from opensearchpy import OpenSearch as ElasticClient
+        import opensearchpy.helpers
+        es_helpers = opensearchpy.helpers
+    es_cls = ElasticClient
+from IPython.display import display, HTML
+import pandas as pd
+import tqdm
+
+warnings.filterwarnings("ignore")
+
+
+class IndicesClientProto(Protocol):
+
+    def get_alias(self, *args: Any, **kwargs: Any) -> Any:
+        pass
+
+    def get_mapping(self, *args: Any, **kwargs: Any) -> Any:
+        pass
+
+
+class ESClient(Protocol):
+
+    @property
+    def indices(self) -> IndicesClientProto:
+        pass
+
+    def count(self, *args: Any, **kwargs: Any) -> Any:
+        pass
+
+    def search(self, *args: Any, **kwargs: Any) -> Any:
+        pass
+
+    def scroll(self, *args: Any, **kwargs: Any) -> Any:
+        pass
+
+    def clear_scroll(self, *args: Any, **kwargs: Any) -> Any:
+        pass
+
+
+class CogStack:
+    """
+    A class for interacting with Elasticsearch or OpenSearch.
+
+    Parameters
+    ------------
+        elastic : ESClient
+            The ElasticSearch or OpenSearch instance.
+    """
+
+    ES_TIMEOUT = 300
+
+    def __init__(self, elastic: ESClient) -> None:
+        self.elastic = elastic
+
+    @classmethod
+    def with_basic_auth(
+        cls,
+        hosts: list[str],
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+    ) -> "CogStack":
+        """
+        Create an instance of CogStack using basic authentication.
+
+        Parameters
+        ----------
+        hosts : list[str]
+            A list of Elasticsearch host URLs.
+        username : str, optional
+            The username to use when connecting to Elasticsearch.
+            If not provided, the user will be prompted to enter a username.
+        password : str, optional
+            The password to use when connecting to Elasticsearch.
+            If not provided, the user will be prompted to enter a password.
+        Returns
+        -------
+            CogStack: An instance of the CogStack class.
+        """
+        elastic = CogStack.get_es_with_basic_auth(hosts, username, password)
+        return cls(elastic)
+
+    @classmethod
+    def with_api_key_auth(
+        cls, hosts: list[str], api_key: Optional[dict] = None
+    ) -> "CogStack":
+        """
+        Create an instance of CogStack using API key authentication.
+
+        Parameters
+        ----------
+        hosts : list[str]
+            A list of Elasticsearch host URLs.
+        apiKey : dict, optional
+
+            API key object with string fields either:
+            - A: "id" and "api_key"
+            - B: "encoded"
+            Generated in Elasticsearch or Kibana and provided by your
+            CogStack administrator.
+
+            If not provided, the user will be prompted to enter API
+            key "encoded" value.
+
+            Example:
+                .. code-block:: json
+                        {
+                            "id": "API_KEY_ID",
+                            "api_key": "API_KEY",
+                            "encoded": "API_KEY_ENCODED_STRING"
+                        }
+        Returns
+        -------
+            CogStack: An instance of the CogStack class.
+        """
+        elastic = CogStack.get_es_with_api_key(hosts, api_key)
+        return cls(elastic)
+
+    @staticmethod
+    def get_es_with_basic_auth(
+        hosts: list[str], username: Optional[str] = None,
+        password: Optional[str] = None
+    ) -> ESClient:
+        """
+        Create an instance of CogStack using basic authentication.
+        If the `username` or `password` parameters are not provided,
+        the user will be prompted to enter them.
+
+        Parameters
+        ----------
+        hosts : list[str]
+            A list of Elasticsearch or OpenSearch host URLs.
+        username : str, optional
+            The username to use when connecting to Elasticsearch or OpenSearch.
+            If not provided, the user will be prompted to enter a username.
+        password : str, optional
+            The password to use when connecting to Elasticsearch or OpenSearch.
+            If not provided, the user will be prompted to enter a password.
+
+        Returns
+        -------
+            ESClient: An instance of the Elasticsearch or OpenSearch.
+        """
+        if username is None:
+            username = input("Username: ")
+        if password is None:
+            password = getpass.getpass("Password: ")
+
+        return CogStack.__connect(
+            hosts,
+            basic_auth=(username, password) if username and password else None
+        )
+
+    @staticmethod
+    def get_es_with_api_key(hosts: list[str],
+                            api_key: Optional[dict] = None
+                            ) -> ESClient:
+        """
+        Create an instance of CogStack using API key authentication.
+
+        Parameters
+        ----------
+        hosts : list[str]
+            A list of Elasticsearch or OpenSearch host URLs.
+        apiKey : Dict, optional
+
+            API key object with string fields either:
+            - A: "id" and "api_key"
+            - B: "encoded"
+            Generated in Elasticsearch, OpenSearch, or Kibana and provided by
+            your CogStack administrator.
+
+            If not provided, the user will be prompted to enter API
+            key "encoded" value.
+
+            If not provided, the user will be prompted to enter API
+            key "encoded" value.
+
+            Example:
+             .. code-block:: json
+                    {
+                        "id": "API_KEY_ID",
+                        "api_key": "API_KEY",
+                        "encoded": "API_KEY_ENCODED_STRING"
+                    }
+
+        Returns
+        -------
+            ESClient: An instance of the Elasticsearch or OpenSearch.
+        """
+        has_encoded_value = False
+        api_id_value: str
+        api_key_value: str
+
+        if not api_key:
+            api_key = {"encoded": input("Encoded API key: ")}
+        else:
+            if isinstance(api_key, str):
+                # If api_key is a string, it is assumed to be
+                # the encoded API key
+                encoded = api_key
+                has_encoded_value = True
+            elif isinstance(api_key, dict):
+                # If api_key is a dictionary, check for "encoded",
+                # "id" and "api_key" keys
+                if (
+                    "id" in api_key.keys()
+                    and api_key["id"] != ""
+                    and "api_key" in api_key.keys()
+                    and api_key["api_key"] != ""
+                ):
+                    # If both "id" and "api_key" are present, use them
+                    encoded = None
+                else:
+                    # If "encoded" is present, use it; otherwise prompt for it
+                    encoded = (
+                        api_key["encoded"]
+                        if "encoded" in api_key.keys() and api_key["encoded"]
+                        else input("Encoded API key: ")
+                    )
+                    has_encoded_value = encoded is not None and encoded != ""
+
+            if not has_encoded_value:
+                api_id_value = str(
+                    api_key["id"]
+                    if "id" in api_key.keys() and api_key["id"] != ""
+                    else input("API Id: ")
+                )
+                api_key_value = str(
+                    api_key["api_key"]
+                    if "api_key" in api_key.keys() and api_key["api_key"] != ""
+                    else getpass.getpass("API Key: ")
+                )
+
+        return CogStack.__connect(
+            hosts,
+            api_key=encoded if has_encoded_value else
+            (api_id_value, api_key_value)
+        )
+
+    @staticmethod
+    def __connect(
+        hosts: list[str],
+        basic_auth: Optional[tuple[str, str]] = None,
+        api_key: Optional[Union[str, tuple[str, str]]] = None,
+    ) -> ESClient:
+        """Connect to Elasticsearch or OpenSearch using the credentials.
+        Parameters
+        ----------
+            hosts : list[str]
+                A list of Elasticsearch or OpenSearch host URLs.
+            basic_auth : Tuple[str, str], optional
+                A tuple containing the username and password for
+                basic authentication.
+            api_key : str or Tuple[str, str], optional
+                The API key or a tuple containing the API key ID and API key
+                for API key authentication.
+        Returns
+        -------
+            ESClient: An instance of the Elasticsearch or OpenSearch.
+        Raises
+        ------
+            Exception: If the connection to Elasticsearch or OpenSearch fails.
+        """
+        elastic = es_cls(
+            hosts=hosts,
+            api_key=api_key,
+            basic_auth=basic_auth,
+            verify_certs=False,
+            request_timeout=CogStack.ES_TIMEOUT,
+        )
+        if not elastic.ping():
+            raise ConnectionError(
+                "CogStack connection failed. "
+                "Please check your host list and credentials and try again."
+            )
+        print("CogStack connection established successfully.")
+        return elastic
+
+    def get_indices_and_aliases(self):
+        """
+        Retrieve indices and their aliases
+
+        Returns:
+        ---------
+            A table of indices and aliases to use in subsequent queries
+        """
+        all_aliases = self.elastic.indices.get_alias().body
+        index_aliases_coll = []
+        for index in all_aliases:
+            index_aliases = {}
+            index_aliases["Index"] = index
+            aliases = []
+            for alias in all_aliases[index]["aliases"]:
+                aliases.append(alias)
+            index_aliases["Aliases"] = ", ".join(aliases)
+            index_aliases_coll.append(index_aliases)
+        with pd.option_context("display.max_colwidth", None):
+            return pd.DataFrame(
+                index_aliases_coll, columns=["Index", "Aliases"])
+
+    def get_index_fields(self, index: Union[str, Sequence[str]]):
+        """
+        Retrieve indices and their fields with data type
+
+        Parameters
+        ----------
+         index: str | Sequence[str]
+            Name(s) of indices or aliases for which the list of fields
+            is retrieved
+
+        Returns
+        ----------
+            pandas.DataFrame
+                A DataFrame containing index names and their fields with
+                data types
+
+        Raises
+        ------
+            Exception
+                If the operation fails for any reason.
+        """
+        try:
+            if len(index) == 0:
+                raise ValueError(
+                    "Provide at least one index or index alias name")
+            all_mappings = self.elastic.indices.get_mapping(
+                index=index, allow_no_indices=False
+            ).body
+            columns = ["Field", "Type"]
+            if isinstance(index, list):
+                columns.insert(0, "Index")
+            index_mappings_coll = []
+            for index_name in all_mappings:
+                for property_name in all_mappings[
+                        index_name]["mappings"]["properties"]:
+                    index_mapping = {}
+                    index_mapping["Index"] = index_name
+                    index_mapping["Field"] = property_name
+                    index_mapping["Type"] = (
+                        all_mappings[index_name]["mappings"]["properties"][
+                            property_name
+                        ]["type"]
+                        if "type"
+                        in all_mappings[index_name]["mappings"]["properties"][
+                            property_name
+                        ].keys()
+                        else "?"
+                    )
+                    index_mappings_coll.append(index_mapping)
+        except Exception as err:
+            raise Exception(
+                "Unexpected issue while getting index fields") from err
+        with pd.option_context(
+                "display.max_rows", len(index_mappings_coll) + 1):
+            return display(pd.DataFrame(
+                data=index_mappings_coll, columns=columns))
+
+    def count_search_results(self, index: Union[str, Sequence[str]],
+                             query: dict):
+        """
+        Count number of documents returned by the query
+
+        Parameters
+        ----------
+             index : str or Sequence[str]
+                     The name(s) of the Elasticsearch indices or their
+                     aliases to search.
+
+             query : dict
+                     A dictionary containing the search query parameters.
+                     Query can start with `query` key and contain other
+                     query options which will be ignored
+
+                         .. code-block:: json
+                             {"query": {"match": {"title": "python"}}}}
+                     or only consist of content of `query` block
+                         .. code-block:: json
+                             {"match": {"title": "python"}}}
+        """
+        if len(index) == 0:
+            raise ValueError("Provide at least one index or index alias name")
+        query = self.__extract_query(query=query)
+        count = self.elastic.count(index=index, query=query,
+                                   allow_no_indices=False)["count"]
+        return f"Number of documents: {format(count, ',')}"
+
+    def read_data_with_scan(
+        self,
+        index: Union[str, Sequence[str]],
+        query: dict,
+        include_fields: Optional[list[str]] = None,
+        size: int = 1000,
+        request_timeout: int = ES_TIMEOUT,
+        show_progress: bool = True,
+    ):
+        """
+        Retrieve documents from an Elasticsearch or OpenSearch index or
+        indices using search query and elasticsearch or OpenSearch scan helper
+        function. The function converts search results to a Pandas DataFrame
+        and does not return current scroll id if the process fails.
+
+        Parameters
+        ----------
+            index : str or Sequence[str]
+                    The name(s) of the Elasticsearch or OpenSearch indices or
+                    their aliases to search.
+            query : dict
+                    A dictionary containing the search query parameters.
+                    Query can start with `query` key and contain other
+                    query options which will be used in the search
+
+                        .. code-block:: json
+                            {"query": {"match": {"title": "python"}}}}
+                    or only consist of content of `query` block
+                    (preferred method to avoid clashing with other parameters)
+
+                        .. code-block:: json
+                            {"match": {"title": "python"}}}
+
+            include_fields : list[str], optional
+                    A list of fields to be included in search results
+                    and presented as columns in the DataFrame.
+                    If not provided, only _index, _id and _score fields will
+                    be included. Columns <strong>_index, _id, _score</strong>
+                    are present in all search results
+            size : int, optional, default = 1000
+                    The number of documents to be returned by the query or
+                    scroll API during each iteration.
+                    <strong>MAX: 10,000</strong>.
+            request_timeout : int, optional, default=300
+                    The time in seconds to wait for a response
+                    from Elasticsearch or OpenSearch before timing out.
+            show_progress : bool, optional, default=True
+                    Whether to show the progress in console.
+        Returns
+        ------
+        pandas.DataFrame
+            A DataFrame containing the retrieved documents.
+
+        Raises
+        ------
+        Exception
+            If the search fails or cancelled by the user.
+        """
+        pr_bar: Optional[tqdm.tqdm] = None
+        try:
+            if len(index) == 0:
+                raise ValueError(
+                    "Provide at least one index or index alias name")
+            self.__validate_size(size=size)
+            if "query" not in query.keys():
+                temp_query = query.copy()
+                query.clear()
+                query["query"] = temp_query
+            pr_bar = tqdm.tqdm(
+                desc="CogStack retrieved...",
+                disable=not show_progress, colour="green"
+            )
+
+            scan_results = es_helpers.scan(
+                cast(es_cls, self.elastic),
+                index=index,
+                query=query,
+                size=size,
+                request_timeout=request_timeout,
+                source=False,
+                fields=include_fields,
+                allow_no_indices=False,
+            )
+            all_mapped_results = []
+            pr_bar.iterable = scan_results
+            pr_bar.total = self.elastic.count(
+                index=index, query=query["query"])["count"]
+            all_mapped_results = self.__map_search_results(hits=pr_bar)
+        except BaseException as err:
+            if isinstance(err, KeyboardInterrupt):
+                if pr_bar is not None:
+                    pr_bar.bar_format = "%s{l_bar}%s{bar}%s{r_bar}" % (
+                        "\033[0;33m",
+                        "\033[0;33m",
+                        "\033[0;33m",
+                    )
+                    pr_bar.set_description(
+                        "CogStack read cancelled! Processed", refresh=True
+                    )
+                print("Request cancelled and current "
+                      "search_scroll_id deleted...")
+            else:
+                if pr_bar is not None:
+                    pr_bar.bar_format = "%s{l_bar}%s{bar}%s{r_bar}" % (
+                        "\033[0;31m",
+                        "\033[0;31m",
+                        "\033[0;31m",
+                    )
+                    pr_bar.set_description(
+                        "CogStack read failed! Processed", refresh=True
+                    )
+                print(
+                    Exception(
+                        f"Unexpected {err=},\n {traceback.format_exc()}, "
+                        f"{type(err)=}")
+                )
+        return self.__create_dataframe(all_mapped_results, include_fields)
+
+    def read_data_with_scroll(
+        self,
+        index: Union[str, Sequence[str]],
+        query: dict,
+        include_fields: Optional[list[str]] = None,
+        size: int = 1000,
+        search_scroll_id: Optional[str] = None,
+        request_timeout: Optional[int] = ES_TIMEOUT,
+        show_progress: Optional[bool] = True,
+    ):
+        """
+        Retrieves documents from an Elasticsearch or OpenSearch index using
+        search query and scroll API. Default scroll timeout is set to 10
+        minutes. The function converts search results to a Pandas DataFrame.
+
+        Parameters
+        ----------
+            index : str or Sequence[str]
+                    The name(s) of the Elasticsearch or OpenSearch indices or
+                    their aliases to search.
+            query : dict
+                    A dictionary containing the search query parameters.
+                    Query can start with `query` key
+                    and contain other query options which will be ignored
+
+                        .. code-block:: json
+                            {"query": {"match": {"title": "python"}}}}
+                    or only consist of content of `query` block
+                        .. code-block:: json
+                            {"match": {"title": "python"}}}
+
+            include_fields : list[str], optional
+                    A list of fields to be included in search results
+                    and presented as columns in the DataFrame.
+                    If not provided, only _index, _id and _score fields
+                    will be included.
+                    Columns <strong>_index, _id, _score</strong> are present
+                    in all search results
+            size : int, optional, default = 1000
+                    The number of documents to be returned by the query
+                    or scroll API during each iteration.
+                    <strong>MAX: 10,000</strong>.
+            search_scroll_id : str, optional
+                    The value of the last <strong>scroll_id</strong>
+                    returned by scroll API and used to continue the search
+                    if the current search fails.
+                    The value of <strong>scroll_id</strong>
+                    times out after <strong>10 minutes</strong>.
+                    After which the search will have to be restarted.
+                    <strong>Note:</strong> Absence of this parameter indicates
+                    a new search.
+            request_timeout : int, optional, default=300
+                    The time in seconds to wait for a response from
+                    Elasticsearch or OpenSearch before timing out.
+            show_progress : bool, optional, default=True
+                    Whether to show the progress in console.
+                    <strong>IMPORTANT:</strong> The progress bar displays
+                    the total hits for the query even if continuing the
+                    search using `search_scroll_id`.
+        Returns
+        ------
+        pandas.DataFrame
+            A DataFrame containing the retrieved documents.
+
+        Raises
+        ------
+        Exception
+            If the search fails or cancelled by the user.
+            If the search fails, error message includes the value of
+            current `search_scroll_id` which can be used as a function
+            parameter to continue the search. <strong>IMPORTANT:</strong>
+            If the function fails after `scroll` request, the subsequent
+            request will skip results of the failed scroll by the value
+            of `size` parameter.
+        """
+        try:
+            if len(index) == 0:
+                raise ValueError(
+                    "Provide at least one index or index alias name")
+            self.__validate_size(size=size)
+            query = self.__extract_query(query=query)
+            result_count = size
+            all_mapped_results = []
+            search_result = None
+            include_fields_map: Union[Sequence[Mapping[str, Any]], None] = (
+                [{"field": field} for field in include_fields]
+                if include_fields is not None
+                else None
+            )
+
+            pr_bar = tqdm.tqdm(
+                desc="CogStack retrieved...",
+                disable=not show_progress, colour="green")
+
+            if search_scroll_id is None:
+                search_result = self.elastic.search(
+                    index=index,
+                    size=size,
+                    query=query,
+                    fields=include_fields_map,
+                    source=False,
+                    scroll="10m",
+                    timeout=f"{request_timeout}s",
+                    allow_no_indices=False,
+                    rest_total_hits_as_int=True,
+                )
+
+                pr_bar.total = search_result.body["hits"]["total"]
+                hits = search_result.body["hits"]["hits"]
+                result_count = len(hits)
+                search_scroll_id = search_result.body["_scroll_id"]
+                all_mapped_results.extend(self.__map_search_results(hits=hits))
+                pr_bar.update(len(hits))
+                if search_result["_shards"]["failed"] > 0:
+                    raise LookupError(search_result["_shards"]["failures"])
+
+            while search_scroll_id and result_count == size:
+                # Perform ES scroll request
+                search_result = self.elastic.scroll(
+                    scroll_id=search_scroll_id,
+                    scroll="10m",
+                    rest_total_hits_as_int=True,
+                )
+                hits = search_result.body["hits"]["hits"]
+                pr_bar.total = (
+                    pr_bar.total
+                    if pr_bar.total
+                    else search_result.body["hits"]["total"]
+                )
+                all_mapped_results.extend(self.__map_search_results(hits=hits))
+                search_scroll_id = search_result.body["_scroll_id"]
+                result_count = len(hits)
+                pr_bar.update(result_count)
+            self.elastic.clear_scroll(scroll_id=search_scroll_id)
+        except BaseException as err:
+            if isinstance(err, KeyboardInterrupt):
+                pr_bar.bar_format = "%s{l_bar}%s{bar}%s{r_bar}" % (
+                    "\033[0;33m",
+                    "\033[0;33m",
+                    "\033[0;33m",
+                )
+                pr_bar.set_description(
+                    "CogStack read cancelled! Processed", refresh=True
+                )
+                self.elastic.clear_scroll(scroll_id=search_scroll_id)
+                print("Request cancelled and current "
+                      "search_scroll_id deleted...")
+            else:
+                if pr_bar is not None:
+                    pr_bar.bar_format = "%s{l_bar}%s{bar}%s{r_bar}" % (
+                        "\033[0;31m",
+                        "\033[0;31m",
+                        "\033[0;31m",
+                    )
+                    pr_bar.set_description(
+                        "CogStack read failed! Processed", refresh=True
+                    )
+                print(
+                    Exception(
+                        f"Unexpected {err=},\n {traceback.format_exc()}, "
+                        f"{type(err)=}"),
+                    f"{search_scroll_id=}",
+                    sep="\n",
+                )
+
+        return self.__create_dataframe(all_mapped_results, include_fields)
+
+    def read_data_with_sorting(
+        self,
+        index: Union[str, Sequence[str]],
+        query: dict,
+        include_fields: Optional[list[str]] = None,
+        size: Optional[int] = 1000,
+        sort: Optional[Union[dict, list[str]]] = None,
+        search_after: Optional[list[Union[str, int, float, Any, None]]] = None,
+        request_timeout: Optional[int] = ES_TIMEOUT,
+        show_progress: Optional[bool] = True,
+    ):
+        """
+        Retrieve documents from an Elasticsearch or OpenSearch index using
+        search query and convert them to a Pandas DataFrame.
+
+        Parameters
+        ----------
+            index : str or Sequence[str]
+                    The name(s) of the Elasticsearch  or OpenSearch indices or
+                    their aliases to search.
+            query : dict
+                    A dictionary containing the search query parameters.
+                    Query can start with `query` key and contain other
+                    query options which will be ignored
+
+                        .. code-block:: json
+                            {"query": {"match": {"title": "python"}}}}
+                    or only consist of content of `query` block
+                        .. code-block:: json
+                            {"match": {"title": "python"}}}
+            include_fields : list[str], optional
+                    A list of fields to be included in search results
+                    and presented as columns in the DataFrame.
+                    If not provided, only _index, _id and _score
+                    fields will be included.
+                    Columns <strong>_index, _id, _score</strong> are
+                    present in all search results
+            size : int, optional, default = 1000
+                    The number of documents to be returned by the query
+                    or scroll API during each iteration.
+                    <strong>MAX: 10,000</strong>.
+            sort : dict|list[str], optional, default = {"id": "asc"}
+                    Sort field name(s) and order (`asc` or `desc`)
+                    in dictionary format or list of field names without order.
+                    `{"id":"asc"}` or `id` is added if not provided as a
+                    tiebreaker field. Default sorting order is `asc`
+                    <strong>Example:</strong>
+                    - `dict : {"filed_Name" : "desc", "id" : "asc"}`
+                    - `list : ["filed_Name", "id"]`
+            search_after : list[str|int|float|Any|None], optional
+                    The sort value of the last record in search results.
+                    Can be provided if the a search fails and needs to
+                    be restarted from the last successful search.
+                    Use the value of `search_after_value` from the
+                    error message
+            request_timeout : int, optional, default = 300
+                    The time in seconds to wait for a response from
+                    Elasticsearch or OpenSearch before timing out.
+            show_progress : bool, optional
+                    Whether to show the progress in console. Defaults to true.
+
+        Returns
+        ------
+            pandas.DataFrame
+                A DataFrame containing the retrieved documents.
+
+        Raises
+        ------
+            Exception
+                If the search fails or cancelled by the user.
+                Error message includes the value of current
+                `search_after_value` which can be used as a function
+                parameter to continue the search.
+        """
+        try:
+            if len(index) == 0:
+                raise ValueError(
+                    "Provide at least one index or index alias name")
+            result_count = size
+            all_mapped_results = []
+            if sort is None:
+                sort = {"id": "asc"}
+            search_after_value = search_after
+            include_fields_map: Union[Sequence[Mapping[str, Any]], None] = (
+                [{"field": field} for field in include_fields]
+                if include_fields is not None
+                else None
+            )
+
+            self.__validate_size(size=size)
+            query = self.__extract_query(query=query)
+
+            if (isinstance(sort, dict) and "id" not in sort.keys()) or (
+                isinstance(sort, list) and "id" not in sort
+            ):
+                if isinstance(sort, dict):
+                    sort["id"] = "asc"
+                else:
+                    sort.append("id")
+            pr_bar = tqdm.tqdm(
+                desc="CogStack retrieved...",
+                disable=not show_progress, colour="green")
+
+            while result_count == size:
+                search_result = self.elastic.search(
+                    index=index,
+                    size=size,
+                    query=query,
+                    fields=include_fields_map,
+                    source=False,
+                    sort=sort,
+                    search_after=search_after_value,
+                    timeout=f"{request_timeout}s",
+                    track_scores=True,
+                    track_total_hits=True,
+                    allow_no_indices=False,
+                    rest_total_hits_as_int=True,
+                )
+                hits = search_result["hits"]["hits"]
+                all_mapped_results.extend(self.__map_search_results(hits=hits))
+                result_count = len(hits)
+                pr_bar.update(result_count)
+                search_after_value = hits[-1]["sort"]
+                pr_bar.total = (
+                    pr_bar.total
+                    if pr_bar.total
+                    else search_result.body["hits"]["total"]
+                )
+                if search_result["_shards"]["failed"] > 0:
+                    raise LookupError(search_result["_shards"]["failures"])
+        except BaseException as err:
+            if isinstance(err, KeyboardInterrupt):
+                pr_bar.bar_format = "%s{l_bar}%s{bar}%s{r_bar}" % (
+                    "\033[0;33m",
+                    "\033[0;33m",
+                    "\033[0;33m",
+                )
+                pr_bar.set_description(
+                    "CogStack read cancelled! Processed", refresh=True
+                )
+                print("Request cancelled.")
+            else:
+                if pr_bar is not None:
+                    pr_bar.bar_format = "%s{l_bar}%s{bar}%s{r_bar}" % (
+                        "\033[0;31m",
+                        "\033[0;31m",
+                        "\033[0;31m",
+                    )
+                    pr_bar.set_description(
+                        "CogStack read failed! Processed", refresh=True
+                    )
+                print(f"Unexpected {err=},\n {traceback.format_exc()}, "
+                      f"{type(err)=}")
+            print(f"The last {search_after_value=}")
+
+        return self.__create_dataframe(all_mapped_results, include_fields)
+
+    def __extract_query(self, query: dict):
+        if "query" in query.keys():
+            return query["query"]
+        return query
+
+    def __validate_size(self, size):
+        if size > 10000:
+            raise ValueError("Size must not be greater than 10000")
+
+    def __map_search_results(self, hits: Iterable):
+        hit: dict
+        for hit in hits:
+            row = dict()
+            row["_index"] = hit["_index"]
+            row["_id"] = hit["_id"]
+            row["_score"] = hit["_score"]
+            if "fields" in hit.keys():
+                row.update(
+                    {k: ", ".join(map(str, v)) for
+                     k, v in dict(hit["fields"]).items()}
+                )
+            yield row
+
+    def __create_dataframe(self, all_mapped_results, column_headers):
+        """
+        Create a Pandas DataFrame from the search results.
+
+        Parameters
+        ----------
+                all_mapped_results : list
+                    The list of mapped search results.
+                column_headers : list or None
+                    The list of column headers to include in the DataFrame.
+
+        Returns
+        -------
+            pandas.DataFrame
+                A DataFrame containing the search results.
+        """
+        df_headers = ["_index", "_id", "_score"]
+        if column_headers and "*" not in column_headers:
+            df_headers.extend(column_headers)
+            return pd.DataFrame(data=all_mapped_results, columns=df_headers)
+        return pd.DataFrame(data=all_mapped_results)
+
+
+def print_dataframe(df: pd.DataFrame, separator: str = "\\n"):
+    """
+    Replace <strong>separator</strong> string with HTML
+    <strong>&lt;br/&gt;</strong> tag for printing in Notebook
+
+    Parameters:
+    -----------
+        df : DataFrame
+            Input DataFrame
+        separator : str
+            Separator to be replaced with HTML <strong>&lt;br/&gt;</strong>
+    """
+    return display(HTML(df.to_html().replace(separator, "<br/>")))
+
+
+def list_chunker(user_list: list[Any], n: int) -> list[list[Any]]:
+    """
+    Divide a list into sublists of a specified size.
+
+    Parameters:
+    ----------
+        user_list : list[Any]
+            The list to be divided.
+        n : int
+            The size of the sublists.
+
+    Returns:
+    --------
+        list[list[Any]]: A list of sublists containing the elements of
+            the input list.
+    """
+    n = max(1, n)
+    return [user_list[i: i + n] for i in range(0, len(user_list), n)]
diff --git a/cogstack-es/credentials.py b/cogstack-es/credentials.py
new file mode 100644
index 000000000..9e2fa2d7c
--- /dev/null
+++ b/cogstack-es/credentials.py
@@ -0,0 +1,24 @@
+from typing import List
+# CogStack login details
+# Any questions on what these details are please contact
+# your local CogStack administrator.
+
+hosts: List[str] = [
+    # "https://cogstack-es-1:9200",
+    #  # This is an example of a CogStack ElasticSearch instance.
+ ]  # This is a list of your CogStack ElasticSearch instances.
+
+# These are your login details (either via http_auth or API) Should be in
+# string format
+username = None
+password = None
+# If you are using API key authentication
+# Use either "id" and "api_key" or "encoded" field, or both.
+api_key = {
+    # This is the API key id issued by your cogstack administrator.
+    "id": "",
+    # This is the api key issued by your cogstack administrator.
+    "api_key": "",
+    # This is the encoded api key issued by your cogstack administrator.
+    "encoded": "",
+}
diff --git a/cogstack-es/data/.keep b/cogstack-es/data/.keep
new file mode 100644
index 000000000..e69de29bb
diff --git a/cogstack-es/requirements-dev.txt b/cogstack-es/requirements-dev.txt
new file mode 100644
index 000000000..6896deb36
--- /dev/null
+++ b/cogstack-es/requirements-dev.txt
@@ -0,0 +1,6 @@
+mypy
+pandas-stubs
+types-tqdm
+pytest
+ruff
+nbconvert
diff --git a/cogstack-es/requirements.txt b/cogstack-es/requirements.txt
new file mode 100644
index 000000000..588e322b1
--- /dev/null
+++ b/cogstack-es/requirements.txt
@@ -0,0 +1,5 @@
+elasticsearch>=9.0.0,<10.0
+opensearch-py>=2.0.0
+ipython
+tqdm>=4.64,<5.0
+pandas>=2.2,<3.0
diff --git a/cogstack-es/search_template.ipynb b/cogstack-es/search_template.ipynb
new file mode 100644
index 000000000..8975e4f61
--- /dev/null
+++ b/cogstack-es/search_template.ipynb
@@ -0,0 +1,294 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Seaching CogStack\n",
+    "\n",
+    "This script is designed to be a template for cogstack searches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from credentials import hosts, api_key, username, password\n",
+    "from cogstack import CogStack, print_dataframe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Login and Initialise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cs = CogStack.with_api_key_auth(hosts=hosts, api_key=api_key)\n",
+    "#cs = CogStack.with_basic_auth(hosts=hosts, username=username, password=password)\n",
+    "#cs = CogStack(hosts).use_api_key_auth(api_key=api_key)\n",
+    "#cs = CogStack(hosts).use_basic_auth(username=username, password=password)\n",
+    "#cs = CogStack(hosts).use_api_key_auth(\"\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Check the list of Indices and columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "View all indices and their aliases available to this user. Either index names or their aliases can be used to extract data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_dataframe(cs.get_indices_and_aliases(), ', ')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "View fields/columns and their data types for provided index names or aliases"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "indices = [\"example_index\"] # <- CHANGE THIS\n",
+    "cs.get_index_fields(indices)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Set search query parameters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pt_list = [ ]  # example list of patients' patient_TrustNumber here"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Columns of interest\n",
+    "\n",
+    "Select your fields and list in order of output columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "columns = []"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Build query\n",
+    "\n",
+    "For further information on [how to build a query can be found here](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html)\n",
+    "\n",
+    "Further information on [free text string queries can be found here](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "search_query =  {\n",
+    "                \"bool\": {\n",
+    "                #\"filter\": {\n",
+    "                #    \"terms\": {\n",
+    "                #    \"patient_TrustNumber\": pt_list\n",
+    "                #    }\n",
+    "                #},\n",
+    "                \"must\": [\n",
+    "                    {\n",
+    "                    \"query_string\": {\n",
+    "                        \"query\": \"\",\n",
+    "                         \"default_field\":\"\"\n",
+    "                    }\n",
+    "                    }\n",
+    "                ]\n",
+    "                }\n",
+    "}\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Count the number of documents matching the search query\n",
+    "example_indices = [\"example-index\"]  # <- CHANGE THIS\n",
+    "cs.count_search_results(index=example_indices, query=search_query)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Search, Process, and Save\n",
+    "Use either of the functions to extract search results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read data using scan helper function.\n",
+    "# Does not provide a scroll id, so cannot be resumed if search fails midway.\n",
+    "example_indices = [\"example-index\"]  # <- CHANGE THIS\n",
+    "df = cs.read_data_with_scan(index=example_indices, query=search_query, include_fields=columns)\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read data with scroll API and get scroll id if search fails midway. \n",
+    "# Can be used to resume the search from the failed scroll id.\n",
+    "example_indices = [\"example-index\"]  # <- CHANGE THIS\n",
+    "df = cs.read_data_with_scroll(index=example_indices, query=search_query, include_fields=columns)\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read data with sorting and get search_after value if search fails midway.\n",
+    "# Can be used to resume the search from the failed search_after value but can be slower than scan or scroll methods for large datasets.\n",
+    "# Note: Sorting requires a field to sort by, which should be present in the index. Default sorting is by _id.\n",
+    "example_indices = [\"example-index\"]  # <- CHANGE THIS\n",
+    "df = cs.read_data_with_sorting(index=example_indices, query=search_query, \n",
+    "                               include_fields=columns)\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Process"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Whatever you want here\n",
+    "# For example, display the first few rows of the DataFrame\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Save"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the DataFrame to a CSV file\n",
+    "path_to_results = \"data/cogstack_search_results\"\n",
+    "file_name = \"file_name.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.to_csv(path_to_results + '\\\\' + file_name, index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.5"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/cogstack-es/tests/__init__.py b/cogstack-es/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/cogstack-es/tests/test_cogstack.py b/cogstack-es/tests/test_cogstack.py
new file mode 100644
index 000000000..e025f1f61
--- /dev/null
+++ b/cogstack-es/tests/test_cogstack.py
@@ -0,0 +1,87 @@
+import pytest
+from unittest.mock import Mock, patch, MagicMock
+import pandas as pd
+from cogstack import CogStack
+
+
+@pytest.fixture
+def mock_elasticsearch():
+    """Fixture to mock Elasticsearch client"""
+    with patch('cogstack.es_cls') as mock_es:
+        mock_client = Mock()
+        mock_es.return_value = mock_client
+        mock_client.ping.return_value = True
+        print("With Mock ES...")
+        yield mock_es, mock_client
+    print("DONE WITH MOCK ES")
+
+
+def test_basic_auth_connection(mock_elasticsearch: tuple[MagicMock, Mock]):
+    """Test basic authentication"""
+    dunder_init, _ = mock_elasticsearch
+    with patch('builtins.input', return_value='test_user'):
+        with patch('getpass.getpass', return_value='test_pass'):
+            cs = CogStack.with_basic_auth(['http://localhost:9200'])
+            assert isinstance(cs, CogStack)
+            assert cs.elastic.ping()
+            dunder_init.assert_called_once()
+
+
+def test_api_key_auth_connection(mock_elasticsearch: tuple[MagicMock, Mock]):
+    """Test API key authentication"""
+    dunder_init, _ = mock_elasticsearch
+    api_key = {"encoded": "test_encoded_key"}
+    cs = CogStack.with_api_key_auth(['http://localhost:9200'], api_key)
+    assert isinstance(cs, CogStack)
+    assert cs.elastic.ping()
+    dunder_init.assert_called_once()
+
+
+def test_count_search_results(mock_elasticsearch: tuple[MagicMock, Mock]):
+    """Test count_search_results method"""
+    # Mock the count response
+    _, mock_inst = mock_elasticsearch
+    mock_inst.count.return_value = {'count': 100}
+
+    cs = CogStack(['http://localhost:9200'])
+    cs.elastic = mock_inst
+
+    query = {"match": {"title": "test"}}
+    result = cs.count_search_results('test_index', query)
+
+    assert "100" in result
+    mock_inst.count.assert_called_once()
+
+
+def test_read_data_with_scan(mock_elasticsearch: tuple[MagicMock, Mock]):
+    """Test read_data_with_scan method"""
+    _, mock_inst = mock_elasticsearch
+    # Mock scan results
+    mock_hits = MagicMock()
+    mock_hits.__iter__.return_value = [
+        {'_index': 'test_index', '_id': '1', '_score': 1.0,
+         'fields': {'title': 'test1'}},
+        {'_index': 'test_index', '_id': '2', '_score': 0.8,
+         'fields': {'title': 'test2'}}
+    ]
+
+    # Mock scan helper
+    with patch('cogstack.es_helpers.scan') as mock_scan, \
+         patch('cogstack.tqdm.tqdm') as mock_tqdm:
+
+        mock_scan.return_value = mock_hits
+        mock_tqdm.return_value = mock_hits  # Make tqdm iterable
+        mock_tqdm.total = 2
+
+        # Mock count for progress bar
+        mock_inst.count.return_value = {'count': 2}
+
+        cs = CogStack(['http://localhost:9200'])
+        cs.elastic = mock_inst
+
+        query = {"query": {"match": {"title": "test"}}}
+        result = cs.read_data_with_scan('test_index', query, ['title'])
+
+        assert isinstance(result, pd.DataFrame)
+        assert len(result) == 2
+        assert 'title' in result.columns
diff --git a/cogstack-es/tests/test_search_template_nb.py b/cogstack-es/tests/test_search_template_nb.py
new file mode 100644
index 000000000..b5ffa3fbf
--- /dev/null
+++ b/cogstack-es/tests/test_search_template_nb.py
@@ -0,0 +1,85 @@
+import nbformat
+from nbconvert import PythonExporter
+from unittest.mock import Mock, patch, MagicMock
+from contextlib import contextmanager
+import tempfile
+import os
+import pytest
+
+
+EXPECTED_TEMP_FILE_PATH = "data/cogstack_search_results\\file_name.csv"
+
+
+@contextmanager
+def all_mocked(python_code: str):
+    with tempfile.NamedTemporaryFile('w', suffix='.py') as temp_file:
+        temp_file.write(python_code)
+        with patch('cogstack.es_cls') as mock_es:
+            with patch('credentials.hosts', ['http://localhost:9200']):
+                with patch('credentials.api_key', {"encoded": "test_api_key"}):
+                    with patch('elasticsearch.helpers.scan') as mock_scan:
+                        with patch('tqdm.tqdm') as mock_tqdm:
+                            yield (temp_file.name, mock_es,
+                                   mock_scan, mock_tqdm)
+
+
+def setup_mocks(mock_es: MagicMock, mock_scan: MagicMock, mock_tqdm: MagicMock):
+    # Setup mocks
+    mock_client = Mock()
+    mock_es.return_value = mock_client
+    mock_client.ping.return_value = True
+
+    mock_aliases = Mock()
+    mock_aliases.body = {
+        'index1': {'aliases': {'alias1': {}}}
+    }
+    mock_mapping = Mock()
+    mock_mapping.body = {
+        'index1': {'mappings': {'properties': {}}}
+    }
+    # Mock Elasticsearch responses
+    mock_client.indices.get_alias.return_value = mock_aliases
+    mock_client.indices.get_mapping.return_value = mock_mapping
+    mock_client.count.return_value = {'count': 10}
+
+    # Mock scan results
+    mock_hits = MagicMock()
+    mock_hits.__iter__.return_value = [{
+        '_index': 'test', '_id': '1', '_score': 1.0,
+        'fields': {'test_field': ['value']}
+    }]
+    mock_scan.return_value = mock_hits
+    mock_tqdm.return_value = mock_hits
+    mock_tqdm.total = 1
+
+
+@pytest.fixture
+def temp_file_remover():
+    yield
+    if os.path.exists(EXPECTED_TEMP_FILE_PATH):
+        os.remove(EXPECTED_TEMP_FILE_PATH)
+
+
+def test_notebook_execution(temp_file_remover):
+    """Execute the notebook with mocked dependencies"""
+
+    # Read the notebook
+    notebook_path = 'search_template.ipynb'
+    with open(notebook_path, 'r') as f:
+        notebook = nbformat.read(f, as_version=4)
+
+    # Convert to Python code
+    exporter = PythonExporter()
+    python_code, _ = exporter.from_notebook_node(notebook)
+
+    # Mock all the dependencies
+    with all_mocked(python_code) as (temp_code_path, mock_es,
+                                     mock_scan, mock_tqdm):
+        setup_mocks(mock_es, mock_scan, mock_tqdm)
+
+        # Execute the notebook code
+        exec(python_code, {
+            '__file__': temp_code_path,
+            '__name__': '__main__'
+        })
+    assert os.path.exists(EXPECTED_TEMP_FILE_PATH)