In [46]:
from elasticsearch import Elasticsearch

In [47]:
es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic", "-cC8rBqKeq_ESUrF7nCi"),
    ca_certs=r"C:\Users\Samee\http_ca.crt",
    verify_certs=True
)
es.ping()

True

## Prepare the data

In [48]:
import pandas as pd

df = pd.read_csv("researchpaper.csv")
new_df=df[["Title","Paper_ID","Abstract","Keywords","Description"]]

In [49]:
new_df.head()

Unnamed: 0,Title,Paper_ID,Abstract,Keywords,Description
0,Outer Space Exploration and the SustainabiIity...,PA001,In contrast with the early years of space flig...,Outer space; sustainability; space debris; out...,"Space exploration, once limited to states, now..."
1,Building an AI-based Model to Extract and Clas...,PA002,"Medical history forms, often lacking standardi...",AI-based model\n\nMedical history forms (PMH f...,"This research paper, ""Building an AI-based Mod..."
2,Human Health during Space Travel: State-of-the...,PA003,The field of human space travel is in the mids...,"human health, space travel, space mission, spa...",\nThis paper provides a state-of-the-art revie...
3,Problems Astronauts Face During Space Missions,PA004,Space missions expose astronauts to unique and...,Astronaut health; space missions; microgravity...,This research paper provides a comprehensive a...
4,The Role of Artificial Intelligence in Future ...,PA005,Future deep-space missions to Mars and beyond ...,Artificial intelligence; astronaut healthcare;...,This research paper investigates how Artificia...


In [50]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Title        42 non-null     object
 1   Paper_ID     42 non-null     object
 2   Abstract     42 non-null     object
 3   Keywords     41 non-null     object
 4   Description  42 non-null     object
dtypes: object(5)
memory usage: 1.8+ KB


In [51]:
new_df.fillna("None", inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df.fillna("None", inplace=True)


## Convert the relevant field to Vector using BERT model

In [52]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-mpnet-base-v2')

In [53]:
new_df["DescriptionVector"] = new_df["Description"].apply(lambda x: model.encode(x))

In [54]:
new_df.head()

Unnamed: 0,Title,Paper_ID,Abstract,Keywords,Description,DescriptionVector
0,Outer Space Exploration and the SustainabiIity...,PA001,In contrast with the early years of space flig...,Outer space; sustainability; space debris; out...,"Space exploration, once limited to states, now...","[0.08122048, 0.054946978, 0.027159322, -0.0032..."
1,Building an AI-based Model to Extract and Clas...,PA002,"Medical history forms, often lacking standardi...",AI-based model\n\nMedical history forms (PMH f...,"This research paper, ""Building an AI-based Mod...","[0.03583035, 0.03543252, -0.0033567206, -0.044..."
2,Human Health during Space Travel: State-of-the...,PA003,The field of human space travel is in the mids...,"human health, space travel, space mission, spa...",\nThis paper provides a state-of-the-art revie...,"[0.030508902, 0.035693463, -0.041795213, -0.09..."
3,Problems Astronauts Face During Space Missions,PA004,Space missions expose astronauts to unique and...,Astronaut health; space missions; microgravity...,This research paper provides a comprehensive a...,"[0.057593938, 0.0055773044, -0.018906672, -0.0..."
4,The Role of Artificial Intelligence in Future ...,PA005,Future deep-space missions to Mars and beyond ...,Artificial intelligence; astronaut healthcare;...,This research paper investigates how Artificia...,"[0.046859734, 0.016425988, -0.04458429, -0.064..."


In [37]:
es.ping()

True

## Create new index in ElasticSearch!

In [56]:
from indexMapping import indexMapping

es.indices.create(index="my_researh_paper", mappings=indexMapping)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_researh_paper'})

## Ingest the data into index

In [57]:
record_list = new_df.to_dict("records")

In [58]:
for record in record_list:
    try:
        es.index(index="my_research_paper", document=record, id=record["Paper_ID"])
    except Exception as e:
        print(e)

In [59]:
es.count(index="my_research_paper")

ObjectApiResponse({'count': 42, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

## Search the data

In [44]:
input_keyword = "sustainable space"
vector_of_input_keyword = model.encode(input_keyword).tolist()

query = {
    "knn": {
        "field": "DescriptionVector",
        "query_vector": vector_of_input_keyword,
        "k": 10,
        "num_candidates": 500
    }
}

res = es.search(
    index="all_papers",
    body={
        **query,
        "_source": ["Title", "Description"]
    }
)

for hit in res["hits"]["hits"]:
    print(hit["_source"])


{'Title': 'Outer Space Exploration and the SustainabiIity of the Space Environment-An Uneasy Relationship', 'Description': 'Space exploration, once limited to states, now involves private companies, leading to a more congested and competitive outer space environment. [cite_start]This poses challenges to the sustainability of the space environment, particularly due to the proliferation of space debris and the lack of protection for vulnerable cultural and historical sites on celestial bodies[cite: 30, 31, 32, 33].\n\n[cite_start]Current space law, as embodied in the Outer Space Treaty, is criticized for its anthropocentric (human-centered) approach, prioritizing human interests and the exploitation of resources over environmental protection[cite: 238, 239, 240, 241, 242, 243, 244]. [cite_start]The treaty\'s provisions are often vague and do not explicitly address issues like space debris or the preservation of space heritage[cite: 458, 459, 460, 477, 478, 728]. [cite_start]While some in

TypeError: Positional arguments can't be used with Elasticsearch API methods. Instead only use keyword arguments.