In [2]:
# =========================
# CONFIG (EDIT THESE)
# =========================
import os
import warnings

# Embedding service endpoint; hf_embed() POSTs the query text here.
HF_EMBED_URL = "https://prasaath-journal-embedding-api.hf.space/embed"

# Qdrant cluster that hosts the two collections named below.
QDRANT_URL = "https://71c8634b-2422-4ea8-9368-84dde5cedd55.us-west-1-0.aws.cloud.qdrant.io"

# The API key is a credential, so it is read from the environment instead of
# being stored in the notebook (notebooks get shared and committed together
# with their contents). Export QDRANT_API_KEY before starting the kernel.
# NOTE(review): the key that used to be written here should be treated as
# exposed and rotated.
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
if not QDRANT_API_KEY:
    # Warn now rather than raising, so the remaining settings are still
    # defined; requests that need authentication are expected to fail later.
    warnings.warn(
        "QDRANT_API_KEY is not set in the environment; "
        "authenticated Qdrant requests will fail.",
        stacklevel=2,
    )

PRIMARY_COLLECTION = "journal_primary_si"           # change if your collection name differs
ASSOC_COLLECTION   = "journal_associate_editor"     # change if your collection name differs

SQLITE_PATH = "./history.db"  # local file in journal_api root


In [3]:
import requests
import numpy as np
import pandas as pd

from qdrant_client import QdrantClient

# Your project logic (works because notebook is in journal_api root)
from app.core_logic import score_domains, build_query_text, normalize_key


In [4]:
def hf_embed(text: str) -> np.ndarray:
    """Embed `text` through the HTTP service at ``HF_EMBED_URL``.

    The text is sent as the JSON body ``{"text": text}`` with a 120 second
    timeout. The ``"vector"`` field of the JSON reply is returned as a
    float32 NumPy array.

    Raises whatever ``raise_for_status()`` raises for an unsuccessful HTTP
    status, and ``KeyError`` if the reply has no ``"vector"`` field.
    """
    response = requests.post(
        HF_EMBED_URL,
        json={"text": text},
        timeout=120,
    )
    # Fail on HTTP errors before attempting to parse the body.
    response.raise_for_status()
    body = response.json()
    return np.asarray(body["vector"], dtype=np.float32)


In [9]:
# Single Qdrant client for the notebook; qdrant_search() uses it for every query.
client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)

def qdrant_search(collection: str, qvec: np.ndarray, topk: int = 10):
    """Query one Qdrant collection with a vector and return the result points.

    Parameters
    ----------
    collection : name of the collection to query.
    qvec       : query vector; converted with ``.tolist()`` before sending.
    topk       : passed as the ``limit`` of the query (default 10).

    Returns
    -------
    The ``points`` attribute of the ``client.query_points`` response.
    Payloads are requested (``with_payload=True``).
    """
    request = {
        "collection_name": collection,
        "prefetch": [],          # no prefetch stages are supplied
        "query": qvec.tolist(),
        "limit": topk,
        "with_payload": True,
    }
    return client.query_points(**request).points



In [10]:
# Manuscript title that drives every later step of the notebook.
title = (
    "Research on Urban Landscape and Environment Design Optimization "
    "Based on Remote Sensing Data and Artificial Intelligence Technology"
)

# Score the title against the project's domains, then build the text that
# will be embedded from the title plus those scores.
title_domain, top3_scores = score_domains(title)
qtext = build_query_text(title, top3_scores)

# Show the inputs and the derived query text for inspection.
print("Title:", title)
print("Primary Domain:", title_domain)
print("Top3:", top3_scores[:3])
print("\nqtext:\n", qtext)


Title: Research on Urban Landscape and Environment Design Optimization Based on Remote Sensing Data and Artificial Intelligence Technology
Primary Domain: AI/ML & Data Science
Top3: [('AI/ML & Data Science', 1.0), ('Energy & Environment', 1.0), ('IoT & Embedded', 0.0)]

qtext:
 Research on Urban Landscape and Environment Design Optimization Based on Remote Sensing Data and Artificial Intelligence Technology | Domains: AI/ML & Data Science, Energy & Environment | Concepts: research, urban, landscape, and, environment, design, optimization, based, remote, sensing, data, artificial


In [11]:
# Embed the query text once; the same vector is used for both collection searches.
qvec = hf_embed(text=qtext)

# Sanity readout: vector length, its norm, and its first five components.
print("Vector dim:", qvec.shape[0])
print("Vector norm:", float(np.linalg.norm(qvec)))
print("First 5:", qvec[:5])


Vector dim: 768
Vector norm: 0.9999999403953552
First 5: [ 0.01426096  0.04364611 -0.0239819   0.0330342  -0.00731904]


In [12]:
# Number of hits requested per collection (reused by the associate search).
topk = 10
primary_hits = qdrant_search(collection=PRIMARY_COLLECTION, qvec=qvec, topk=topk)

print("PRIMARY hits:", len(primary_hits))
if primary_hits:
    # Summarise only the first returned hit.
    print("PRIMARY top1 score:", float(primary_hits[0].score))
    # A hit may have no payload; fall back to an empty dict so .get() works.
    p0 = primary_hits[0].payload or {}
    print("Top1 Journal:", p0.get("Journal_Name"))
    print("Top1 SI:", p0.get("Special_Issue_Name"))


PRIMARY hits: 10
PRIMARY top1 score: 0.6061356
Top1 Journal: Geoscience Letters
Top1 SI: Neural Networks for Environmental Impact Assessment Using Satellite Imagery


In [13]:
# Same query vector and topk, run against the associate-editor collection.
assoc_hits = qdrant_search(collection=ASSOC_COLLECTION, qvec=qvec, topk=topk)

print("ASSOC hits:", len(assoc_hits))
if assoc_hits:
    # Summarise only the first returned hit.
    print("ASSOC top1 score:", float(assoc_hits[0].score))
    # A hit may have no payload; fall back to an empty dict so .get() works.
    a0 = assoc_hits[0].payload or {}
    print("Top1 Journal:", a0.get("Journal_Name"))


ASSOC hits: 10
ASSOC top1 score: 0.6216576
Top1 Journal: International Journal of Environmental Science and Technology


In [14]:
def hits_to_df(hits, source: str):
    """Flatten search hits into a small DataFrame for side-by-side viewing.

    Each hit contributes one row with the columns ``source`` (the label
    passed in), ``score`` (the hit's score as a float), and ``Journal_Name``,
    ``Special_Issue_Name`` and ``_id`` read from the hit's payload. A missing
    payload or missing payload key yields an empty string for that column.
    """
    def _record(hit):
        fields = hit.payload or {}
        return {
            "source": source,
            "score": float(hit.score),
            "Journal_Name": fields.get("Journal_Name", ""),
            "Special_Issue_Name": fields.get("Special_Issue_Name", ""),
            "_id": fields.get("_id", ""),
        }

    return pd.DataFrame([_record(hit) for hit in hits])

# One frame per collection, each row tagged with the collection it came from.
df_primary = hits_to_df(hits=primary_hits, source="PRIMARY")
df_assoc = hits_to_df(hits=assoc_hits, source="ASSOC")

# Stack both frames under a fresh 0..n-1 index.
df_all = pd.concat(objs=[df_primary, df_assoc], ignore_index=True)

# Display the 15 highest-scoring rows across both collections.
df_all.sort_values(by="score", ascending=False).head(n=15)


Unnamed: 0,source,score,Journal_Name,Special_Issue_Name,_id
10,ASSOC,0.621658,International Journal of Environmental Science...,,65afc40219c4b93714cbc099
11,ASSOC,0.615186,Earth Science Informatics,,6625fdeeaef4498439cd1ac2
0,PRIMARY,0.606136,Geoscience Letters,Neural Networks for Environmental Impact Asses...,6833ea95f3df288cbba1bcc0
12,ASSOC,0.598124,Egyptian Journal of Remote Sensing and Space S...,,65d9a6876d134f21e6f45f63
1,PRIMARY,0.596939,Geoscience Data Journal,Neural Networks for Environmental Impact Asses...,6761ace06e68e0e9153ecbe4
2,PRIMARY,0.579502,Journal of Environmental & Earth Sciences,Advancement in Data Science for Remote Sensing...,6654785cc92090722d40a44f
13,ASSOC,0.575121,European Journal of Remote Sensing,,6596205cf4b8571a65b33215
14,ASSOC,0.554858,Intelligent Buildings International,,65da00f321fee3b45d3d2a74
15,ASSOC,0.553128,International Journal of Data Science and Anal...,,67ac2d436e68e0e915435071
16,ASSOC,0.546421,Information Processing and Management,,68ff2e3b7c526c1eaeb11bf8


In [15]:
import sqlite3
from contextlib import closing

# Load the four history tables into DataFrames, then release the database
# handle. sqlite3's own `with conn:` only commits or rolls back a transaction
# and leaves the connection open, so closing() is used to guarantee that
# conn.close() runs even if one of the reads raises.
with closing(sqlite3.connect(SQLITE_PATH)) as conn:
    pub_j = pd.read_sql_query("SELECT * FROM pub_j", conn)
    rej_j = pd.read_sql_query("SELECT * FROM rej_j", conn)
    pub_si = pd.read_sql_query("SELECT * FROM pub_si", conn)
    rej_si = pd.read_sql_query("SELECT * FROM rej_si", conn)

# Row counts of each table, as a quick check that the loads returned data.
print(len(pub_j), len(rej_j), len(pub_si), len(rej_si))


353 3017 546 4064


In [16]:
# Carry over every payload field so the ranking step has what it needs.
def hits_to_candidates(hits, source: str):
    """Turn search hits into candidate rows, one row per hit.

    Each row starts as a copy of the hit's payload (an empty dict when the
    payload is missing) and then has these keys set, replacing any payload
    key of the same name:

    - ``sim``: the hit's score as a float
    - ``source``: the label passed in
    - ``candidate_text``: the payload value, or ``""`` when absent
    - ``Journal_Name_norm`` / ``Special_Issue_Name_norm``: ``normalize_key``
      applied to the corresponding payload value (``""`` when absent)

    Returns a DataFrame built from those rows.
    """
    def _candidate(hit):
        fields = hit.payload or {}
        return {
            **fields,
            "sim": float(hit.score),
            "source": source,
            "candidate_text": fields.get("candidate_text", ""),
            "Journal_Name_norm": normalize_key(fields.get("Journal_Name", "")),
            "Special_Issue_Name_norm": normalize_key(fields.get("Special_Issue_Name", "")),
        }

    return pd.DataFrame([_candidate(hit) for hit in hits])

# Candidate rows from both collections, stacked under a fresh 0..n-1 index.
cand = pd.concat(
    objs=[
        hits_to_candidates(hits=primary_hits, source="PRIMARY"),
        hits_to_candidates(hits=assoc_hits, source="ASSOC"),
    ],
    ignore_index=True,
)

# Preview the first five candidates with only the identifying columns.
cand[
    [
        "source",
        "Journal_Name",
        "Special_Issue_Name",
        "sim",
    ]
].head(n=5)


Unnamed: 0,source,Journal_Name,Special_Issue_Name,sim
0,PRIMARY,Geoscience Letters,Neural Networks for Environmental Impact Asses...,0.606136
1,PRIMARY,Geoscience Data Journal,Neural Networks for Environmental Impact Asses...,0.596939
2,PRIMARY,Journal of Environmental & Earth Sciences,Advancement in Data Science for Remote Sensing...,0.579502
3,PRIMARY,Journal of Environmental & Earth Sciences,Geospatial Big Data Management Strategies for ...,0.533765
4,PRIMARY,Forest Science,AI-Driven Change Detection in the Remote Sensi...,0.527633


In [17]:
from app.core_logic import (
    add_history_scores_from_aggregates,
)

# Hand the candidates, the four history tables, and the title information to
# the project's scoring routine.
ranked = add_history_scores_from_aggregates(
    cand_df=cand,
    pub_j=pub_j,
    rej_j=rej_j,
    pub_si=pub_si,
    rej_si=rej_si,
    title=title,
    title_domain=title_domain,
)

# Show the first 15 rows with similarity next to the final score.
ranked[
    [
        "source",
        "Journal_Name",
        "Special_Issue_Name",
        "sim",
        "final_score",
    ]
].head(n=15)


Unnamed: 0,source,Journal_Name,Special_Issue_Name,sim,final_score
11,ASSOC,Earth Science Informatics,,0.615186,0.746306
17,ASSOC,Water and Environment Journal,,0.546106,0.653249
10,ASSOC,International Journal of Environmental Science...,,0.621658,0.646408
16,ASSOC,Information Processing and Management,,0.546421,0.634076
12,ASSOC,Egyptian Journal of Remote Sensing and Space S...,,0.598124,0.601629
18,ASSOC,IEEE Internet of Things Journal,,0.53965,0.588615
1,PRIMARY,Geoscience Data Journal,Neural Networks for Environmental Impact Asses...,0.596939,0.582291
13,ASSOC,European Journal of Remote Sensing,,0.575121,0.56339
14,ASSOC,Intelligent Buildings International,,0.554858,0.560334
15,ASSOC,International Journal of Data Science and Anal...,,0.553128,0.547125
