In [None]:
import google.generativeai as genai
import csv
import time  # for rate-limiting if needed

In [None]:
import json

In [None]:
import requests

import os

# The Semantic Scholar key is read from the environment so the secret is never
# stored in the notebook. Export SEMANTIC_SCHOLAR_API_KEY before starting the
# kernel; an unset variable yields an empty string.
API_KEY = os.environ.get("SEMANTIC_SCHOLAR_API_KEY", "")
SEARCH_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
DETAILS_URL = "https://api.semanticscholar.org/graph/v1/paper/"

# Only send the auth header when a key is configured, so an empty key is not
# transmitted as a bogus credential.
HEADERS = {"x-api-key": API_KEY} if API_KEY else {}

def search_papers(query, limit=20, timeout=30):
    """Run a single-page paper search against the Semantic Scholar search endpoint.

    Args:
        query: Free-text search string sent as the ``query`` parameter.
        limit: Number of results requested; only the first page (offset 0) is fetched.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The list stored under the response's ``data`` key, or an empty list when
        the request fails, times out, or returns a non-200 status.
    """
    params = {
        "query": query,
        "fields": "title,abstract,year,citationCount,authors,url",
        "limit": limit,
        "offset": 0,
    }
    try:
        # Without an explicit timeout a stalled connection blocks the cell forever.
        response = requests.get(SEARCH_URL, headers=HEADERS, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like HTTP errors: report and return nothing.
        print("Search Error:", exc)
        return []

    if response.status_code != 200:
        print("Search Error:", response.status_code, response.text)
        return []

    # `data` can be absent or null; normalise both to an empty list.
    return response.json().get("data") or []

def rank_papers(papers):
    """Score papers by citation count weighted for recency and sort best-first.

    Each paper dict receives a ``score`` key equal to
    ``citationCount * (1 + (year - 2000) / 25)``, so newer papers get a slight
    boost. A missing or null ``year`` is treated as 2000 and a missing or null
    ``citationCount`` as 0.

    Args:
        papers: List of paper dicts. The dicts are mutated in place (``score``
            is added).

    Returns:
        A new list containing the same dicts, ordered by ``score`` descending.
    """
    for paper in papers:
        # dict.get(key, default) still returns None when the key exists with a
        # null value, so fall back with `or` instead of the default argument.
        year = paper.get("year") or 2000
        citations = paper.get("citationCount") or 0
        recency_weight = 1 + (year - 2000) / 25  # More recent gets slight boost
        paper["score"] = citations * recency_weight
    return sorted(papers, key=lambda paper: paper["score"], reverse=True)

def retrieve_top_papers(query, top_k=5, timeout=30):
    """Collect the first ``top_k`` search hits that have an abstract and rank them.

    Pages through the Semantic Scholar search endpoint (20 results per page,
    at most 10 pages) until ``top_k`` papers with a non-empty abstract have
    been gathered, then orders them with ``rank_papers``.

    Args:
        query: Free-text search string.
        top_k: Number of papers to return.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A dict keyed ``"Paper 1"``, ``"Paper 2"``, ... whose values hold the
        ``title``, ``abstract``, ``year``, ``citations`` and ``url`` of each
        paper. The dict is empty when nothing usable was retrieved.
    """
    collected = []
    offset = 0
    limit = 20
    max_pages = 10  # Avoid infinite loops

    while len(collected) < top_k and max_pages > 0:
        params = {
            "query": query,
            "fields": "title,abstract,year,citationCount,authors,url",
            "limit": limit,
            "offset": offset,
        }
        try:
            # Without an explicit timeout a stalled connection blocks forever.
            response = requests.get(SEARCH_URL, headers=HEADERS, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print("Search Error:", exc)
            break
        if response.status_code != 200:
            print("Search Error:", response.status_code, response.text)
            break

        # `data` can be absent or null; both mean there is nothing more to read.
        papers = response.json().get("data") or []
        if not papers:
            break

        for paper in papers:
            if paper.get("abstract"):  # Only include papers with valid abstract
                collected.append(paper)
                if len(collected) == top_k:
                    break

        offset += limit
        max_pages -= 1

    if len(collected) < top_k:
        print(f"⚠️ Only {len(collected)} papers found with abstracts.")

    # Rank the filtered papers
    ranked = rank_papers(collected)
    top_papers = ranked[:top_k]

    # Return dictionary format
    results = {}
    for idx, paper in enumerate(top_papers, 1):
        results[f"Paper {idx}"] = {
            "title": paper["title"],
            "abstract": paper["abstract"],
            "year": paper.get("year"),
            "citations": paper.get("citationCount"),
            "url": paper.get("url", ""),
        }

    return results


In [None]:
# ------------------ EXAMPLE USAGE ------------------ #
if __name__ == "__main__":
    import json

    user_query = "Brain Tumor Segmentation using Machine Learning"
    # One path drives both the write and the message, so they cannot drift
    # apart. The analysis cell further down reads "top_climate_papers.json",
    # so that is the file written here.
    output_path = "top_climate_papers.json"
    results_dict = retrieve_top_papers(user_query)

    # Save to JSON file
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results_dict, f, ensure_ascii=False, indent=4)

    print(f"📄 Results saved to {output_path} in dictionary format.")


📄 Results saved to top_climate_papers.json in dictionary format.


**Now that we have extracted the paper data (in JSON format) from one API, we send it to a second API for further analysis of the retrieved research papers.**

In [None]:
# Configure Gemini
import os

# The key comes from the GEMINI_API_KEY environment variable so the secret is
# never stored in the notebook; an unset variable passes None to configure().
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
model = genai.GenerativeModel("models/gemini-1.5-flash")

In [None]:
import os

# REST endpoint for gemini-1.5-flash text generation. The key is appended from
# the GEMINI_API_KEY environment variable so the secret is not stored in the
# notebook; an unset variable leaves the `key` parameter empty.
GEMINI_URL = (
    "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent?key="
    + os.environ.get("GEMINI_API_KEY", "")
)

In [None]:
# # 🧠 Prompt Template
# def make_prompt(abstract):
#     return f"""
# You are a research assistant performing a literature survey.

# Given the abstract of a research paper, respond exactly with the following fields in plain text:
# Paper Title: ...
# Summary: ...
# Challenges: ...
# Scope for Improvement: ...
# (Tip: If any of the following sections is not properly mentioned, could u please try to insert it based on the context.)
# ### Abstract:
# {abstract}
# """


In [None]:
def analyze_abstract(json_path, timeout=60):
    """Send every abstract in a saved results file to Gemini and collect the replies.

    Args:
        json_path: Path to a JSON file mapping paper ids to dicts that carry an
            ``abstract`` plus ``title``, ``year``, ``citations`` and ``url``.
        timeout: Seconds to wait for each Gemini response before giving up.

    Returns:
        A dict keyed by paper id. Successful entries hold ``title``, ``year``,
        ``citations``, ``url`` and ``analysis``; failed entries hold ``title``
        and ``error``. Papers without an abstract are skipped.
    """
    # Load abstract data
    with open(json_path, "r", encoding="utf-8") as f:
        papers = json.load(f)

    analyzed_results = {}

    for paper_id, paper in papers.items():
        abstract = paper.get("abstract", "")
        if not abstract:
            continue

        prompt = f"""You are a research assistant. Analyze the following abstract and provide:
                      1. A brief summary
                      2. Key challenges mentioned or implied
                      3. Scope for improvement or future work
                (Tip: If any of the following sections is not properly mentioned, could u please try to insert it based on the context.)
Abstract:
\"\"\"
{abstract}
\"\"\""""

        payload = {
            "contents": [
                {
                    "parts": [{"text": prompt}]
                }
            ]
        }

        try:
            # Without an explicit timeout a stalled connection blocks forever.
            response = requests.post(GEMINI_URL, json=payload, timeout=timeout)
        except requests.RequestException as exc:
            # Report only the exception type: the message can embed the request
            # URL, which carries the API key as a query parameter.
            failure = type(exc).__name__
            print(f"❌ Failed {paper_id} - {failure}")
            analyzed_results[paper_id] = {
                "title": paper.get("title", ""),
                "error": failure
            }
            continue

        if response.status_code != 200:
            print(f"❌ Failed {paper_id} - {response.status_code}")
            analyzed_results[paper_id] = {
                "title": paper.get("title", ""),
                "error": response.text
            }
            continue

        try:
            generated_text = response.json()["candidates"][0]["content"]["parts"][0]["text"]
        except (KeyError, IndexError, TypeError, ValueError):
            # A 200 reply is not guaranteed to contain a text candidate; record
            # the failure for this paper instead of aborting the whole run.
            print(f"❌ Failed {paper_id} - no text in response")
            analyzed_results[paper_id] = {
                "title": paper.get("title", ""),
                "error": "No text returned in Gemini response"
            }
            continue

        analyzed_results[paper_id] = {
            "title": paper.get("title", ""),
            "year": paper.get("year"),
            "citations": paper.get("citations"),
            "url": paper.get("url", ""),
            "analysis": generated_text.strip()
        }
        print(f"✅ Processed {paper_id}")

    return analyzed_results


In [None]:
# # List of abstracts to analyze
# abstracts = [
#     """One of the aspects of quantum theory which has attracted the most general attention, is the novelty of the logical notions which it presupposes. It asserts that even a complete mathematical description of a physical system S does not in general enable one to predict with certainty the result of an experiment on S, and that in particular one can never predict with certainty both the position and the momentum of S, (Heisenberg’s Uncertainty Principle). It further asserts that most pairs of observations are incompatible, and cannot be made on S, simultaneously (Principle of Non-commutativity of Observations).""",
#     """ To cope with real-world dynamics, an intelligent system needs to incrementally acquire, update, accumulate, and exploit knowledge throughout its lifetime. This ability, known as continual learning, provides a foundation for AI systems to develop themselves adaptively. In a general sense, continual learning is explicitly limited by catastrophic forgetting, where learning a new task usually results in a dramatic performance drop of the old tasks. Beyond this, increasingly numerous advances have emerged in recent years that largely extend the understanding and application of continual learning. The growing and widespread interest in this direction demonstrates its realistic significance as well as complexity. In this work, we present a comprehensive survey of continual learning, seeking to bridge the basic settings, theoretical foundations, representative methods, and practical applications. Based on existing theoretical and empirical results, we summarize the general objectives of continual learning as ensuring a proper stability-plasticity trade-off and an adequate intra/inter-task generalizability in the context of resource efficiency. Then we provide a state-of-the-art and elaborated taxonomy, extensively analyzing how representative strategies address continual learning, and how they are adapted to particular challenges in various applications. Through an in-depth discussion of promising directions, we believe that such a holistic perspective can greatly facilitate subsequent exploration in this field and beyond."""
#     """Brain Tumor Segmentation Using Deep Learning by Type Specific Sorting of Images. Recently deep learning has been playing a major role in the field of computer vision. One of its applications is the reduction of human judgment in the diagnosis of diseases. Especially, brain tumor diagnosis requires high accuracy, where minute errors in judgment may lead to disaster. For this reason, brain tumor segmentation is an important challenge for medical purposes. Currently several methods exist for tumor segmentation but they all lack high accuracy. Here we present a solution for brain tumor segmenting by using deep learning. In this work, we studied different angles of brain MR images and applied different networks for segmentation. The effect of using separate networks for segmentation of MR images is evaluated by comparing the results with a single network. Experimental evaluations of the networks show that Dice score of 0.73 is achieved for a single network and 0.79 in obtained for multiple networks."""
#     # Add more abstracts here
# ]


In [None]:
if __name__ == "__main__":
    # Analyze the saved search results with Gemini, then persist the analysis.
    results = analyze_abstract("top_climate_papers.json")

    serialized = json.dumps(results, ensure_ascii=False, indent=4)
    with open("analyzed_climate_papers.json", mode="w", encoding="utf-8") as out_file:
        out_file.write(serialized)

    print("📊 Analysis saved to analyzed_climate_papers.json")


✅ Processed Paper 1
✅ Processed Paper 2
✅ Processed Paper 3
✅ Processed Paper 4
✅ Processed Paper 5
📊 Analysis saved to analyzed_climate_papers.json


In [None]:
# 🧾 Store all processed data (as a dictionary) in JSON format
# NOTE(review): this cell looks stale. `abstracts` is only defined inside a
# commented-out cell of this notebook, and `analyze_abstract` as defined in
# this notebook takes a single `json_path` argument rather than
# `(abstract, idx)`. Confirm whether the cell should be removed or ported to
# the current helper before relying on it.
results = {}

for idx, abstract in enumerate(abstracts, start=1):
    try:
        data = analyze_abstract(abstract, idx)
        results[f"Paper {idx}"] = data
        print(f"✅ Processed Paper {idx}")
        time.sleep(1)  # pause one second between calls
    except Exception as e:
        # Any failure for one abstract is reported and the loop moves on.
        print(f"❌ Error processing Paper {idx}: {e}")

# 💾 Save to JSON
with open('literature_survey.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)

print("📄 literature_survey.json created successfully!")

✅ Processed Paper 1
✅ Processed Paper 2
📄 literature_survey.json created successfully!


Gradio Interface

In [None]:
import gradio as gr
import json
import requests

import os

# --- API KEYS ---
# Both keys are read from the environment so no secret is stored in the
# notebook; an unset variable yields an empty string.
SEMANTIC_SCHOLAR_API_KEY = os.environ.get("SEMANTIC_SCHOLAR_API_KEY", "")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

# --- ENDPOINTS ---
SEARCH_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
# Only send the auth header when a key is configured, so an empty key is not
# transmitted as a bogus credential.
HEADERS = {"x-api-key": SEMANTIC_SCHOLAR_API_KEY} if SEMANTIC_SCHOLAR_API_KEY else {}
GEMINI_URL = f"https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent?key={GEMINI_API_KEY}"


# STEP 1: Search and retrieve papers with a balance of citations and recency
def retrieve_top_papers(query, top_k_cited=3, top_k_recent=2, current_year=None, timeout=30):
    """Fetch papers for ``query`` and pick a mix of highly cited and recent ones.

    Pages through the Semantic Scholar search endpoint (20 results per page,
    at most 10 pages, stopping once 100 papers with abstracts are collected),
    then selects the most cited papers followed by recent, reasonably cited
    ones. Papers are de-duplicated by title.

    Args:
        query: Free-text search string.
        top_k_cited: Number of most-cited papers to select, regardless of year.
        top_k_recent: Number of recent papers to select (published within the
            last five years and cited at least 50 times).
        current_year: Year the recency window is measured from. Defaults to
            the current calendar year.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A dict keyed ``"Paper 1"``, ``"Paper 2"``, ... whose values hold the
        ``title``, ``abstract``, ``year``, ``citations`` and ``url`` of each
        selected paper. The dict is empty when nothing usable was retrieved.
    """
    import datetime

    if current_year is None:
        # A hard-coded year silently shifts the "recent" window as time passes.
        current_year = datetime.date.today().year

    collected = []
    offset = 0
    limit = 20
    max_pages = 10

    while len(collected) < 100 and max_pages > 0:
        params = {
            "query": query,
            "fields": "title,abstract,year,citationCount,authors,url",
            "limit": limit,
            "offset": offset,
        }
        try:
            # Without an explicit timeout a stalled connection blocks forever.
            response = requests.get(SEARCH_URL, headers=HEADERS, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print("Search Error:", exc)
            break
        if response.status_code != 200:
            print("Search Error:", response.status_code)
            break

        # `data` can be absent or null; both mean there is nothing more to read.
        papers = response.json().get("data") or []
        if not papers:
            break

        collected.extend(paper for paper in papers if paper.get("abstract"))

        offset += limit
        max_pages -= 1

    # dict.get(key, default) still returns None when the key exists with a
    # null value, which would break the comparisons below; coerce to 0.
    def _year(paper):
        return paper.get("year") or 0

    def _citations(paper):
        return paper.get("citationCount") or 0

    # Top cited papers (irrespective of year)
    cited_sorted = sorted(collected, key=_citations, reverse=True)

    # Recent and reasonably cited (within the last five years, citations >= 50)
    recent_sorted = sorted(
        [p for p in collected if _year(p) >= current_year - 5 and _citations(p) >= 50],
        key=lambda p: (_year(p), _citations(p)),
        reverse=True,
    )

    # De-duplicate by title while selecting, so a paper that is both highly
    # cited and recent does not shrink the result below the requested counts.
    seen_titles = set()

    def _take_unique(candidates, count):
        taken = []
        for paper in candidates:
            if len(taken) >= count:
                break
            if paper["title"] in seen_titles:
                continue
            seen_titles.add(paper["title"])
            taken.append(paper)
        return taken

    unique_final = _take_unique(cited_sorted, top_k_cited) + _take_unique(recent_sorted, top_k_recent)

    result_dict = {}
    for idx, paper in enumerate(unique_final, 1):
        result_dict[f"Paper {idx}"] = {
            "title": paper["title"],
            "abstract": paper["abstract"],
            "year": paper.get("year"),
            "citations": paper.get("citationCount"),
            "url": paper.get("url", ""),
        }

    return result_dict


# STEP 2: Analyze abstract using Gemini
def analyze_abstract_dict(papers_dict, timeout=60):
    """Ask Gemini to analyze the abstract of every paper in ``papers_dict``.

    Args:
        papers_dict: Mapping of paper ids to dicts with ``title``, ``abstract``,
            ``year``, ``citations`` and ``url`` keys.
        timeout: Seconds to wait for each Gemini response before giving up.

    Returns:
        A dict keyed by paper id. Successful entries hold ``title``, ``year``,
        ``citations``, ``url`` and ``analysis``; failed entries hold ``title``
        and ``error``.
    """
    analyzed = {}

    for paper_id, paper in papers_dict.items():
        abstract = paper["abstract"]
        prompt = f"""You are a research assistant. Analyze the following abstract and provide:
1. A brief summary
2. Key challenges mentioned or implied
3. Authors' perspective
4. Scope for improvement or future work
(Tip: If any of the following sections is not properly mentioned, please infer and fill it based on context.)
Abstract:
\"\"\"{abstract}\"\"\""""

        payload = {
            "contents": [
                {
                    "parts": [{"text": prompt}]
                }
            ]
        }

        try:
            # Without an explicit timeout a stalled connection blocks forever.
            response = requests.post(GEMINI_URL, json=payload, timeout=timeout)
        except requests.RequestException as exc:
            # Report only the exception type: the message can embed the request
            # URL, which carries the API key as a query parameter.
            analyzed[paper_id] = {
                "title": paper["title"],
                "error": f"Error: {type(exc).__name__}"
            }
            continue

        if response.status_code != 200:
            analyzed[paper_id] = {
                "title": paper["title"],
                "error": f"Error: {response.status_code}"
            }
            continue

        try:
            generated_text = response.json()["candidates"][0]["content"]["parts"][0]["text"]
        except (KeyError, IndexError, TypeError, ValueError):
            # A 200 reply is not guaranteed to contain a text candidate; record
            # the failure for this paper instead of aborting the whole survey.
            analyzed[paper_id] = {
                "title": paper["title"],
                "error": "Error: no text returned in Gemini response"
            }
            continue

        analyzed[paper_id] = {
            "title": paper["title"],
            "year": paper.get("year"),
            "citations": paper.get("citations"),
            "url": paper.get("url", ""),
            "analysis": generated_text.strip()
        }

    return analyzed


# STEP 3: Combined pipeline
def literature_survey(query):
    """Run the search-then-analyze pipeline and render the result as Markdown.

    Args:
        query: Research topic entered by the user.

    Returns:
        A Markdown string with one section per paper (heading, year and
        citation line, link, Gemini analysis or error, horizontal rule), or a
        fixed message when the search yields no papers.
    """
    found = retrieve_top_papers(query)
    if not found:
        return "❌ No papers found with abstracts for your query."

    sections = []
    for label, entry in analyze_abstract_dict(found).items():
        # Prefer the analysis text, then the recorded error, then a placeholder.
        body = entry.get('analysis', entry.get('error', 'No analysis'))
        sections.append(
            f"### {label}: {entry['title']}\n"
            f"- 📅 Year: {entry.get('year', 'N/A')} | 📊 Citations: {entry.get('citations', 'N/A')}\n"
            f"- 🔗 [Link to Paper]({entry.get('url', '#')})\n"
            f"#### 🔍 Gemini Analysis:\n{body}\n\n"
            "---\n"
        )

    return "".join(sections)


# STEP 4: Launch Gradio Interface
# One free-text box in, rendered Markdown out; `literature_survey` does the work.
topic_input = gr.Textbox(placeholder="Enter a Research Topic:- ")
survey_output = gr.Markdown()

interface = gr.Interface(
    fn=literature_survey,
    inputs=topic_input,
    outputs=survey_output,
    title="📚 AI-Powered Literature Survey Assistant",
    description="Enter a research query. This app retrieves top foundational and recent relevant papers with abstracts, and uses Gemini AI to analyze them.",
)

if __name__ == "__main__":
    # debug=True keeps the cell running so errors and logs stay visible.
    interface.launch(debug=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://39d5dd88c2e97e0053.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
