<a href="https://colab.research.google.com/github/Kursat1907/Yarat-c-dusunme/blob/main/Makale_%C3%96zetleri_Arama_Arac%C4%B1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio pandas requests

import gradio as gr
import pandas as pd
import requests

# 📡 Semantic Scholar API fonksiyonu
def fetch_papers(query, max_results):
    """Search the Semantic Scholar Graph API and tabulate the matching papers.

    Args:
        query: Free-text search string, sent as the ``query`` parameter.
        max_results: Number of papers to request, sent as the ``limit`` parameter.

    Returns:
        tuple: ``(df, csv_text)``. ``df`` is a DataFrame with the columns
        Title, Year, Authors, DOI and Abstract (the columns exist even when
        nothing was found); ``csv_text`` is the same table serialized as CSV
        without the index.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": max_results,
        "fields": "title,abstract,year,authors,externalIds",
    }
    # Bound the wait so an unresponsive API cannot hang the caller forever.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()
    papers = data.get("data") or []
    print(f"Found {len(papers)} results for query: {query}")

    # A field can be present with a JSON null value, in which case
    # dict.get(key, default) still yields None; fall back with `or` instead.
    results = []
    for item in papers:
        authors = item.get("authors") or []
        external_ids = item.get("externalIds") or {}
        results.append({
            "Title": item.get("title") or "",
            "Year": item.get("year") or "",
            "Authors": ", ".join(a.get("name") or "" for a in authors),
            "DOI": external_ids.get("DOI") or "",
            "Abstract": item.get("abstract") or "No abstract available",
        })

    # Naming the columns keeps the table schema stable for an empty result set.
    df = pd.DataFrame(results, columns=["Title", "Year", "Authors", "DOI", "Abstract"])
    return df, df.to_csv(index=False)

# 🌐 Gradio Arayüzü
with gr.Blocks() as demo:
    gr.Markdown("## 📚 Akademik Makale Ara (Semantic Scholar API)")

    query = gr.Textbox(label="🔍 Konu / Anahtar Kelime (örn: data mining)")
    max_results = gr.Slider(5, 50, value=10, step=1, label="🔢 Makale Sayısı")
    search_btn = gr.Button("Ara")

    output_table = gr.Dataframe(label="📄 Sonuçlar (Tablo)")
    # Starts hidden; the handler reveals it once a CSV has been written.
    download = gr.File(label="⬇️ CSV Dosyası", visible=False)

    def search_and_display(q, n):
        """Run the search, save the results as CSV and expose the file for download.

        Args:
            q: Search text from the query box.
            n: Requested number of papers from the slider.

        Returns:
            The results DataFrame and an update that sets the CSV path on the
            file component and makes it visible.
        """
        df, csv = fetch_papers(q, n)
        # newline="" keeps the line endings already present in the CSV text untouched.
        with open("results.csv", "w", encoding="utf-8", newline="") as f:
            f.write(csv)
        # Returning a bare path would set the value but leave the component hidden.
        return df, gr.update(value="results.csv", visible=True)

    search_btn.click(search_and_display, inputs=[query, max_results], outputs=[output_table, download])

demo.launch()

Collecting gradio
  Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.



In [None]:
!pip install gradio pandas requests nltk sklearn gensim pyLDAvis

import gradio as gr
import pandas as pd
import requests
import nltk
import re
from sklearn.feature_extraction.text import CountVectorizer
from gensim import corpora, models
import pyLDAvis.gensim_models
import tempfile
import os

nltk.download("stopwords")
nltk.download("punkt")
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# 📡 Semantic Scholar API fonksiyonu
def fetch_papers(query, max_results):
    """Search the Semantic Scholar Graph API and tabulate the matching papers.

    Args:
        query: Free-text search string, sent as the ``query`` parameter.
        max_results: Number of papers to request, sent as the ``limit`` parameter.

    Returns:
        tuple: ``(df, csv_text)``. ``df`` is a DataFrame with the columns
        Title, Year, Authors, DOI and Abstract (the columns exist even when
        nothing was found); ``csv_text`` is the same table serialized as CSV
        without the index.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": max_results,
        "fields": "title,abstract,year,authors,externalIds",
    }
    # Bound the wait so an unresponsive API cannot hang the caller forever.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()

    # A field can be present with a JSON null value, in which case
    # dict.get(key, default) still yields None; fall back with `or` instead.
    results = []
    for item in data.get("data") or []:
        authors = item.get("authors") or []
        external_ids = item.get("externalIds") or {}
        results.append({
            "Title": item.get("title") or "",
            "Year": item.get("year") or "",
            "Authors": ", ".join(a.get("name") or "" for a in authors),
            "DOI": external_ids.get("DOI") or "",
            # A null abstract is intentionally left as None so that consumers
            # which drop missing abstracts (dropna/notna) skip the paper.
            "Abstract": item.get("abstract", "No abstract available"),
        })

    # Naming the columns keeps "Abstract" addressable for an empty result set.
    df = pd.DataFrame(results, columns=["Title", "Year", "Authors", "DOI", "Abstract"])
    return df, df.to_csv(index=False)

# 🧠 Temizleme ve LDA Analizi Fonksiyonu
def lda_topic_modeling(df, num_topics=5):
    """Fit an LDA topic model on the abstracts in *df*.

    Args:
        df: DataFrame with an "Abstract" column of text.
        num_topics: Number of topics to fit.

    Returns:
        tuple: ``(topics, vis_html)``. ``topics`` is the list produced by
        ``LdaModel.print_topics(num_words=5)``, or an empty list when no
        abstract yields any token. ``vis_html`` is the path of the saved
        pyLDAvis HTML page, a "LDA Visualization Error: ..." string if
        rendering failed, or None when no model was fitted.
    """
    stop_words = set(stopwords.words("english"))

    # Text preprocessing: lowercase, strip punctuation, tokenize, drop stop
    # words and tokens of two characters or fewer.
    def preprocess(text):
        text = re.sub(r"[^\w\s]", "", text.lower())
        tokens = word_tokenize(text)
        return [w for w in tokens if w not in stop_words and len(w) > 2]

    # Model only abstracts that are present and not blank. (The earlier draft
    # called this function recursively here with an undefined name.)
    df_clean = df[df["Abstract"].notna() & (df["Abstract"].str.strip() != "")]
    texts = df_clean["Abstract"].apply(preprocess).tolist()

    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # Without a single token there is nothing to fit a model on.
    if not any(corpus):
        return [], None

    lda_model = models.LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, random_state=42)
    topics = lda_model.print_topics(num_words=5)

    try:
        vis_data = pyLDAvis.gensim_models.prepare(lda_model, corpus, dictionary)
        temp_dir = tempfile.mkdtemp()
        html_path = os.path.join(temp_dir, "lda.html")
        pyLDAvis.save_html(vis_data, html_path)
        vis_html = html_path
    except Exception as e:
        # The visualization is best-effort; the topics are still returned.
        vis_html = f"LDA Visualization Error: {e}"

    return topics, vis_html

# 🌐 Gradio Arayüzü
with gr.Blocks() as demo:
    gr.Markdown("## 📚 Akademik Makale Ara + LDA Konu Modelleme (Semantic Scholar)")

    query = gr.Textbox(label="🔍 Konu / Anahtar Kelime (örn: data mining)")
    max_results = gr.Slider(5, 50, value=10, step=1, label="🔢 Makale Sayısı")
    num_topics = gr.Slider(2, 10, value=5, step=1, label="🧠 LDA Konu Sayısı")
    search_btn = gr.Button("Ara")

    output_table = gr.Dataframe(label="📄 Sonuçlar (Tablo)")
    # Both file outputs start hidden; the handler reveals them when it has a file.
    download = gr.File(label="⬇️ CSV Dosyası", visible=False)
    topic_output = gr.Textbox(label="🧠 LDA Konuları")
    vis_output = gr.File(label="📊 LDA Görselleştirme", visible=False)

    def full_pipeline(q, n, t):
        """Search, save the usable rows as CSV and run LDA on their abstracts.

        Args:
            q: Search text from the query box.
            n: Requested number of papers.
            t: Requested number of LDA topics.

        Returns:
            Four values matching the outputs wired below: the table, an update
            for the CSV file component, the topic text and an update for the
            visualization file component.
        """
        df, csv = fetch_papers(q, n)

        # An empty search result may not even carry an "Abstract" column.
        if df.empty:
            return (df, gr.update(value=None, visible=False),
                    "Uygun özet bulunamadı.", gr.update(value=None, visible=False))

        # Filter out missing or blank abstracts
        df_clean = df[df["Abstract"].notna() & (df["Abstract"].str.strip() != "")]

        if df_clean.empty:
            return (df, gr.update(value=None, visible=False),
                    "Uygun özet bulunamadı.", gr.update(value=None, visible=False))

        # Save the CSV
        csv_filename = "results.csv"
        df_clean.to_csv(csv_filename, index=False)

        # LDA analysis
        topics, vis_path = lda_topic_modeling(df_clean, t)

        if isinstance(topics, list):
            topics_str = "\n".join([f"T{i+1}: {topic}" for i, topic in enumerate(topics)])
        else:
            topics_str = topics

        # Only an actual HTML path is offered for download; error strings are not.
        if isinstance(vis_path, str) and vis_path.endswith(".html"):
            vis_file = gr.update(value=vis_path, visible=True)
        else:
            vis_file = gr.update(value=None, visible=False)

        # A bare path would set the value but leave the hidden component hidden.
        return df_clean, gr.update(value=csv_filename, visible=True), topics_str, vis_file

    # The handler was previously defined but never attached to the button.
    search_btn.click(
        full_pipeline,
        inputs=[query, max_results, num_topics],
        outputs=[output_table, download, topic_output, vis_output]
    )

# NOTE(review): this cell never calls demo.launch(); add it if the UI is meant
# to be started from here.


Collecting sklearn
  Using cached sklearn-0.0.post12.tar.gz (2.6 kB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
!pip install gradio pandas requests nltk sklearn gensim pyLDAvis

import gradio as gr
import pandas as pd
import requests
import nltk
import re
from sklearn.feature_extraction.text import CountVectorizer
from gensim import corpora, models
import pyLDAvis.gensim_models
import tempfile
import os

nltk.download("stopwords")
nltk.download("punkt")
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# 📡 Semantic Scholar API fonksiyonu
def fetch_papers(query, max_results):
    """Query the Semantic Scholar paper search endpoint and build a results table.

    Args:
        query: Free-text search string, sent as the ``query`` parameter.
        max_results: Number of papers to request, sent as the ``limit`` parameter.

    Returns:
        tuple: ``(df, csv_text)``. ``df`` has the columns Title, Year, Authors,
        DOI and Abstract (present even for an empty result); ``csv_text`` is
        ``df`` serialized as CSV without the index.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": max_results,
        "fields": "title,abstract,year,authors,externalIds",
    }
    # Bound the wait so an unresponsive API cannot hang the caller forever.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()

    # A field can be present with a JSON null value, in which case
    # dict.get(key, default) still yields None; fall back with `or` instead.
    results = []
    for item in data.get("data") or []:
        authors = item.get("authors") or []
        external_ids = item.get("externalIds") or {}
        results.append({
            "Title": item.get("title") or "",
            "Year": item.get("year") or "",
            "Authors": ", ".join(a.get("name") or "" for a in authors),
            "DOI": external_ids.get("DOI") or "",
            # A null abstract is intentionally left as None so that consumers
            # which drop missing abstracts (dropna/notna) skip the paper.
            "Abstract": item.get("abstract", "No abstract available"),
        })

    # Naming the columns keeps "Abstract" addressable for an empty result set.
    df = pd.DataFrame(results, columns=["Title", "Year", "Authors", "DOI", "Abstract"])
    return df, df.to_csv(index=False)

# 🧠 Temizleme ve LDA Analizi Fonksiyonu
def lda_topic_modeling(df, num_topics=5):
    """Fit an LDA topic model on the non-missing abstracts in *df*.

    Args:
        df: DataFrame with an "Abstract" column of text.
        num_topics: Number of topics to fit.

    Returns:
        tuple: ``(topics, vis_html)``. ``topics`` is the list produced by
        ``LdaModel.print_topics(num_words=5)``, or an empty list when no
        abstract yields any token. ``vis_html`` is the path of the saved
        pyLDAvis HTML page, a "LDA Visualization Error: ..." string if
        rendering failed, or None when no model was fitted.
    """
    stop_words = set(stopwords.words("english"))

    # Text preprocessing: lowercase, strip punctuation, tokenize, drop stop
    # words and tokens of two characters or fewer.
    def preprocess(text):
        text = re.sub(r"[^\w\s]", "", text.lower())
        tokens = word_tokenize(text)
        return [w for w in tokens if w not in stop_words and len(w) > 2]

    texts = df["Abstract"].dropna().apply(preprocess).tolist()

    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # LdaModel cannot be trained on a collection without terms; report
    # "no topics" instead of letting the training call raise.
    if not any(corpus):
        return [], None

    lda_model = models.LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, random_state=42)
    topics = lda_model.print_topics(num_words=5)

    try:
        vis_data = pyLDAvis.gensim_models.prepare(lda_model, corpus, dictionary)
        temp_dir = tempfile.mkdtemp()
        html_path = os.path.join(temp_dir, "lda.html")
        pyLDAvis.save_html(vis_data, html_path)
        vis_html = html_path
    except Exception as e:
        # The visualization is best-effort; the topics are still returned.
        vis_html = f"LDA Visualization Error: {e}"

    return topics, vis_html

# 🌐 Gradio Arayüzü
with gr.Blocks() as demo:
    gr.Markdown("## 📚 Akademik Makale Ara + LDA Konu Modelleme (Semantic Scholar)")

    query = gr.Textbox(label="🔍 Konu / Anahtar Kelime (örn: data mining)")
    max_results = gr.Slider(5, 50, value=10, step=1, label="🔢 Makale Sayısı")
    num_topics = gr.Slider(2, 10, value=5, step=1, label="🧠 LDA Konu Sayısı")
    search_btn = gr.Button("Ara")

    output_table = gr.Dataframe(label="📄 Sonuçlar (Tablo)")
    # Both file outputs start hidden; the handler reveals them when it has a file.
    download = gr.File(label="⬇️ CSV Dosyası", visible=False)
    topic_output = gr.Textbox(label="🧠 LDA Konuları")
    vis_output = gr.File(label="📊 LDA Görselleştirme", visible=False)

    def full_pipeline(q, n, t):
        """Search, save the results as CSV and run LDA on the abstracts.

        Args:
            q: Search text from the query box.
            n: Requested number of papers.
            t: Requested number of LDA topics.

        Returns:
            Four values matching the outputs wired below: the table, an update
            for the CSV file component, the topic text and an update for the
            visualization file component.
        """
        df, csv = fetch_papers(q, n)

        # Nothing to model. The emptiness check comes first because an empty
        # result may not even carry an "Abstract" column.
        if df.empty or df["Abstract"].dropna().empty:
            return (df, gr.update(value=None, visible=False),
                    "Uygun özet bulunamadı.", gr.update(value=None, visible=False))

        # newline="" keeps the line endings already present in the CSV text untouched.
        with open("results.csv", "w", encoding="utf-8", newline="") as f:
            f.write(csv)

        topics, vis_path = lda_topic_modeling(df, t)
        topics_str = "\n".join([f"T{i+1}: {topic}" for i, topic in enumerate(topics)])

        # Only an actual HTML path is offered for download; error strings are not.
        if isinstance(vis_path, str) and vis_path.endswith(".html"):
            vis_file = gr.update(value=vis_path, visible=True)
        else:
            vis_file = gr.update(value=None, visible=False)

        # A bare path would set the value but leave the hidden component hidden.
        return df, gr.update(value="results.csv", visible=True), topics_str, vis_file

    search_btn.click(
        full_pipeline,
        inputs=[query, max_results, num_topics],
        outputs=[output_table, download, topic_output, vis_output]
    )

demo.launch()

Collecting sklearn
  Using cached sklearn-0.0.post12.tar.gz (2.6 kB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.


ModuleNotFoundError: No module named 'gensim'

In [None]:
!pip install gradio pandas requests nltk sklearn gensim pyLDAvis
import gensim
import gradio as gr
import pandas as pd
import requests
import nltk
import re
from sklearn.feature_extraction.text import CountVectorizer
from gensim import corpora, models # gensim is now imported after installation
import pyLDAvis.gensim_models
import tempfile
import os

nltk.download("stopwords")
nltk.download("punkt")
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# 📡 Semantic Scholar API fonksiyonu
def fetch_papers(query, max_results):
    """Fetch papers for *query* from Semantic Scholar and return them as a table.

    Args:
        query: Free-text search string, sent as the ``query`` parameter.
        max_results: Number of papers to request, sent as the ``limit`` parameter.

    Returns:
        tuple: ``(df, csv_text)``. ``df`` has the columns Title, Year, Authors,
        DOI and Abstract (present even for an empty result); ``csv_text`` is
        ``df`` serialized as CSV without the index.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": max_results,
        "fields": "title,abstract,year,authors,externalIds",
    }
    # Bound the wait so an unresponsive API cannot hang the caller forever.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()

    # A field can be present with a JSON null value, in which case
    # dict.get(key, default) still yields None; fall back with `or` instead.
    results = []
    for item in data.get("data") or []:
        authors = item.get("authors") or []
        external_ids = item.get("externalIds") or {}
        results.append({
            "Title": item.get("title") or "",
            "Year": item.get("year") or "",
            "Authors": ", ".join(a.get("name") or "" for a in authors),
            "DOI": external_ids.get("DOI") or "",
            # A null abstract is intentionally left as None so that consumers
            # which drop missing abstracts (dropna/notna) skip the paper.
            "Abstract": item.get("abstract", "No abstract available"),
        })

    # Naming the columns keeps "Abstract" addressable for an empty result set.
    df = pd.DataFrame(results, columns=["Title", "Year", "Authors", "DOI", "Abstract"])
    return df, df.to_csv(index=False)

# 🧠 Temizleme ve LDA Analizi Fonksiyonu
def lda_topic_modeling(df, num_topics=5):
    """Fit an LDA topic model on the non-missing abstracts in *df*.

    Args:
        df: DataFrame with an "Abstract" column of text.
        num_topics: Number of topics to fit.

    Returns:
        tuple: ``(topics, vis_html)``. ``topics`` is the list produced by
        ``LdaModel.print_topics(num_words=5)``, or an empty list when no
        abstract yields any token. ``vis_html`` is the path of the saved
        pyLDAvis HTML page, a "LDA Visualization Error: ..." string if
        rendering failed, or None when no model was fitted.
    """
    stop_words = set(stopwords.words("english"))

    # Text preprocessing: lowercase, strip punctuation, tokenize, drop stop
    # words and tokens of two characters or fewer.
    def preprocess(text):
        text = re.sub(r"[^\w\s]", "", text.lower())
        tokens = word_tokenize(text)
        return [w for w in tokens if w not in stop_words and len(w) > 2]

    texts = df["Abstract"].dropna().apply(preprocess).tolist()

    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # LdaModel cannot be trained on a collection without terms; report
    # "no topics" instead of letting the training call raise.
    if not any(corpus):
        return [], None

    lda_model = models.LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, random_state=42)
    topics = lda_model.print_topics(num_words=5)

    try:
        vis_data = pyLDAvis.gensim_models.prepare(lda_model, corpus, dictionary)
        temp_dir = tempfile.mkdtemp()
        html_path = os.path.join(temp_dir, "lda.html")
        pyLDAvis.save_html(vis_data, html_path)
        vis_html = html_path
    except Exception as e:
        # The visualization is best-effort; the topics are still returned.
        vis_html = f"LDA Visualization Error: {e}"

    return topics, vis_html

# 🌐 Gradio Arayüzü
with gr.Blocks() as demo:
    gr.Markdown("## 📚 Akademik Makale Ara + LDA Konu Modelleme (Semantic Scholar)")

    query = gr.Textbox(label="🔍 Konu / Anahtar Kelime (örn: data mining)")
    max_results = gr.Slider(5, 50, value=10, step=1, label="🔢 Makale Sayısı")
    num_topics = gr.Slider(2, 10, value=5, step=1, label="🧠 LDA Konu Sayısı")
    search_btn = gr.Button("Ara")

    output_table = gr.Dataframe(label="📄 Sonuçlar (Tablo)")
    # Both file outputs start hidden; the handler reveals them when it has a file.
    download = gr.File(label="⬇️ CSV Dosyası", visible=False)
    topic_output = gr.Textbox(label="🧠 LDA Konuları")
    vis_output = gr.File(label="📊 LDA Görselleştirme", visible=False)

    def full_pipeline(q, n, t):
        """Search, save the results as CSV and run LDA on the abstracts.

        Args:
            q: Search text from the query box.
            n: Requested number of papers.
            t: Requested number of LDA topics.

        Returns:
            Four values matching the outputs wired below: the table, an update
            for the CSV file component, the topic text and an update for the
            visualization file component.
        """
        df, csv = fetch_papers(q, n)

        # Nothing to model. The emptiness check comes first because an empty
        # result may not even carry an "Abstract" column.
        if df.empty or df["Abstract"].dropna().empty:
            return (df, gr.update(value=None, visible=False),
                    "Uygun özet bulunamadı.", gr.update(value=None, visible=False))

        # newline="" keeps the line endings already present in the CSV text untouched.
        with open("results.csv", "w", encoding="utf-8", newline="") as f:
            f.write(csv)

        topics, vis_path = lda_topic_modeling(df, t)
        topics_str = "\n".join([f"T{i+1}: {topic}" for i, topic in enumerate(topics)])

        # Only an actual HTML path is offered for download; error strings are not.
        if isinstance(vis_path, str) and vis_path.endswith(".html"):
            vis_file = gr.update(value=vis_path, visible=True)
        else:
            vis_file = gr.update(value=None, visible=False)

        # A bare path would set the value but leave the hidden component hidden.
        return df, gr.update(value="results.csv", visible=True), topics_str, vis_file

    search_btn.click(
        full_pipeline,
        inputs=[query, max_results, num_topics],
        outputs=[output_table, download, topic_output, vis_output]
    )

demo.launch()

Collecting sklearn
  Using cached sklearn-0.0.post12.tar.gz (2.6 kB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.


ModuleNotFoundError: No module named 'gensim'

In [None]:
!pip install gradio
!pip install requests
!pip install nltk
!pip install sklearn
!pip install gensim
!pip install pyLDAvis



Collecting gradio
  Using cached gradio-5.30.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1

In [None]:
!pip install numpy==1.26.4 scipy==1.13.1 gensim==4.3.3



Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting scipy==1.13.1
  Using cached scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Using cached scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.6 MB)
Installing collected packages: numpy, scipy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.6
    Uninstalling numpy-2.2.6:
      Successfully uninstalled numpy-2.2.6
  Attempting uninstall: scipy
    Found existing installation: scipy 1.14.0
    Uninstalling scipy-1.14.0:
      Successfully uninstalled scipy-1.14.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tsfresh 0.21.0 requires scipy

In [None]:
!pip uninstall numpy scipy
!pip install numpy==1.26.4 scipy==1.13.1
!pip install gensim nltk pandas pyLDAvis gradio


Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Would remove:
    /usr/local/bin/f2py
    /usr/local/lib/python3.11/dist-packages/numpy-1.26.4.dist-info/*
    /usr/local/lib/python3.11/dist-packages/numpy.libs/libgfortran-040039e1.so.5.0.0
    /usr/local/lib/python3.11/dist-packages/numpy.libs/libopenblas64_p-r0-0cf96a72.3.23.dev.so
    /usr/local/lib/python3.11/dist-packages/numpy.libs/libquadmath-96973f99.so.0.0.0
    /usr/local/lib/python3.11/dist-packages/numpy/*
Proceed (Y/n)? y
  Successfully uninstalled numpy-1.26.4
Found existing installation: scipy 1.13.1
Uninstalling scipy-1.13.1:
  Would remove:
    /usr/local/lib/python3.11/dist-packages/scipy-1.13.1.dist-info/*
    /usr/local/lib/python3.11/dist-packages/scipy.libs/libgfortran-040039e1.so.5.0.0
    /usr/local/lib/python3.11/dist-packages/scipy.libs/libopenblasp-r0-01191904.3.27.so
    /usr/local/lib/python3.11/dist-packages/scipy.libs/libquadmath-96973f99.so.0.0.0
    /usr/local/lib/python3.11/dist-p

In [None]:
import numpy as np
import pandas as pd


In [None]:
import gradio as gr
import pandas as pd
import requests
import nltk
import re
import tempfile
import os
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from gensim import corpora, models
import pyLDAvis.gensim_models

# Download the NLTK data sets
nltk.download("stopwords")
nltk.download("punkt")
# Recent NLTK releases load the word tokenizer from "punkt_tab"; fetch it as
# well so word_tokenize works regardless of the installed version.
nltk.download("punkt_tab")

# 📡 Semantic Scholar API fonksiyonu
def fetch_papers(query, max_results):
    """Search the Semantic Scholar Graph API and tabulate the matching papers.

    Args:
        query: Free-text search string, sent as the ``query`` parameter.
        max_results: Number of papers to request, sent as the ``limit`` parameter.

    Returns:
        tuple: On success ``(df, csv_text)``, where ``df`` has the columns
        Title, Year, Authors, DOI and Abstract (present even for an empty
        result) and ``csv_text`` is ``df`` serialized as CSV without the
        index. On a non-200 response, an empty DataFrame and a string that
        starts with "API Error".
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": max_results,
        "fields": "title,abstract,year,authors,externalIds",
    }
    # Bound the wait so an unresponsive API cannot hang the caller forever.
    response = requests.get(url, params=params, timeout=30)

    # Error check
    if response.status_code != 200:
        return pd.DataFrame(), f"API Error: {response.status_code} - {response.text}"

    data = response.json()

    # A field can be present with a JSON null value, in which case
    # dict.get(key, default) still yields None; fall back with `or` instead.
    results = []
    for item in data.get("data") or []:
        authors = item.get("authors") or []
        external_ids = item.get("externalIds") or {}
        results.append({
            "Title": item.get("title") or "",
            "Year": item.get("year") or "",
            "Authors": ", ".join(a.get("name") or "" for a in authors),
            "DOI": external_ids.get("DOI") or "",
            "Abstract": item.get("abstract") or "",
        })

    # Naming the columns keeps "Abstract" addressable for an empty result set.
    df = pd.DataFrame(results, columns=["Title", "Year", "Authors", "DOI", "Abstract"])
    return df, df.to_csv(index=False)

# 🧠 LDA Konu Modelleme
def lda_topic_modeling(df, num_topics=5):
    """Fit an LDA topic model on the non-blank abstracts in *df*.

    Args:
        df: DataFrame with an "Abstract" column of text.
        num_topics: Number of topics to fit.

    Returns:
        tuple: ``(topics, html_path)``. ``topics`` is the list produced by
        ``LdaModel.print_topics(num_words=5)``, or an explanatory string when
        there is nothing to model. ``html_path`` is the path of the saved
        pyLDAvis HTML page, or None when no page was produced.
    """
    stop_words = set(stopwords.words("english"))

    # Lowercase, strip punctuation, tokenize, then drop stop words and tokens
    # of two characters or fewer.
    def preprocess(text):
        text = re.sub(r"[^\w\s]", "", text.lower())
        tokens = word_tokenize(text)
        return [w for w in tokens if w not in stop_words and len(w) > 2]

    # Only non-empty abstracts
    df_clean = df[df["Abstract"].notna() & (df["Abstract"].str.strip() != "")]
    if df_clean.empty:
        return "No valid abstracts for topic modeling.", None

    texts = df_clean["Abstract"].apply(preprocess).tolist()

    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    if not any(corpus):
        return "LDA: Not enough valid content in abstracts.", None

    lda_model = models.LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, random_state=42)
    topics = lda_model.print_topics(num_words=5)

    try:
        vis_data = pyLDAvis.gensim_models.prepare(lda_model, corpus, dictionary)
        temp_dir = tempfile.mkdtemp()
        html_path = os.path.join(temp_dir, "lda.html")
        pyLDAvis.save_html(vis_data, html_path)
    except Exception as e:
        # The visualization is optional: log the failure but keep the topics
        # that were already computed instead of replacing them with the error.
        print(f"LDA Visualization Error: {e}")
        return topics, None

    return topics, html_path

# 🌐 Gradio Arayüzü
with gr.Blocks() as demo:
    gr.Markdown("## 📚 Akademik Makale Ara + LDA Konu Modelleme (Semantic Scholar)")

    query = gr.Textbox(label="🔍 Anahtar Kelime (örn: machine learning)")
    max_results = gr.Slider(5, 50, value=10, step=1, label="🔢 Makale Sayısı")
    num_topics = gr.Slider(2, 10, value=5, step=1, label="🧠 Konu Sayısı (LDA)")
    search_btn = gr.Button("Ara")

    output_table = gr.Dataframe(label="📄 Sonuçlar (Tablo)")
    # Both file outputs start hidden; the handler reveals them when it has a file.
    download = gr.File(label="⬇️ CSV", visible=False)
    topic_output = gr.Textbox(label="🧠 LDA Konuları")
    vis_output = gr.File(label="📊 LDA Görselleştirme (HTML)", visible=False)

    def full_pipeline(q, n, t):
        """Search, save the usable rows as CSV and run LDA on their abstracts.

        Args:
            q: Search text from the query box.
            n: Requested number of papers.
            t: Requested number of LDA topics.

        Returns:
            Four values matching the outputs wired below: the table, an update
            for the CSV file component, the topic (or status) text and an
            update for the visualization file component.
        """
        df, csv = fetch_papers(q, n)

        # On an API error the second element is the error message, not CSV text
        if isinstance(csv, str) and csv.startswith("API Error"):
            return (pd.DataFrame(), gr.update(value=None, visible=False),
                    csv, gr.update(value=None, visible=False))

        # An empty search result may not even carry an "Abstract" column.
        if df.empty:
            return (df, gr.update(value=None, visible=False),
                    "❗ Hiçbir geçerli özet bulunamadı.", gr.update(value=None, visible=False))

        df_clean = df[df["Abstract"].notna() & (df["Abstract"].str.strip() != "")]
        if df_clean.empty:
            return (df, gr.update(value=None, visible=False),
                    "❗ Hiçbir geçerli özet bulunamadı.", gr.update(value=None, visible=False))

        csv_filename = "results.csv"
        df_clean.to_csv(csv_filename, index=False)

        topics, vis_path = lda_topic_modeling(df_clean, t)

        # A list means real topics; anything else is a status message to show as-is.
        if isinstance(topics, list):
            topics_str = "\n".join([f"T{i+1}: {topic}" for i, topic in enumerate(topics)])
        else:
            topics_str = topics

        # Only an actual HTML path is offered for download.
        if isinstance(vis_path, str) and vis_path.endswith(".html"):
            vis_file = gr.update(value=vis_path, visible=True)
        else:
            vis_file = gr.update(value=None, visible=False)

        # A bare path would set the value but leave the hidden component hidden.
        return df_clean, gr.update(value=csv_filename, visible=True), topics_str, vis_file

    search_btn.click(
        full_pipeline,
        inputs=[query, max_results, num_topics],
        outputs=[output_table, download, topic_output, vis_output]
    )

demo.launch()


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5fd41753b224f19c5c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


