In [1]:
pip install wikipedia-api

Collecting wikipedia-api
  Downloading wikipedia_api-0.7.1.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia-api
  Building wheel for wikipedia-api (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia-api: filename=Wikipedia_API-0.7.1-py3-none-any.whl size=14346 sha256=f1ab98eab1004aa47333dc2abb52ae77568a49fe06d45714116c37fbea1d20aa
  Stored in directory: /root/.cache/pip/wheels/4c/96/18/b9201cc3e8b47b02b510460210cfd832ccf10c0c4dd0522962
Successfully built wikipedia-api
Installing collected packages: wikipedia-api
Successfully installed wikipedia-api-0.7.1


In [4]:
import wikipediaapi

def fetch_wikipedia_data(keyword, lang='en', user_agent='NLPproject'):
    """
    Fetch the title, summary and full text of a Wikipedia page.

    Args:
        keyword: Title of the page to look up (e.g. "World War I").
        lang: Wikipedia language edition code, such as 'en' or 'ko'.
        user_agent: User-Agent string passed to the Wikipedia client.
            Defaults to the value this notebook has always used.

    Returns:
        dict with the keys "title", "summary" and "content", taken from the
        page's ``title``, ``summary`` and ``text`` attributes.

    Warns:
        UserWarning: if the client reports that the page does not exist.
            The dict is still returned so existing callers keep working, but
            callers should expect its text fields to be unusable in that case.
    """
    import warnings

    # Keyword arguments keep the call unambiguous regardless of the
    # positional order expected by the installed client version.
    wiki_client = wikipediaapi.Wikipedia(user_agent=user_agent, language=lang)
    page = wiki_client.page(keyword)

    # Read the fields first, in the original order, so the returned values are
    # not influenced by the existence check below.
    result = {
        "title": page.title,
        "summary": page.summary,
        "content": page.text
    }

    # Surface missing pages instead of silently feeding blank text downstream.
    if not page.exists():
        warnings.warn(
            f"Wikipedia page not found for keyword {keyword!r} (lang={lang!r})",
            stacklevel=2,
        )

    return result

# Example run: fetch one article, then show its title followed by its summary.
keyword = "World War I"
data = fetch_wikipedia_data(keyword)
for field in ("title", "summary"):
    print(data[field])


World War I
World War I or the First World War (28 July 1914 – 11 November 1918), also known as the Great War, was a global conflict between two coalitions: the Allies (or Entente) and the Central Powers. Fighting took place mainly in Europe and the Middle East, as well as in parts of Africa and the Asia-Pacific, and in Europe was characterised by trench warfare and the use of artillery, machine guns, and chemical weapons (gas). World War I was one of the deadliest conflicts in history, resulting in an estimated 9 million military dead and 23 million wounded, plus up to 8 million civilian deaths from causes including genocide. The movement of large numbers of people was a major factor in the Spanish flu pandemic, which killed millions.
The causes of World War I included the rise of Germany and decline of the Ottoman Empire, which disturbed the long-standing balance of power in Europe, as well as economic competition between nations triggered by industrialisation and imperialism. Growin

In [5]:
def create_qa_pairs(wiki_data, max_sentences=5):
    """
    Convert fetched Wikipedia data into a list of question/answer pairs.

    The first pair is always "What is <title>?" answered by the summary.
    Further pairs are built from the leading sentences of the content, each
    phrased as "Explain: <sentence>" with the sentence itself as the answer.

    Args:
        wiki_data: dict with the keys "title", "summary" and "content".
        max_sentences: Maximum number of content sentences to turn into
            pairs (default 5). ``None`` uses every sentence; negative values
            are treated as 0.

    Returns:
        list of dicts, each with the keys "question" and "answer".

    Note:
        Sentences are split naively on ". ", so abbreviations may be split
        mid-sentence and the separating period is not kept in the answer.
    """
    qa_pairs = []

    # Summary-based pair.
    qa_pairs.append({
        "question": f"What is {wiki_data['title']}?",
        "answer": wiki_data['summary'],
    })

    # Content-based pairs. Blank fragments (empty content, stray separators)
    # are dropped so they do not become "Explain: " pairs with empty answers.
    stripped = (fragment.strip() for fragment in wiki_data['content'].split('. '))
    sentences = [sentence for sentence in stripped if sentence]
    if max_sentences is not None:
        sentences = sentences[:max(max_sentences, 0)]

    for sentence in sentences:
        qa_pairs.append({"question": f"Explain: {sentence}", "answer": sentence})

    return qa_pairs

# Example run: build the QA pairs from the fetched article and print one per line.
qa_pairs = create_qa_pairs(data)
print(*qa_pairs, sep="\n")

{'question': 'What is World War I?', 'answer': "World War I or the First World War (28 July 1914 – 11 November 1918), also known as the Great War, was a global conflict between two coalitions: the Allies (or Entente) and the Central Powers. Fighting took place mainly in Europe and the Middle East, as well as in parts of Africa and the Asia-Pacific, and in Europe was characterised by trench warfare and the use of artillery, machine guns, and chemical weapons (gas). World War I was one of the deadliest conflicts in history, resulting in an estimated 9 million military dead and 23 million wounded, plus up to 8 million civilian deaths from causes including genocide. The movement of large numbers of people was a major factor in the Spanish flu pandemic, which killed millions.\nThe causes of World War I included the rise of Germany and decline of the Ottoman Empire, which disturbed the long-standing balance of power in Europe, as well as economic competition between nations triggered by indu

In [6]:
import csv

def save_qa_to_csv(qa_pairs, file_path='qa_pairs.csv'):
    """
    Write QA pairs to a UTF-8 CSV file with a "question,answer" header row.

    Args:
        qa_pairs: Iterable of dicts, each with the keys 'question' and 'answer'.
        file_path: Destination path; an existing file is overwritten.
    """
    columns = ['question', 'answer']
    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'w', newline='', encoding='utf-8') as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        for row in qa_pairs:
            csv_writer.writerow(row)

# Persist the QA pairs built above; with no path given this writes to the
# default 'qa_pairs.csv', resolved relative to the current working directory.
save_qa_to_csv(qa_pairs)