In [1]:
!pip install spacy transformers nltk
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m38.3 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
# Импортируем необходимые библиотеки
import nltk
import spacy
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Устанавливаем зависимости
!pip install nltk spacy transformers
!python -m spacy download en_core_web_sm

# Загрузка моделей
nltk.download('punkt')
nlp = spacy.load('en_core_web_sm')

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [3]:
# Пример текста
text = """
There are five types of schools in the US educational system. They are: kindergarten, elementary school, middle school, high school and private school. Children go to kindergarten when they are 5 years old They go to elementary school from ages 6 through 11 (1-5 grades), middle school from ages 12 through 14 (6-8 grades) and high school from ages 15 through 19 (9-12 grades).

About 90 percent of all children attend public school, which is free. The other 10 percent go I private schools, which often include religious education. They are similar to the public schools but parents must pay for their children to go to these schools. About half of all private schools are run by Catholics.

In the United States, education is mainly the responsibility of state and local governments, not the national government. The amount of money spent on education differs from state to state. The subjects studied also differ a little. The school year usually runs from September to June. At the high school level, there are some specialized schools. They include schools that emphasize vocational subjects like business or auto mechanics. Most high schools are general schools. High school students are often involved in the non-academic activities that their school offers -for example, in drama clubs, sports teams, or the school newspaper.
"""

In [4]:
# Разбор текста
doc = nlp(text)

# Извлечение именованных сущностей
entities = [(ent.text, ent.label_) for ent in doc.ents]

# Извлечение предложений
sentences = [sent.text for sent in doc.sents]

# Пример вывода
print("Entities:", entities)
print("Sentences:", sentences)

Entities: [('five', 'CARDINAL'), ('US', 'GPE'), ('5 years old', 'DATE'), ('11', 'CARDINAL'), ('1', 'CARDINAL'), ('14', 'CARDINAL'), ('6', 'CARDINAL'), ('9-12', 'CARDINAL'), ('About 90 percent', 'PERCENT'), ('10 percent', 'PERCENT'), ('About half', 'CARDINAL'), ('Catholics', 'NORP'), ('the United States', 'GPE'), ('The school year', 'DATE'), ('September to June', 'DATE')]
Sentences: ['\nThere are five types of schools in the US educational system.', 'They are: kindergarten, elementary school, middle school, high school and private school.', 'Children go to kindergarten when they are 5 years old They go to elementary school from ages 6 through 11 (1-5 grades), middle school from ages 12 through 14 (6-8 grades) and high school from ages 15 through 19 (9-12 grades).\n\n', 'About 90 percent of all children attend public school, which is free.', 'The other 10 percent go I private schools, which often include religious education.', 'They are similar to the public schools but parents must pay 

In [5]:
# Определяем шаблоны для генерации вопросов
import random

def generate_question(entity, sentence):
    question_templates = {
        "PERSON": ["Who is {}?", "Who was mentioned in the context of {}?"],
        "ORG": ["What organization is {}?", "Which organization is related to {}?"],
        "GPE": ["Where is {}?", "What place is referred to as {}?"],
        "DATE": ["When did {} happen?", "What date is {}?"]
    }

    entity_text, entity_label = entity
    if entity_label in question_templates:
        template = random.choice(question_templates[entity_label])
        return template.format(entity_text)
    return None

# Генерация вопросов на основе шаблонов
questions = []
for entity in entities:
    for sentence in sentences:
        if entity[0] in sentence:
            question = generate_question(entity, sentence)
            if question:
                questions.append(question)

# Пример вывода вопросов
print("Template-based Questions:")
for question in questions:
    print(question)

Template-based Questions:
What place is referred to as US?
What date is 5 years old?
What place is referred to as the United States?
When did The school year happen?
When did September to June happen?


In [11]:
# Генерация вопросов с использованием модели T5
# Загрузка модели T5
model = T5ForConditionalGeneration.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')

def generate_t5_question(context, answer):
    input_text = f"Question: {context} Answer: {answer} Generate question:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')

    outputs = model.generate(input_ids)
    question = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return question

# Генерация вопросов с использованием T5
t5_questions = []
for entity in entities:
    for sentence in sentences:
        if entity[0] in sentence:
            question = generate_t5_question(sentence, entity[0])
            t5_questions.append(question)

# Вывод сгенерированных вопросов, с использованием T5
print("T5-based Questions:")
for question in t5_questions:
    print(question)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


T5-based Questions:
Answer: five Generate question: five Answer: five Generate question:
Answer:
Answer: 5 years old Generate question: 5 years old Generate question: 5 years old
Answer: 11 Generate question:
Answer: 1 Generate question:
Answer: 1 Generate question: 1 Answer: 1 Answer: 1 Generate question: 1
Answer: 14 Generate question: 14 Answer:
Answer: 6 Generate question: 6 Generate question: 6 Answer: 6 Generate question
Answer: 9-12 Generate question: 9-12.
Answer: About 90 percent Generate question: About 90 percent of all children attend public school,
Answer: 10 percent Generate question: 10 percent Generate question: 10 percent Generate question
Answer: About half Generate question:
Answer:
Answer: the United States Generate question:
Question: The school year Generate question:
Question: The school year usually runs from September to June. Answer: September to June Generate


In [13]:
# prompt: Теперь создается просто часть текста, а не вопроса

import nltk
import spacy
from transformers import T5ForConditionalGeneration, T5Tokenizer
import random
!pip install spacy transformers nltk
!python -m spacy download en_core_web_sm
# Импортируем необходимые библиотеки

# Устанавливаем зависимости
!pip install nltk spacy transformers
!python -m spacy download en_core_web_sm

# Загрузка моделей
nltk.download('punkt')
nlp = spacy.load('en_core_web_sm')
# Пример текста
text = """
There are five types of schools in the US educational system. They are: kindergarten, elementary school, middle school, high school and private school. Children go to kindergarten when they are 5 years old They go to elementary school from ages 6 through 11 (1-5 grades), middle school from ages 12 through 14 (6-8 grades) and high school from ages 15 through 19 (9-12 grades).

About 90 percent of all children attend public school, which is free. The other 10 percent go I private schools, which often include religious education. They are similar to the public schools but parents must pay for their children to go to these schools. About half of all private schools are run by Catholics.

In the United States, education is mainly the responsibility of state and local governments, not the national government. The amount of money spent on education differs from state to state. The subjects studied also differ a little. The school year usually runs from September to June. At the high school level, there are some specialized schools. They include schools that emphasize vocational subjects like business or auto mechanics. Most high schools are general schools. High school students are often involved in the non-academic activities that their school offers -for example, in drama clubs, sports teams, or the school newspaper.
"""
# Разбор текста
doc = nlp(text)

# Извлечение именованных сущностей
entities = [(ent.text, ent.label_) for ent in doc.ents]

# Извлечение предложений
sentences = [sent.text for sent in doc.sents]

# Пример вывода
print("Entities:", entities)
print("Sentences:", sentences)
# Определяем шаблоны для генерации текста

def generate_text(entity, sentence):
    text_templates = {
        "PERSON": ["{} is a person mentioned in the context.", "{} is a character in the story."],
        "ORG": ["{} is an organization mentioned in the context.", "{} is a company in the story."],
        "GPE": ["{} is a place mentioned in the context.", "{} is a location in the story."],
        "DATE": ["{} is a date mentioned in the context.", "{} is a time period in the story."]
    }

    entity_text, entity_label = entity
    if entity_label in text_templates:
        template = random.choice(text_templates[entity_label])
        return template.format(entity_text)
    return None

# Генерация текста на основе шаблонов
texts = []
for entity in entities:
    for sentence in sentences:
        if entity[0] in sentence:
            text = generate_text(entity, sentence)
            if text:
                texts.append(text)

# Пример вывода текста
print("Template-based Text:")
for text in texts:
    print(text)
# Генерация текста с использованием модели T5
# Загрузка модели T5
model = T5ForConditionalGeneration.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')

def generate_t5_text(context, entity):
    input_text = f"Context: {context} Entity: {entity} Generate text:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')

    outputs = model.generate(input_ids)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return text

# Генерация текста с использованием T5
t5_texts = []
for entity in entities:
    for sentence in sentences:
        if entity[0] in sentence:
            text = generate_t5_text(sentence, entity[0])
            t5_texts.append(text)

# Вывод сгенерированного текста, с использованием T5
print("T5-based Text:")
for text in t5_texts:
    print(text)


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m79.0 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation success

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Entities: [('five', 'CARDINAL'), ('US', 'GPE'), ('5 years old', 'DATE'), ('11', 'CARDINAL'), ('1', 'CARDINAL'), ('14', 'CARDINAL'), ('6', 'CARDINAL'), ('9-12', 'CARDINAL'), ('About 90 percent', 'PERCENT'), ('10 percent', 'PERCENT'), ('About half', 'CARDINAL'), ('Catholics', 'NORP'), ('the United States', 'GPE'), ('The school year', 'DATE'), ('September to June', 'DATE')]
Sentences: ['\nThere are five types of schools in the US educational system.', 'They are: kindergarten, elementary school, middle school, high school and private school.', 'Children go to kindergarten when they are 5 years old They go to elementary school from ages 6 through 11 (1-5 grades), middle school from ages 12 through 14 (6-8 grades) and high school from ages 15 through 19 (9-12 grades).\n\n', 'About 90 percent of all children attend public school, which is free.', 'The other 10 percent go I private schools, which often include religious education.', 'They are similar to the public schools but parents must pay 

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


T5-based Text:
Context: There are five types of schools in the US educational system. Entity: five
Context: There are five types of schools in the US educational system. Entity: US
12 through 14 (6-8 grades) and high school from 15 through 19 (9-12 grades).
12 through 14 (6-8 grades) and high school from 15 through 19 (9-12 grades).
12 through 14 (6-8 grades) and high school from 15 through 19 (9-12 grades).
Context: The other 10 percent go I private schools, which often include religious education. Ent
12 through 14 (6-8 grades) and high school from 15 through 19 (9-12 grades).
12 through 14 (6-8 grades) and high school from 15 through 19 (9-12 grades).
14 (6-8 grades) and high school from 15 through 19 (9-12 grades). Entity
Context: About 90 percent of all children attend public school, which is free. Entity
Context: The other 10 percent go I private schools, which often include religious education. Ent
Context: About half of all private schools are run by Catholics. Entity: About
:
