Let's first fine-tune a T5 model to turn affirmative sentences into questions:

In [1]:
import random
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5Tokenizer,
    get_linear_schedule_with_warmup
)
def set_seed(seed):
  """Seed the `random`, NumPy and PyTorch random number generators.

  Args:
    seed: Value handed unchanged to `random.seed`, `np.random.seed`
      and `torch.manual_seed`.
  """
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

# Seed once at import time so the cells below start from a fixed state.
set_seed(42)

In [2]:
# Load the pretrained 't5-base' tokenizer and the matching conditional-generation
# model; both are used by the training loop and by `questionize` further down.
tokenizer = T5Tokenizer.from_pretrained('t5-base')
t5_model = T5ForConditionalGeneration.from_pretrained('t5-base')

In [3]:
# optimizer
# Substrings that mark a parameter as belonging to the "no decay" group.
no_decay = ["bias", "LayerNorm.weight"]

# Partition the model parameters in a single pass, keeping the order in which
# `named_parameters()` yields them.
decay_params = []
no_decay_params = []
for param_name, param in t5_model.named_parameters():
    if any(marker in param_name for marker in no_decay):
        no_decay_params.append(param)
    else:
        decay_params.append(param)

# NOTE(review): both groups use weight_decay=0.0, so the split currently has no
# effect on optimisation. Also, T5 presumably names its norm weights
# "layer_norm.weight", in which case "LayerNorm.weight" never matches -- confirm
# before enabling a non-zero decay.
optimizer_grouped_parameters = [
    {"params": decay_params, "weight_decay": 0.0},
    {"params": no_decay_params, "weight_decay": 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=3e-4, eps=1e-8)

In [54]:
# dataset preparation
#
# Hand-written fine-tuning pairs: each tuple is (affirmative statement, question form).
# The statement is what gets prefixed with "questionize: " in the training loop and
# the question is the target text the model learns to produce.

from_statements_to_questions = [
                               ("I would like a tour at 6 p.m.","Could I get a tour at 6 p.m.?"),
                               ("My wife wants an appointment Today at 3 p.m.","Can my wife get an appointment for today at 3 p.m.?"),
                               ("My husband needs a virtual tour at 11 a.m.","Can my husband get a virtual tour at 11 a.m.?"),
                               ("I wish a tour Tomorrow","Could I schedule a tour for tomorrow?"),
                               ("I need to tour the community","Can I tour the community?"),
                               ("I would like a video tour","Could I schedule a video tour?"),
                               ("I need a 3D tour","Can I get a 3D tour?"),
                               ("My father needs a 3D tour","Can my father get a 3D tour?"),
                               ("I need to see the place On January 5th at 3 p.m.","Can I see the place on January 5th at 3 p.m.?"),
                               ("I would like to have a look at the place On February 11th","Could I have a look at the place on February 11th?"),
                               ("My sister wants to come by Tomorrow","Can my sister come by tomorrow?"),
                               ("I need to stop by","Can I stop by?"),
                               ("My partner needs to stop by","Can my partner stop by?"),
                               ("I would like to see it in person On August 11th at 2 p.m.","Could I see it in person on August 11th at 2 p.m.?"),
                               ("I would like an appointment","Could I schedule an appointment?"),
                               ("My brother would like a tour with an agent On September 1st at 9 a.m.","Could my brother schedule a tour with an agent on September 1st at 9 a.m.?"),
                               ("I would like a virtual tour","Could I schedule a virtual tour?"),
                               ("I would like a 3D tour","Could I schedule a 3D tour?"),
]

In [55]:
import logging
logging.basicConfig(level=logging.ERROR)

# Fine-tune T5 on the statement -> question pairs, one example per optimizer step.
t5_model.train()

epochs = 15

for epoch in range(epochs):
  print ("epoch ",epoch)
  # `statement` / `question` are used instead of `input` / `output` so the loop does
  # not rebind the builtin `input` or reuse one name for both the target text and
  # the model's return value.
  for statement, question in from_statements_to_questions:
    input_sent = "questionize: "+statement+ " </s>"
    ouput_sent = question+" </s>"

    # Both sides are padded to a fixed length of 96 tokens.
    tokenized_inp = tokenizer.encode_plus(input_sent,  max_length=96, pad_to_max_length=True,return_tensors="pt")
    tokenized_output = tokenizer.encode_plus(ouput_sent, max_length=96, pad_to_max_length=True,return_tensors="pt")

    input_ids  = tokenized_inp["input_ids"]
    attention_mask = tokenized_inp["attention_mask"]

    decoder_attention_mask=  tokenized_output["attention_mask"]

    # Replace padding positions in the labels with -100, the index the loss ignores.
    # Without this the loss is averaged over the padding tokens as well, which
    # dominate a 96-token target built from a short sentence.
    lm_labels = tokenized_output["input_ids"].clone()
    lm_labels[lm_labels == tokenizer.pad_token_id] = -100

    # the forward function automatically creates the correct decoder_input_ids
    model_output = t5_model(input_ids=input_ids, labels=lm_labels,decoder_attention_mask=decoder_attention_mask,attention_mask=attention_mask)
    loss = model_output[0]

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

epoch  0
epoch  1
epoch  2
epoch  3
epoch  4
epoch  5
epoch  6
epoch  7
epoch  8
epoch  9
epoch  10
epoch  11
epoch  12
epoch  13
epoch  14


Now let's use this model to create a function that converts an affirmative sentence into a question:

In [56]:
def questionize(sentence):
    """Rewrite `sentence` as a question with the fine-tuned T5 model, most of the time.

    A uniform draw in [0, 1] decides the path: above .9 the sentence is returned
    unchanged with a trailing "."; otherwise it is sent through beam search with
    the "questionize: " prefix and the first decoded beam is returned.

    Args:
        sentence: Text to convert.

    Returns:
        The decoded question, `sentence + "."` on the pass-through path, or
        None if generation yields no sequences.
    """
    questionize_prob = random.uniform(0, 1)
    if questionize_prob > .9:
        return sentence + "."

    encoded = tokenizer.encode_plus('questionize: %s </s>' % (sentence), return_tensors="pt")

    # Switch to inference mode before generating.
    t5_model.eval()
    beam_outputs = t5_model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=100,
        early_stopping=True,
        num_beams=10,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
    )

    # Only the first returned sequence is used.
    first_beam = next(iter(beam_outputs), None)
    if first_beam is None:
        return None
    return tokenizer.decode(first_beam, skip_special_tokens=True, clean_up_tokenization_spaces=True)

Then let's write the code that puts a sentence together piece by piece:

In [82]:
from faker import Faker
# NOTE: this rebinds the name `random` (imported as the standard-library module in
# the first cell) to `numpy.random` for all code run after this cell. numpy's
# `randint(low, high)` excludes `high`, unlike the standard-library `randint`.
from numpy import random
# Module-level Faker instance.
fake = Faker()


def synonyms_want():
    """Return one randomly chosen verb phrase expressing a wish."""
    return random.choice(["would like","want","need","wish","desire"])

def synonyms_away():
    """Return one randomly chosen phrase meaning the sender is away.

    "traveling" is listed twice, so it is drawn twice as often as the others.
    """
    return random.choice(["away","out of State","out of the Country","on vacation","traveling","traveling"])

def synonyms_location():
    """Return one randomly chosen noun for the property being toured."""
    return random.choice(["community","apartment","flat","place","building","unit","rental unit"])

def synonyms_person():
    """Return one randomly chosen noun for a third person (used after "My ")."""
    return random.choice(["wife","husband","brother","mother","sister","client","partner","father"])


def hour():
    """Return a random time-of-day suffix, or an empty string.

    A uniform draw in [0, 1] selects the form: up to .4 a morning time
    (" at H:M0 a.m."), up to .8 an afternoon time (" at H:M0 p.m."), otherwise "".

    NOTE(review): `random` is numpy.random when this cell's import is in effect,
    so `randint` excludes its upper bound (hours 8-10 / 1-5, minutes :00-:40).
    Confirm whether 11 a.m., 6 p.m. and :50 were meant to be reachable.
    """
    draw = random.uniform(0,1)
    if draw <= .4:
        return f" at {random.randint(8, 11)}:{random.randint(0, 5)}0 a.m."
    if draw <= .8:
        return f" at {random.randint(1, 6)}:{random.randint(0, 5)}0 p.m."
    return ""

def ordinal(number):
    """Return `number` followed by its English ordinal suffix, e.g. 1 -> "1st".

    Works for any integer, not only days of the month: numbers whose last two
    digits are 11, 12 or 13 take "th"; otherwise the last digit selects
    "st" / "nd" / "rd", and everything else takes "th".

    Args:
        number: Integer to format (a Python int or a NumPy integer).

    Returns:
        The formatted string, such as "22nd" or "111th".
    """
    # 11, 12 and 13 are irregular: "11th", not "11st" (likewise 111, 212, ...).
    if 11 <= number % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")
    return str(number) + suffix

def date():
    """Return a random calendar date as text, e.g. "March 3rd".

    A month is picked uniformly, then a day uniformly from 1 up to and including
    that month's last day (February is treated as 28 days; no year is generated).

    Returns:
        "<Month> <day with ordinal suffix>".
    """
    month_lengths = [("January", 31), ("February", 28), ("March", 31),
                     ("April", 30), ("May", 31), ("June", 30),
                     ("July", 31), ("August", 31), ("September", 30),
                     ("October", 31), ("November", 30), ("December", 31)]
    month_name, days_in_month = month_lengths[np.random.randint(len(month_lengths))]
    # numpy's randint excludes the upper bound, so add 1 to make the last day
    # of the month reachable.
    day = np.random.randint(1, days_in_month + 1)
    return month_name + " " + str(ordinal(day))


class message:
    """Builder for one synthetic tour-request message and its tour-type labels.

    Usage: create an instance, call `opening()`, `time()`, `intention()` and
    `closing()` once each, then call `output()` to get the assembled text. The
    label attributes (`selfguided`, `escorted`, `video`, `virtual`) are set by
    `intention()`.

    NOTE: `opening`, `intention`, `closing` and `time` store their result on the
    instance under their own name, which hides the method on that instance. Each
    of them can therefore be called only once per instance.
    """

    def __init__(self):
        # Tour-type labels (0/1), overwritten by intention().
        self.selfguided = 0
        self.escorted = 0
        self.virtual = 0
        self.video = 0
        # Whether the time phrase may follow the intention, and whether the
        # sentence may be passed to questionize(); both set by intention().
        self.time_after = True
        self.questionize = True
        # When True, add_name() puts a comma before the name (used for closings).
        self.comma = False
        # Set by maybe(); selects "?" over "." in punctuation().
        self.question_mark=False

    def add_name(self,string):
        """Append an optional first name and a final "." to `string`.

        With probability about .7 a random first name is appended, preceded by
        ", " when `self.comma` is set and by " " otherwise; in the remaining
        cases only "." is appended.

        Uses the module-level `fake` Faker instance created in the same cell as
        this class, so the class works on a fresh kernel without running any
        later cell first.
        """
        name_prob = random.uniform(0, 1)
        if name_prob <=.7 and self.comma:
            string += ", "  + fake.first_name() +"."
        elif name_prob <=.7:
            string += " "  + fake.first_name() +"."
        else:
            string += "."
        return string


    def opening(self):
        """Pick a random greeting and store it in `self.opening`."""
        opening_choices=[self.add_name("Good morning"),
                        self.add_name("Good evening"),
                        self.add_name("Good afternoon"),
                        "To whom it may concern,",
                        self.add_name("Hi"),
                        self.add_name("Hey"),
                        self.add_name("Hello"),
                        self.add_name("Sounds good"),
                        self.add_name("Great"),
                        self.add_name("Cool"),
                        self.add_name("Alright"),
                        self.add_name("Perfect"),
                        self.add_name("Wonderful"),
                        self.add_name(""),
                        ""]
        self.opening = random.choice(opening_choices)

    def intention(self):   # the entries in the vector are [selfguided,escorted,video,virtual] and [self.time_after,self.questionize]
        """Pick a random intention sentence and set the labels and flags that go with it.

        Stores the sentence in `self.intention`, the four labels in
        `self.selfguided`, `self.escorted`, `self.video`, `self.virtual`, and the
        two flags in `self.time_after` and `self.questionize`.
        """
        intention_choices=[["I "+ synonyms_want()+ " a self guided tour",[1,0,0,0],[True,True]],
                          ["I "+ synonyms_want()+ " to see the "+synonyms_location()+" on my own",[1,0,1,0],[True,True]],
                          ["I "+ synonyms_want()+ " to see the "+synonyms_location()+" in person",[1,1,0,0],[True,True]],
                          ["I "+ synonyms_want()+ " someone with me on my tour",[0,1,0,0],[True,True]],
                          ["I "+ synonyms_want()+ " to be accompanied on my tour",[0,1,0,0],[True,True]],
                          ["My "+ synonyms_person()+" wants to actually see the "+synonyms_location(),[1,1,0,0],[True,True]],
                          ["My "+ synonyms_person()+" wants to be accompanied on the tour",[0,1,0,0],[True,True]],
                          ["My "+ synonyms_person()+" wants to see the "+synonyms_location()+" in person",[1,1,0,0],[True,True]],
                          ["My "+ synonyms_person()+" wants to be on sight",[1,1,0,0],[True,True]],
                          ["My "+ synonyms_person()+" wants to have a virtual tour",[0,0,0,1],[True,True]],
                          ["I don't want a leasing agent for my tour",[1,0,1,0],[False,False]],
                          ["I "+ synonyms_want()+ " a video tour",[0,0,1,1],[True,True]],
                          ["I "+ synonyms_want()+ " a 3D tour",[0,0,1,1],[True,True]],
                          ["I "+ synonyms_want()+ " a Zoom tour",[0,0,0,1],[True,True]],
                          ["I "+ synonyms_want()+ " a virtual tour",[0,0,1,1],[True,True]],
                          ["I "+ synonyms_want()+ " an online tour",[0,0,1,1],[True,True]],
                          ["I "+ synonyms_want()+ " to see the "+synonyms_location()+" on video",[0,0,1,1],[True,True]],
                          ["I "+ synonyms_want()+ " to see it from my computer",[0,0,1,1],[True,True]],
                          ["I "+ synonyms_want()+ " to see it, but can't come in person",[0,0,1,1],[True,False]],
                          ["I "+ synonyms_want()+ " to see it on my own",[1,0,1,1],[True,False]],
                          ["I "+ synonyms_want()+ " to schedule a call",[0,0,0,1],[True,True]],
                          ["I'm "+synonyms_away()+", so I can't visit the "+synonyms_location(),[0,0,1,1],[False,False]],
                          ["I can't be there in person",[0,0,1,1],[False,False]],
                          ["I "+ synonyms_want()+ " a tour with a leasing agent",[0,1,0,0],[True,True]],
                          ["I "+ synonyms_want()+ " a tour without a leasing agent",[1,0,0,0],[True,True]],
                          ["I "+ synonyms_want()+ " to see the "+synonyms_location()+" in person",[1,1,0,0],[True,True]],
                          ["I "+ synonyms_want()+ " to be able to talk to a leasing agent on my tour",[0,1,0,0],[True,True]],
                          ["I "+ synonyms_want()+ " to see the place",[1,1,1,1],[True,True]],
                          ["I "+ synonyms_want()+ " an appointment",[1,1,0,0],[True,True]],
                          ["I "+ synonyms_want()+ " to have a look at the "+synonyms_location(),[1,1,1,1],[True,True]],
                          ["I "+ synonyms_want()+ " to see it in person",[1,1,0,0],[True,True]]]
        # Shuffle in place and take the first entry, i.e. a uniform random pick.
        np.random.shuffle(intention_choices)
        intention_value=intention_choices[0]
        self.intention=intention_value[0]
        [self.selfguided,self.escorted,self.video,self.virtual]=intention_value[1]
        [self.time_after,self.questionize]=intention_value[2]


    def closing(self):
        """Pick a random sign-off and store it in `self.closing`.

        Sets `self.comma` first so that any appended name is preceded by ", ".
        """
        self.comma = True
        closing_choices=[self.add_name("Thank you"),
                        self.add_name("I hope this works"),
                        self.add_name("Thanks"),
                        self.add_name("I hope to hear back soon"),
                        self.add_name("See you soon"),
                        self.add_name("Goodbye"),
                        self.add_name("Thanks in advance"),
                        self.add_name("Let me know"),
                        self.add_name("")]
        self.closing = random.choice(closing_choices)

    def time(self):
        """Pick a random time phrase and store it in `self.time`."""
        time_choices = ["Today" + hour(),
                       "Tomorrow" + hour(),
                       "On " + str(date())]
        self.time = random.choice(time_choices)


    def maybe(self):
        """Return a random hedge such as ", maybe" (or a single space for no hedge).

        Also records in `self.question_mark` whether the chosen hedge turns the
        sentence into a question.
        """
        maybe_choices=[[", maybe",True],
                      [", possibly",True],
                      [", perhaps",True],
                      [", what about",True],
                      [" ",False]]
        np.random.shuffle(maybe_choices)
        maybe_value=maybe_choices[0]
        self.question_mark = maybe_value[1]
        return maybe_value[0]



    def punctuation(self):
        """Return "?" when `self.question_mark` is set, otherwise "."."""
        if self.question_mark:
            value = "?"
        else:
            value ="."
        return value

    def output(self):
        """Assemble and return the full message text.

        Requires `opening()`, `time()`, `intention()` and `closing()` to have been
        called. One of three layouts is used:

        * draw <= 0.8 and `time_after`: intention, optional hedge, then time;
        * otherwise, if `time_after` and `questionize`: the intention plus time
          passed through `questionize()`;
        * otherwise: time first, then the intention.
        """
        output_prob = random.uniform(0, 1)
        if output_prob<=0.8 and self.time_after:
            # maybe() is evaluated before punctuation() (left to right), so the
            # punctuation reflects the hedge that was just chosen.
            return self.opening + " " + self.intention + self.maybe() + " " + self.time + self.punctuation() + " " + self.closing
        elif self.time_after and self.questionize:
            return self.opening + " " +  questionize(self.intention + " " + self.time) + " " + self.closing
        else:
            return self.opening + " " + self.time + " " + self.intention + self.punctuation() + " " + self.closing

In [None]:
# Create a module-level Faker instance named `faker` and print a few sample names
# as a quick check that Faker works. Any code that refers to the name `faker`
# needs this cell to have been run first.

from faker import Faker

faker = Faker()

print(f'Name: {faker.name()}')
print(f'First name: {faker.first_name()}')
print(f'Last name: {faker.last_name()}')

print('--------------------------')

print(f'Male name: {faker.name_male()}')
print(f'Female name: {faker.name_female()}')

Now we can generate our deliverable and write it to a file.

In [86]:
import timeit

start = timeit.default_timer()

# One accumulator per output column.
data_sentences, data_virtual, data_selfguided, data_video, data_escorted = [], [], [], [], []

for _ in range(10000):
    generated = message()
    # The pieces must exist before output() can assemble them.
    generated.opening()
    generated.time()
    generated.intention()
    generated.closing()
    data_sentences.append(generated.output())
    data_virtual.append(generated.virtual)
    data_selfguided.append(generated.selfguided)
    data_video.append(generated.video)
    data_escorted.append(generated.escorted)

# Column order of the CSV file.
column_order = ['Sentences', 'Virtual', 'Video', 'Escorted', 'Self-Guided']
data = dict(zip(
    column_order,
    [data_sentences, data_virtual, data_video, data_escorted, data_selfguided],
))

df = pd.DataFrame(data, columns=column_order)
df.to_csv('sentences.csv', index=False, header=True)

stop = timeit.default_timer()
execution_time = stop - start

print("Program Executed in "+str(execution_time))

Program Executed in 3980.566726719
