In [None]:
import pandas as pd 
import random
from datetime import datetime, timedelta
import itertools
from convertdate import julian, gregorian, hebrew, islamic

In [None]:
# Event names (each already prefixed with "The") that are substituted into
# question templates containing an {event} placeholder.
events = ['The assembly',
 'The auction',
 'The ball',
 'The breakfast',
 'The camp',
 'The class',
 'The colloquium',
 'The concert',
 'The conference',
 'The debate',
 'The demonstration',
 'The dinner',
 'The discussion',
 'The exam',
 'The exhibition',
 'The festival',
 'The function',
 'The gala',
 'The game',
 'The gathering',
 'The get-together',
 'The interview',
 'The launch',
 'The lecture',
 'The lunch',
 'The match',
 'The meeting',
 'The movie',
 'The party',
 'The performance',
 'The practice',
 'The premiere',
 'The presentation',
 'The race',
 'The retreat',
 'The reunion',
 'The seminar',
 'The session',
 'The symposium',
 'The tour',
 'The training',
 'The trial',
 'The trip',
 'The webinar',
 'The wedding',
 'The workshop']

In [None]:
# Question templates for weekday-shift problems. Every template takes {day} and
# {duration}; only the first one also takes {event}. The wording of each
# template (e.g. "postponed", "earlier", "ahead") is what determine_direction()
# inspects to decide which way the shift goes, so the text is load-bearing.
day_templates = [
    "{event} initially scheduled for next {day} has been moved forward {duration}. When will the event now take place?",
    "Your event was initially on {day} but has been postponed by {duration}. When is it?",
    "The concert scheduled for the coming {day} has been delayed by {duration}. Which day will it now fall on?",
    "If your birthday is on {day} and you plan a party {duration} later, when will your party be?",
    "A marathon was supposed to happen this coming {day}, but got shifted {duration} earlier. When will it occur?",
    "The conference that usually happens every {day} is now postponed by {duration}. When is the next one?",
    "Your usual gym day is {day}, but for next week, it is scheduled {duration} later. When will you go to the gym next week?",
    "The match initially set for {day} has now been advanced by {duration}. Which day is it on now?",
    "Your usual spa day on {day} of every week has been postponed {duration}. When will it be next week?",
    "The weekly town hall usually on {day} is delayed by {duration}. When will it happen?",
    "The workshop that was planned for {day} got rescheduled {duration} later. What is the new date?",
    "Your favorite TV show airs every {day}. If they move it {duration} earlier next week, when can you watch it?",
    "The city parade usually on {day} has been moved {duration} ahead. Which day will it be?",
    "Your dentist appointment initially on {day} got shifted {duration}. When should you visit?",
    "The museum trip planned for {day} has been postponed {duration}. Which day should you plan for?",
    "Your regular library day is on {day}. Next week it is shifted {duration}. When will it be?",
    "The carnival that happens every {day} has been moved {duration} earlier. When will it take place?",
    "The play initially scheduled for {day} has been delayed by {duration}. What is the revised date?",
    "The weekly market usually set up on {day} will be {duration} later next week. When should you visit?",
    "Your regular yoga class on {day} will be postponed by {duration} next week. When is it scheduled?"
]

# Values used to fill the templates. `days` is ordered Monday..Sunday; its
# order matters because weekday shifts are computed by index arithmetic on it.
days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
# Shifts of one to six days, so a shift never lands back on the starting weekday.
durations = ["a day", "two days", "three days", "four days", "five days", "six days"]

# Number words -> integer day counts. "one" is accepted here although none of
# the entries in `durations` currently uses it.
number_map = {
    "a": 1,
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
}

def calculate_new_day(current_day, shift, direction):
    """Return the weekday reached by shifting ``current_day`` by ``shift``.

    Args:
        current_day: A weekday name that appears in the module-level ``days`` list.
        shift: A duration phrase such as ``"three days"``; the first word found
            in ``number_map`` gives the number of days to move.
        direction: ``"earlier"`` moves backwards; any other value (including
            ``None``) moves forwards.

    Returns:
        The resulting weekday name, wrapping around the week as needed.

    Raises:
        ValueError: If ``current_day`` is not in ``days`` (raised by
            ``list.index``) or if ``shift`` contains no recognised number word.
    """
    index = days.index(current_day)

    # First recognised number word wins; None means nothing was recognised.
    shift_amount = next(
        (number_map[word] for word in shift.split() if word in number_map),
        None,
    )
    if shift_amount is None:
        # Previously this fell through to an UnboundLocalError further down.
        raise ValueError(f"no recognised number word in shift {shift!r}")

    if direction == "earlier":
        shift_amount = -shift_amount

    # Python's % always yields a non-negative result, so negative shifts wrap correctly.
    return days[(index + shift_amount) % len(days)]

def determine_direction(template):
    """Classify a weekday-shift template as moving the event earlier or later.

    The decision is a plain substring search on the template text. Keywords
    meaning "earlier" are checked first, so a template containing both kinds
    (e.g. "shifted ... earlier") is classified as "earlier".

    Returns:
        "earlier", "later", or None when no keyword is found.
    """
    keyword_groups = (
        ("earlier", ("forward", "advanced", "ahead", "earlier")),
        ("later", ("postponed", "delayed", "later", "rescheduled", "shifted")),
    )
    for label, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in template:
                return label
    return None

# Column-oriented accumulator for the weekday-shift questions.
data = {
    column: []
    for column in ("Question", "Option A", "Option B", "Option C", "Answer")
}
option_columns = ("Option A", "Option B", "Option C")

for _ in range(600):
    # The four draws below are made in a fixed order: template, day, event, duration.
    template = random.choice(day_templates)
    day = random.choice(days)
    event = random.choice(events)
    duration = random.choice(durations)

    direction = determine_direction(template)
    correct_day = calculate_new_day(day, duration, direction)

    # Two wrong weekdays, then shuffle so the correct one has no fixed slot.
    other_days = [candidate for candidate in days if candidate != correct_day]
    distractor_days = random.sample(other_days, 2)
    options = [correct_day, *distractor_days]
    random.shuffle(options)

    question = template.format(event=event, day=day, duration=duration)

    data["Question"].append(question)
    for position, column in enumerate(option_columns):
        data[column].append(options[position])
    # The three options are distinct, so index() finds the single correct slot.
    data["Answer"].append("ABC"[options.index(correct_day)])

# Keep the first occurrence of every question text and tag the category.
df_day = pd.DataFrame(data).drop_duplicates(subset=['Question'])
df_day['Category'] = 'Shift - MT'

In [None]:
# Question templates for clock-time shifts ("minute" / "hour" granularity).
# Each template takes {time} (the original clock time) and {duration} (the
# shift). The wording ("in advance", "behind", "preponed", "after", ...) is
# what determine_direction() inspects to decide whether the shift is earlier or
# later, so the text is load-bearing.
templates_minute_hour = [
    "If your interview is originally set for {time} and you're now told to arrive {duration} in advance, when should you be there?",
    "Your train's regular schedule is {time}. However, today it's running {duration} behind. When will it depart?",
    "The movie officially starts at {time}. If you want to catch a scene {duration} after the start, when should you look for it?",
    "The webinar kicks off sharply at {time}. If you're targeting a section that starts {duration} post-beginning, when should you log in?",
    "Your alarm is precisely set for {time}. After hitting the snooze and taking an additional {duration}, when will it sound again?",
    "You typically leave work right at {time}. If today you decide to stay back for an extra {duration}, when will you finally head out?",
    "Your fitness session begins at {time}. Today, if you wish to extend it by another {duration}, when will you be done?",
    "Your business meeting is booked for {time}. If it's rescheduled to start {duration} post the original time, when is it now?",
    "You're aiming to take a break around {time}. If you push it by a further {duration}, when will you actually relax?",
    "Your most-watched TV show is broadcasted at {time}. If its timing gets shifted by {duration} next episode, when should you tune in?",
    "The match has its kick-off at {time}. Due to unforeseen reasons, if there's a delay of {duration}, when will it get underway?",
    "Your plane is supposed to depart at {time}. If it's preponed by {duration}, when is the revised departure?",
    "The meal was promised to be on the table at {time}. If it's going to be {duration} postponed, when can you expect to dine?",
    "You have an exciting date at {time}. If you're lagging by {duration}, when will you probably meet your date?",
    "Your virtual class has its start at {time}. If you decide to pop in {duration} after it begins, what's the entry time?",
    # NOTE(review): the next two templates ask yes/no questions, but the
    # generated answer options are clock times -- confirm this is intended.
    "The store's shutting down time is {time}. If you're still {duration} away from reaching, can you make it before the shutters are down?",
    "The bus is set to move at {time}. If you find yourself at the bus stop {duration} after the scheduled time, did you miss your ride?",
    "You have your dentist appointment fixed at {time}. Planning to be there {duration} ahead of the fixed time, when would you reach?",
    "The live concert commences at {time}. If the star performance begins {duration} after the start, what's the performance start time?",
    "You've planned a call right at {time}. If you have to bring it {duration} ahead, when's the new call time?",
    "Your flight is originally scheduled to depart at {time}. If it's rescheduled to take off {duration} earlier, when will it leave?",
    "The workshop you signed up for starts at {time}. If it begins {duration} earlier than planned, when should you be ready?",
    "The theater play was initially set to begin at {time}. If the curtain rises {duration} earlier, when does the show start?",
    "The book launch was slated for {time}. If the event begins {duration} ahead of schedule, when will it start?"  
]

In [None]:
# Duration phrases used for clock-time shifts, built in four runs:
#   * every minute from "1 minute" to "59 minutes",
#   * "1 hour" and "2 hours", each followed by its 5-minute steps up to 55,
#   * "3 hours",
#   * half-hour steps from "3 hours 30 minutes" up to "12 hours".
time_durations = (
    [f"{minutes} minute" if minutes == 1 else f"{minutes} minutes" for minutes in range(1, 60)]
    + ["1 hour"]
    + [f"1 hour {minutes} minutes" for minutes in range(5, 60, 5)]
    + ["2 hours"]
    + [f"2 hours {minutes} minutes" for minutes in range(5, 60, 5)]
    + ["3 hours"]
    + [
        phrase
        for hours in range(3, 12)
        for phrase in (f"{hours} hours 30 minutes", f"{hours + 1} hours")
    ]
)

def get_minutes_from_duration(duration):
    """Convert a duration phrase into a total number of minutes.

    Accepts phrases shaped like "45 minutes", "2 hours" or
    "1 hour 30 minutes" and returns the equivalent minute count as an int.
    """
    if "hour" not in duration:
        # Minutes only: the number is everything before the word "minute".
        return int(duration.split("minute")[0].strip())

    pieces = duration.split("hour")
    total = int(pieces[0].strip()) * 60

    after_hours = pieces[1]
    if "minute" in after_hours:
        # For plural "hours" the remainder starts with a stray "s"; drop it
        # before parsing the minute count.
        minute_text = after_hours.split("minute")[0].strip().replace("s", "").strip()
        total += int(minute_text)

    return total

# Lookup table from each phrase in time_durations to its length in minutes,
# computed once so question generation does not re-parse the phrase each time.
time_map = {duration: get_minutes_from_duration(duration) for duration in time_durations}


def determine_direction(template):
    """Classify a clock-time template as moving the time earlier or later.

    NOTE: this redefines the weekday-template function of the same name that
    appears earlier in the notebook; from here on the clock-time keyword
    lists below are the ones in effect.

    The decision is a plain substring search on the template text, with the
    "earlier" keywords checked first.

    Returns:
        "earlier", "later", or None when no keyword is found.
    """
    keyword_groups = (
        ("earlier", ("advance", "preponed", "ahead", "earlier")),
        (
            "later",
            (
                "behind", "after", "post-beginning", "additional", "extra",
                "another", "post", "further", "shifted", "delay", "postponed",
                "lagging by", "away",
            ),
        ),
    )
    for label, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in template:
                return label
    return None

def calculate_new_time(initial_time, shift, direction):
    """Return ``initial_time`` moved by the duration phrase ``shift``.

    ``shift`` must be a key of ``time_map``. The time moves backwards when
    ``direction`` is "earlier" and forwards for any other value.
    """
    offset = timedelta(minutes=time_map[shift])
    if direction == "earlier":
        return initial_time - offset
    return initial_time + offset

# Generate the clock-time shift questions, their three options and answer keys.
problems, options_list, correct_answers = [], [], []
answer_mapping = {0: 'A', 1: 'B', 2: 'C'}

for _ in range(1000):
    template = random.choice(templates_minute_hour)

    # Random 12-hour clock time; the three draws happen in hour, minute, AM/PM order.
    hour = random.randint(1, 12)
    minute = random.randint(0, 59)
    meridiem = random.choice(['AM', 'PM'])
    event_time = datetime.strptime(f"{hour}:{minute} {meridiem}", "%I:%M %p")

    duration = random.choice(time_durations)

    direction = determine_direction(template)
    correct_time = calculate_new_time(event_time, duration, direction)
    correct_label = correct_time.strftime("%I:%M %p")

    question = template.format(time=event_time.strftime("%I:%M %p"), duration=duration)

    # Distractor pool: 15, 30, 45 and 60 minutes after the original time,
    # minus whichever of those happens to be the correct answer.
    distractor_pool = []
    for step in range(1, 5):
        candidate = event_time + timedelta(minutes=step * 15)
        if candidate != correct_time:
            distractor_pool.append(candidate)
    distractor_times = random.sample(distractor_pool, 2)

    options = [correct_label]
    options.extend(moment.strftime("%I:%M %p") for moment in distractor_times)
    random.shuffle(options)

    problems.append(question)
    options_list.append(options)
    correct_answers.append(answer_mapping[options.index(correct_label)])

df_min_hour = pd.DataFrame({
    'Question': problems,
    'Option A': [choices[0] for choices in options_list],
    'Option B': [choices[1] for choices in options_list],
    'Option C': [choices[2] for choices in options_list],
    'Answer': correct_answers
}).drop_duplicates(subset=['Question'])
df_min_hour['Category'] = 'Shift - ST'

In [None]:
# --- Week-of-month shift questions ---------------------------------------
# A question names a week of a month (1st-4th) and a shift of 1-4 weeks
# earlier or later; the answer is the date on which the shifted week starts,
# computed within calendar year 2023.

months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
# One ordinal per possible day of month (index = day - 1). This must cover
# 29th-31st as well: a shifted date can land there (e.g. the 22nd plus one week).
day_ordinals = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th', '11th', '12th', '13th', '14th', '15th', '16th', '17th', '18th', '19th', '20th', '21st', '22nd', '23rd', '24th', '25th', '26th', '27th', '28th', '29th', '30th', '31st']
week_ordinals = ["1st", "2nd", "3rd", "4th"]

week_month_templates = [
    "Your vacation originally planned for the {week_ordinal} week of {month} has been rescheduled to {duration}. In which date is it most likely now?",
    "The annual event typically during the {week_ordinal} week of {month} will now happen {duration}. Approximately when will it occur?",
    "The marathon you've been preparing for in the {week_ordinal} week of {month} will now be {duration}. Around when should you be ready?",
    "The art exhibit set for the {week_ordinal} week of {month} will now open {duration}. Around which date is the opening?",
    "The tech conference typically in the {week_ordinal} week of {month} has been moved {duration}. Roughly which date should you mark your calendar for?",
    "The town carnival usually during the {week_ordinal} week of {month} will now be {duration}. About which date is it now?",
    "The music fest during the {week_ordinal} week of {month} will be held {duration}. Around which date will it likely be?",
    "The product launch in the {week_ordinal} week of {month} has been shifted {duration}. Around when will it likely be?",
    "The yearly gala typically in the {week_ordinal} week of {month} will now be on {duration}. About which date is it now?",
    "The corporate retreat set for the {week_ordinal} week of {month} has been moved {duration}. Roughly when is it now?",
    "The winter fest typically in the {week_ordinal} week of {month} is scheduled {duration}. Roughly which date should you mark?",
    "The international trade fair set for the {week_ordinal} week of {month} will now be {duration}. Approximately when is it?",
    "Your family reunion during the {week_ordinal} week of {month} will now be {duration}. Approximately what's the new date?",
    "Your friend's wedding set for the {week_ordinal} week of {month} will now be celebrated {duration}. Approximately when should you attend?",
    "The city parade in the {week_ordinal} week of {month} will now take place {duration}. Around which date is it likely to be?",
    "The beach party set for the {week_ordinal} week of {month} has been moved {duration}. Approximately when should you head to the beach?"
]

# Position of the correct option after shuffling -> answer letter.
answer_map = {0: 'A', 1: 'B', 2: 'C'}

# Distractor days stay within 1..28 so that they name a real date in every
# month, whichever month the shifted date falls in.
MAX_DISTRACTOR_DAY = 28

problems = []
optionA = []
optionB = []
optionC = []
correct_ans = []

for _ in range(1000):
    month = random.choice(months)
    week_num = random.randint(1, 4)

    # Weeks are taken to start on the 1st, 8th, 15th and 22nd of the month.
    start_of_week_day = (week_num - 1) * 7 + 1
    initial_date = datetime.strptime(f"{month} {start_of_week_day} 2023", '%B %d %Y')

    duration_value = random.randint(1, 4)
    shift_direction = random.choice(["earlier", "later"])

    signed_weeks = -duration_value if shift_direction == "earlier" else duration_value
    new_date = initial_date + timedelta(weeks=signed_weeks)

    # Singular for one week: "1 week later", not "1 weeks later".
    week_word = "week" if duration_value == 1 else "weeks"
    problem = random.choice(week_month_templates).format(
        month=month,
        week_ordinal=week_ordinals[week_num - 1],
        duration=f"{duration_value} {week_word} {shift_direction}",
    )
    problems.append(problem)

    shifted_month = new_date.strftime('%B')
    # Use the real day of month (1..31); it is NOT clamped, because clamping
    # would turn e.g. the 29th into a wrong "28th" answer.
    correct_day = new_date.day

    # One distractor a week away and one two weeks away. For any day 1..31 at
    # least one of the +/- candidates lies within 1..MAX_DISTRACTOR_DAY, and
    # the two distractors can never coincide (offsets 7 vs 14).
    distractor1 = random.choice(
        [d for d in (correct_day - 7, correct_day + 7) if 1 <= d <= MAX_DISTRACTOR_DAY]
    )
    distractor2 = random.choice(
        [d for d in (correct_day - 14, correct_day + 14) if 1 <= d <= MAX_DISTRACTOR_DAY]
    )

    correct_option = f"{shifted_month} {day_ordinals[correct_day - 1]}"
    options = [
        correct_option,
        f"{shifted_month} {day_ordinals[distractor1 - 1]}",
        f"{shifted_month} {day_ordinals[distractor2 - 1]}"
    ]

    random.shuffle(options)

    optionA.append(options[0])
    optionB.append(options[1])
    optionC.append(options[2])
    # Store the answer letter directly, like the other question generators do.
    correct_ans.append(answer_map[options.index(correct_option)])

df_week_month = pd.DataFrame({
    "Question": problems,
    "Option A": optionA,
    "Option B": optionB,
    "Option C": optionC,
    "Answer": correct_ans
})
df_week_month = df_week_month.drop_duplicates(subset=['Question'])

In [None]:
# Normalise the answer key to letters: any numeric position 1/2/3 becomes
# 'A'/'B'/'C'; values that are not 1, 2 or 3 are left unchanged by replace().
df_week_month['Answer'] = df_week_month['Answer'].replace({1: 'A', 2: 'B', 3: 'C'})
# Week/month shifts share the 'Shift - MT' category label with the weekday-shift questions.
df_week_month['Category'] = 'Shift - MT'

In [None]:
# Year-shift templates whose answer is {year} + {duration}: each describes
# something happening {duration} years AFTER the stated {year}.
forward_shift_templates = [
    "The next cosmic event, initially predicted for {year}, has been postponed by {duration} years. When is it now expected?",
    "The treaty signed in {year} will be reviewed after {duration} years. In which year will that be?",
    "The futuristic city set in literature is imagined to be in the year {year}. If the story jumps forward by {duration} years, in which year will it be then?",
    "The company's foundation was in {year}. They plan to launch a grand project after {duration} years. When will that be?",
    "The archaeological site dates back to {year}. A significant discovery is made {duration} years after its establishment. When did this discovery happen?",
    "The star, predicted to explode in {year}, has its explosion postponed by {duration} years. When is the new prediction for the explosion?",
    "The art movement, begun in {year}, is predicted to peak in popularity after {duration} years. When will this peak happen?",
    "The ice age, which started in {year}, will see a significant climatic change after {duration} years. When is that expected?",
    "The prophecy was revealed in {year}. It is foretold to come true after {duration} years. When is the prophecy expected to materialize?",
    "The legend speaks of a hero born in {year}. He is expected to save his kingdom after {duration} years from his birth. When will that happen?"
]

# Year-shift templates whose answer is {year} - {duration}: each describes
# something that happened {duration} years BEFORE the stated {year}.
backward_shift_templates = [
    "A famous artifact, discovered in {year}, was believed to have been crafted {duration} years prior. When was it likely made?",
    "The ruins that were uncovered in {year} are estimated to date back an additional {duration} years. From which year do these ruins originate?",
    "The document found in {year} references an event that occurred {duration} years earlier. When did that event likely take place?",
    "The ancient tree, cut down in {year}, was estimated to have sprouted {duration} years before its removal. When did it most likely sprout?",
    "The star, observed to have unique properties in {year}, underwent a transformation {duration} years prior. When did this transformation occur?",
    "The dynasty which fell in {year} had risen to power roughly {duration} years earlier. When was its establishment?",
    "The temple, believed to be renovated in {year}, was initially built {duration} years prior. When was its original construction?",
    "The ancient city ruins, discovered in {year}, thrived as a trade center {duration} years before its fall. When was its golden period?",
    "The manuscript, completed in {year}, is based on events from {duration} years prior. When did those events take place?",
    "The meteorite, found in {year}, crashed to Earth approximately {duration} years earlier. When did the impact likely occur?"
]

In [None]:
def generate_year_questions(num_problems, forward_templates=None, backward_templates=None):
    """Generate multiple-choice questions about shifting a year by N years.

    Args:
        num_problems: Number of questions to generate.
        forward_templates: Templates (with ``{year}`` and ``{duration}``) whose
            answer is ``year + duration``. Defaults to the module-level
            ``forward_shift_templates``.
        backward_templates: Templates whose answer is ``year - duration``.
            Defaults to the module-level ``backward_shift_templates``.

    Returns:
        A DataFrame with columns ``Question``, ``Option A``, ``Option B``,
        ``Option C`` (integer years) and ``Answer`` (``'A'``, ``'B'`` or ``'C'``).
    """
    if forward_templates is None:
        forward_templates = forward_shift_templates
    if backward_templates is None:
        backward_templates = backward_shift_templates

    problems = []
    options_list = []
    answers = []

    for _ in range(num_problems):
        year = random.randint(1800, 2023)   # starting year, 1800..2023 inclusive
        duration = random.randint(1, 1000)  # shift of 1..1000 years
        shift = random.choice(["forward", "backward"])

        if shift == "forward":
            template = random.choice(forward_templates)
            new_year = year + duration
        else:
            template = random.choice(backward_templates)
            new_year = year - duration

        problems.append(template.format(year=year, duration=duration))

        # One near miss (1-3 years off) and one farther miss (4-6 years off),
        # each placed at random before or after the correct year. Without the
        # random sign the correct answer would always be the smallest option.
        # The magnitudes differ, so the three options are always distinct.
        near_offset = random.choice([-1, 1]) * random.randint(1, 3)
        far_offset = random.choice([-1, 1]) * random.randint(4, 6)
        random_options = [new_year, new_year + near_offset, new_year + far_offset]
        random.shuffle(random_options)

        options_list.append(random_options)

        # Answer letter follows the shuffled position of the correct year.
        answers.append("ABC"[random_options.index(new_year)])

    df = pd.DataFrame({
        'Question': problems,
        'Option A': [opt[0] for opt in options_list],
        'Option B': [opt[1] for opt in options_list],
        'Option C': [opt[2] for opt in options_list],
        'Answer': answers
    })

    return df

# 600 year-shift questions; keep the first occurrence of each question text
# and tag the category.
df_year = (
    generate_year_questions(600)
    .drop_duplicates(subset=['Question'])
    .assign(Category='Shift - LT')
)

In [None]:
def generate_calendar_question():
    """Build one calendar-conversion multiple-choice question.

    A single real day is sampled (Gregorian 1800-2023, day of month 1-28) and
    expressed in a randomly chosen source calendar and a different target
    calendar. Sampling one concrete day and converting it both ways guarantees
    that the date shown in the question is a valid date of the source calendar
    and that the resulting years are ordinary positive years; feeding a
    Gregorian-style (year, month, day) directly into the Hebrew or Islamic
    calendar does not.

    Dates are rendered as ``month/day/year`` using whatever month number the
    respective ``convertdate`` module returns.
    # NOTE(review): month numbering conventions of the non-Gregorian calendars
    # are those of convertdate -- confirm they match the intended reading.

    Returns:
        A dict with keys ``Question``, ``Option A``, ``Option B``,
        ``Option C`` and ``Answer`` (``'A'``, ``'B'`` or ``'C'``).
    """
    # Calendar name -> convertdate module providing to_jd() / from_jd().
    calendars = {
        "Gregorian": gregorian,
        "Julian": julian,
        "Hebrew": hebrew,
        "Islamic": islamic,
    }
    question_template = "If the date is {month}/{day}/{year} in the {source_calendar}, what is the date in the {target_calendar}?"

    base_year, base_month, base_day = random.randint(1800, 2023), random.randint(1, 12), random.randint(1, 28)

    calendar_names = list(calendars)
    source_calendar = random.choice(calendar_names)
    target_calendar = random.choice([cal for cal in calendar_names if cal != source_calendar])

    # The Julian Day number is the common pivot between all calendars.
    jd_date = gregorian.to_jd(base_year, base_month, base_day)
    year, month, day = calendars[source_calendar].from_jd(jd_date)
    converted_year, converted_month, converted_day = calendars[target_calendar].from_jd(jd_date)

    question = question_template.format(month=month, day=day, year=year, source_calendar=source_calendar, target_calendar=target_calendar)
    correct_answer = f"{converted_month}/{converted_day}/{converted_year}"

    # Distractors: random month/day with a year within two years of the
    # correct one; retry until three distinct options exist.
    options = [correct_answer]
    while len(options) < 3:
        fake_day = random.randint(1, 28)
        fake_month = random.randint(1, 12)
        fake_year = random.randint(converted_year - 2, converted_year + 2)
        fake_option = f"{fake_month}/{fake_day}/{fake_year}"
        if fake_option not in options:
            options.append(fake_option)

    random.shuffle(options)
    answer_key = 'ABC'[options.index(correct_answer)]

    return {
        "Question": question,
        "Option A": options[0],
        "Option B": options[1],
        "Option C": options[2],
        "Answer": answer_key
    }

# Generate 300 calendar-conversion questions (one dict per question), keep the
# first occurrence of each question text and tag the category.
data = []
for _ in range(300):
    data.append(generate_calendar_question())

df_calendar = pd.DataFrame(data).drop_duplicates(subset=['Question'])
df_calendar['Category'] = 'Shift - Calendar'

In [None]:
# Vocabulary for the "Interpretation" questions: each key is a temporal
# expression and its value lists phrases treated as equivalent to it. A
# question shows either the key or one of its phrases and asks for the other.
# Note that some phrases occur under more than one key (for example "never",
# "forever", "the next day", "right away"), and some keys also occur as a
# phrase of another key (for example "eventually", "immediately",
# "momentarily"), so entries are not mutually exclusive.
base_phrases = {
    # Days
    "tomorrow": ["the next day", "24 hours from now", "a day from today"],
    "yesterday": ["a day prior", "24 hours before", "one day earlier"],
    "in three days": ["72 hours from now", "a few days later"],
    "three days ago": ["72 hours earlier", "a few days before"],
    
    # Weeks
    "in the coming week": ["within seven days", "by the week's end", "before next week"],
    "a week before": ["seven days prior", "the previous seven days", "a week earlier"],
    "in three weeks": ["21 days from now", "several weeks later"],
    "three weeks ago": ["21 days back", "a few weeks earlier"],
    
    # Months
    "in the succeeding month": ["in the next 30 days", "before the next month", "within a month"],
    "a month prior": ["30 days back", "four weeks ago"],
    "in several months": ["in a handful of months", "in multiple months", "after a few months"],
    
    # Years
    "in the subsequent year": ["365 days later", "the next 12 months", "within the coming year"],
    "a year prior": ["12 months back", "365 days ago", "the last year"],
    "in multiple years": ["in a span of years", "several years from now", "after a few years"],
    "years ago": ["multiple years back", "years earlier", "a span of years ago"],
    
    # Vague Future
    "in the foreseeable future": ["in the times ahead", "before too long"],
    "eventually": ["ultimately", "sooner or later", "at some point"],
    
    # Vague Past
    "long ago": ["much earlier", "far back in time", "way back when"],
    "not too long ago": ["quite recently", "just a little while back"],
    
    # Hours/Minutes
    "in a couple of hours": ["in about 120 minutes", "a few hours from now", "soon"],
    "a couple of hours ago": ["about 120 minutes ago", "a short while back", "recently"],
    "in several minutes": ["in a handful of minutes", "after a short period", "in a bit"],
    "several minutes ago": ["a little while ago", "moments earlier", "just then"],
    
    # Misc
    "in due time": ["eventually", "when the time comes", "at the right time"],
    "immediately": ["right away", "without delay", "straightaway"],
    "in a jiffy": ["in a flash", "rapidly", "swiftly", "in a quick moment"],
    "before you know it": ["before long", "in no time", "quickly"],
    "for the time being": ["temporarily", "for now", "momentarily"],
    "at the crack of dawn": ["early morning", "first light", "sunrise"],
    "twilight": ["dusk", "evening's onset", "sunset"],
    "down the road": ["in the future", "later on", "with time"],
    "in ages": ["after a long time", "it's been a while", "in forever"],
    "momentarily": ["in a short while", "briefly", "for a moment"],
    "from dawn till dusk": ["all day", "from morning to evening", "daylight hours"],
    "in the nick of time": ["just in time", "barely", "right on time"],
    "any second now": ["imminently", "very soon", "almost now"],
    "in the wee hours": ["early morning", "before sunrise", "pre-dawn"],
    "in a heartbeat": ["immediately", "right away", "without hesitation"],
    "in the dead of night": ["deepest part of the night", "middle of the night", "very late at night"],
    "once in a blue moon": ["very infrequently", "rarely", "hardly ever"],
    "when pigs fly": ["never", "unlikely to happen", "a time that won't occur"],
    "till the cows come home": ["a very long time", "indefinitely", "for an uncertain long period"],
    "for an eternity": ["forever", "a very long time", "an endless period"],
    "in the blink of an eye": ["instantly", "very quickly", "in a very short time"],
    "when the stars align": ["a fortunate time", "when circumstances are right", "at an opportune moment"],
    "on the eve of": ["just before", "the night before", "right before a significant event"],
    "till kingdom come": ["forever", "an indefinite long time", "until the end of time"],
    "from time immemorial": ["since ancient times", "for a very long time", "from an age long past"],
    "when hell freezes over": ["never", "a time that won't occur", "very unlikely time"],
    "for a coon's age": ["a very long time", "ages", "an indefinite period"],
    "till the twelfth of never": ["never", "a fictional time", "an impossible time"],
    "on the morrow": ["the next day", "day after the current", "following day"],
    "by the time the sun sets": ["end of the day", "evening", "before night"]
}

# Question stems for the interpretation questions; {phrase} is replaced by the
# temporal expression being asked about (it appears inside single quotes).
question_templates = [
    "If someone mentions that an event will happen '{phrase}', when should you anticipate it?",
    "An event is scheduled '{phrase}'. When can you expect this to be?",
    "The timeline indicates '{phrase}' for a certain event. What does this mean in terms of timing?",
    "Someone noted that a milestone will be achieved '{phrase}'. When is this referring to?",
    "You receive a memo with the timestamp '{phrase}'. When should you be prepared?",
    "A festival is being organized '{phrase}'. When would that be?",
    "A note suggests meeting '{phrase}'. When is this suggesting?",
    "The forecast predicts rain '{phrase}'. When is this likely to occur?",
    "If a deadline is mentioned as '{phrase}', when should it be met?",
    "A historic event is documented to have happened '{phrase}'. When did it take place?"
]

In [None]:
def generate_question_from_phrase(base_key, base_phrases, templates=None):
    """Build one multiple-choice question about the expression ``base_key``.

    Either the key is shown and one of its equivalent phrases is the correct
    answer, or one of its phrases is shown and the key is the correct answer
    (chosen at random).

    Distractors are phrases drawn from two *other* keys. A key is only
    eligible as a distractor source if it shares nothing with the base entry:
    it is not itself one of the base phrases and none of its phrases equals
    the base key or a base phrase. This prevents duplicate options and options
    that are just as correct as the intended answer (several phrases such as
    "the next day" or "never" are listed under more than one key).

    Args:
        base_key: Key of ``base_phrases`` the question is about.
        base_phrases: Mapping of expression -> list of equivalent phrases.
        templates: Question stems containing ``{phrase}``. Defaults to the
            module-level ``question_templates``.

    Returns:
        ``(question, options, correct_index)`` where ``options`` is a shuffled
        list of three distinct strings and ``correct_index`` is the position
        of the correct one.

    Raises:
        KeyError: If ``base_key`` is not in ``base_phrases``.
        ValueError: If fewer than two distinct distractors can be found.
    """
    if templates is None:
        templates = question_templates

    synonyms = base_phrases[base_key]

    # Decide whether the key or one of its phrases is shown in the question.
    if random.choice([True, False]):
        question_text = base_key
        correct = random.choice(synonyms)
    else:
        question_text = random.choice(synonyms)
        correct = base_key

    question = random.choice(templates).format(phrase=question_text)

    # Everything in this set would count as a correct (or already shown) answer.
    equivalent = {base_key, *synonyms}
    eligible_keys = [
        key
        for key, phrases in base_phrases.items()
        if key not in equivalent and equivalent.isdisjoint(phrases)
    ]
    random.shuffle(eligible_keys)

    # One phrase per eligible key, skipping phrases already picked (two
    # different keys may list the same phrase).
    distractors = []
    for key in eligible_keys:
        candidates = [phrase for phrase in base_phrases[key] if phrase not in distractors]
        if candidates:
            distractors.append(random.choice(candidates))
        if len(distractors) == 2:
            break

    if len(distractors) < 2:
        raise ValueError(
            f"not enough unrelated phrases to build distractors for {base_key!r}"
        )

    # Shuffle options
    options = [correct, *distractors]
    random.shuffle(options)

    return question, options, options.index(correct)

def generate_mcq_dataframe(base_phrases, num_questions=450):
    """Generate ``num_questions`` interpretation questions as a DataFrame.

    For each question a key of ``base_phrases`` is picked at random and handed
    to ``generate_question_from_phrase``. The result has the columns
    ``Question``, ``Option A``, ``Option B``, ``Option C`` and ``Answer``
    (``'A'``, ``'B'`` or ``'C'``).
    """
    columns = {
        'Question': [],
        'Option A': [],
        'Option B': [],
        'Option C': [],
        'Answer': [],
    }
    option_columns = ('Option A', 'Option B', 'Option C')
    all_keys = list(base_phrases.keys())

    for _ in range(num_questions):
        chosen_key = random.choice(all_keys)
        question, options, correct_idx = generate_question_from_phrase(chosen_key, base_phrases)

        columns['Question'].append(question)
        for position, column_name in enumerate(option_columns):
            columns[column_name].append(options[position])
        # Position of the correct option -> answer letter.
        columns['Answer'].append('ABC'[correct_idx])

    return pd.DataFrame(columns)

# Build the interpretation questions from base_phrases, keep the first
# occurrence of each question text and tag the category.
df_implicit_phrases = (
    generate_mcq_dataframe(base_phrases)
    .drop_duplicates(subset=['Question'])
    .assign(Category='Interpretation')
)

In [None]:
# Stack every generated question set into one frame; ignore_index gives the
# combined frame a fresh 0..n-1 index.
df_ambiguity_resolution = pd.concat(
    [
        df_implicit_phrases,
        df_min_hour,
        df_day,
        df_week_month,
        df_year,
        df_calendar,
    ],
    ignore_index=True,
)