In [13]:
# Imports
import numpy as np
import pandas as pd
import re
import csv
import random
import math

In [114]:
#turns a sequence into a question format
# seq - whole dataset interger sequence
# l   - length of the question sequence
# c   - number of answer choices available
# p   - probability of missing element being last in the sequence
def toMCQuestion(seq,l,c,p):
    i = {}
    l = min(len(seq), l)
    
    #question
    a = random.randint(0,len(seq)-l)
    b = a+l-1
    s = seq[a:b]
    sd = math.ceil(np.std(s))
    if random.random() < p:
        m = len(s)-1              #use last element as missing
    else:
        m = random.randint(0,len(s)-1)   #random missing element
    
    #add sequence
    ans = s[m]
    s[m] = "?"    #set as missing
    i["sequence"] = s
    #i["sequence"] = ",".join([str(j) for j in s])
    
    #make randomly generated choices
    cs = []
    cs.append(ans)   #add right answer
    
    while len(cs) < c:       #add fake answers
        e = math.floor(ans+sd*random.uniform(-2,3))    #vary the answer choices using std and uniform randomness
        if e not in cs:
            cs.append(e)
    
    random.shuffle(cs)       #shuffle order
    i["options"] = cs
    
    #set answer to the missing element and add
    i["answer"] = np.where(np.array(cs)==ans)[0][0]
    
    return i


#test making questions from fibonacci
for i in range(10):
    print(toQuestion([1,1,2,3,5,8,13,21,34],5,4,0.75))

{'sequence': [3, 5, 8, '?'], 'options': [18, 7, 16, 13], 'answer': 3}
{'sequence': [3, 5, 8, '?'], 'options': [13, 23, 6, 19], 'answer': 0}
{'sequence': [1, '?', 2, 3], 'options': [0, 1, -1, 2], 'answer': 1}
{'sequence': [2, 3, 5, '?'], 'options': [8, 11, 15, 13], 'answer': 0}
{'sequence': [3, 5, 8, '?'], 'options': [21, 18, 13, 20], 'answer': 2}
{'sequence': [3, 5, 8, '?'], 'options': [16, 11, 10, 13], 'answer': 3}
{'sequence': [5, 8, 13, '?'], 'options': [27, 31, 7, 21], 'answer': 3}
{'sequence': [2, 3, '?', 8], 'options': [5, 9, 6, 11], 'answer': 0}
{'sequence': [5, 8, 13, '?'], 'options': [9, 40, 21, 34], 'answer': 2}
{'sequence': [2, 3, 5, '?'], 'options': [8, 4, 3, 2], 'answer': 0}


In [135]:
'''
  Assume the dataset is in vectorized 2d numeric array format where the whole set is given per row/sample
  Dataset is in the form .csv
  
  ex. 
      [ 9, 67, 14, 14, 65,  0, 23, 57, 49, 11, 22, 77, 43, 21, 60, 74, 56  .....] (length = 43)
      
'''

# read in dataset from a csv as a 2d array
def importData(filename):
    return list(csv.reader(open(filename)))


# create multiple choice questions from all of the samples in the dataset
# INPUTS:
# minQuest = minimum number of questions to use (-1 = all samples, > len(dataset) = +random selection )
# seqRange = length range of the starting sequence
# ansRange = number range of the possible answers
# lastProb = probability of the missing element being at the last element or somewhere else

def multiChoice(dataset, minQuest = -1, seqRange=[3,17],ansRange=[3,6],lastProb=0.85):
    qdat = []
    seqstr = []                #keep track of stringified saved sequences
    shufDat = dataset.copy()
    random.shuffle(shufDat)
    
    #go over each item
    i = 0
    for d in shufDat:
        #generate a question from the sample
        l = random.randint(seqRange[0],seqRange[1])
        c = random.randint(ansRange[0],ansRange[1])
        q = toQuestion(d,l,c,lastProb)
        
        q["id"] = i
        i+= 1
        
        seqstr.append(str(q["sequence"]))
        
        qdat.append(q)
        
        #enough questions, finish
        if minQuest > 0 and len(qdat) > minQuest:
            break
          
    #get random extra questions to fill quota
    leftover = minQuest - len(shufDat)
    if minQuest != -1 and leftover > 0:
        for i in range(leftover):
            
            d = random.choice(shufDat)  #get random sequence from the data
            
            #generate a question from the sample
            l = random.randint(seqRange)
            c = random.randint(ansRange)
            q = toQuestion(d,l,c,lastProb)

            q["id"] = i
            i+= 1

            #check if already in the dataset 
            if str(q["sequence"]) not in seqstr:
                qdat.append(q)
            
            #enough questions, finish
            if len(questions) > minQuest:
                break
    
    #return multiple choice set
    return qdat

In [136]:
#test on arithmetic sequences
def testArith(n,k):
    d = []
    for i in range(n):
        kr = random.randint(5,k)
        a = random.randint(0,5)
        b = random.randint(1,20)
        s = []
        for j in range(kr):
            s.append(a+j*b)
        d.append(s)
    return d

td = testArith(30,13)
print(td)

[[2, 15, 28, 41, 54, 67, 80, 93], [0, 6, 12, 18, 24, 30, 36, 42, 48], [1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 101, 111, 121], [5, 11, 17, 23, 29, 35, 41], [5, 24, 43, 62, 81], [0, 8, 16, 24, 32, 40, 48, 56], [5, 19, 33, 47, 61], [2, 11, 20, 29, 38], [1, 13, 25, 37, 49, 61], [4, 11, 18, 25, 32, 39, 46, 53], [0, 17, 34, 51, 68, 85, 102, 119, 136, 153], [0, 13, 26, 39, 52, 65, 78, 91, 104, 117], [1, 21, 41, 61, 81, 101, 121, 141, 161], [2, 11, 20, 29, 38, 47, 56, 65, 74, 83], [3, 20, 37, 54, 71, 88, 105, 122, 139, 156], [3, 7, 11, 15, 19], [0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33], [1, 10, 19, 28, 37, 46, 55, 64, 73, 82, 91, 100, 109], [2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35], [4, 9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64], [4, 21, 38, 55, 72, 89, 106, 123, 140, 157, 174, 191, 208], [4, 24, 44, 64, 84, 104, 124, 144, 164, 184, 204, 224, 244], [3, 18, 33, 48, 63, 78, 93, 108, 123, 138, 153, 168], [0, 6, 12, 18, 24, 30, 36, 42, 48, 54], [0, 9, 18, 27, 36, 45, 54, 63, 72], [5, 21

In [137]:
multiChoice(td)

[{'sequence': [0, 13, 26, 39, 52, 65, 78, 91, '?'],
  'options': [104, 114, 106],
  'answer': 0,
  'id': 0},
 {'sequence': [106, 123, 140, 157, 174, '?'],
  'options': [188, 191, 139],
  'answer': 1,
  'id': 1},
 {'sequence': [9, '?', 27],
  'options': [10, 27, 30, 7, 18, 33],
  'answer': 4,
  'id': 2},
 {'sequence': [25, 37, '?'],
  'options': [42, 61, 65, 49, 60, 69],
  'answer': 3,
  'id': 3},
 {'sequence': [5, 11, 17, 23, 29, '?'],
  'options': [38, 22, 35, 58, 40],
  'answer': 2,
  'id': 4},
 {'sequence': [41, 61, 81, 101, 121, '?'],
  'options': [170, 186, 141, 113],
  'answer': 2,
  'id': 5},
 {'sequence': [3, 7, 11, '?'],
  'options': [11, 6, 25, 26, 7, 15],
  'answer': 5,
  'id': 6},
 {'sequence': [2, 11, 20, 29, 38, 47, 56, 65, '?'],
  'options': [74, 39, 143, 54, 44],
  'answer': 0,
  'id': 7},
 {'sequence': [4, 11, 18, '?'],
  'options': [25, 29, 13, 20],
  'answer': 0,
  'id': 8},
 {'sequence': [5, '?', 43, 62],
  'options': [-3, 23, 47, 24, 56],
  'answer': 3,
  'id': 9},