# IQ TEST SEQUENCE BOT - Sequence Parser

In [59]:
# Imports
import numpy as np
import pandas as pd
import re

In [60]:
# Load the raw question data (JSON in 'records' orientation) into a DataFrame
# and display it to check what was read.
seqdataIn = pd.read_json('../data/seq-public.json', orient='records')
print(seqdataIn)

                  stem               options            category    id
0           7,9,-1,5,?        [4, 2, -1, -3]            sequence     0
1        3,2,5/3,3/2,?  [1/4, 7/5, 3/4, 2/5]            sequence     1
2           1,2,5,26,?    [34, 841, 677, 37]            sequence     2
3            2,12,30,?      [50, 65, 75, 56]            sequence     3
4        2,1,2/3,1/2,?  [3/4, 1/4, 2/5, 5/6]            sequence     4
...                ...                   ...                 ...   ...
1071  20 22 25 30 37 ?                    []  sequence-reasoning  1090
1072        0 1 3 10 ?                    []  sequence-reasoning  1091
1073       5 15 10 215                    []  sequence-reasoning  1092
1074        1 2 5 29 ?    [34, 841, 866, 37]  sequence-reasoning  1093
1075         2 12 30 ?      [50, 65, 75, 56]  sequence-reasoning  1094

[1076 rows x 4 columns]


In [61]:
# Pull out the 'stem' column, which holds the raw sequence text of each question.
sequences = seqdataIn['stem']
print(sequences)

0             7,9,-1,5,?
1          3,2,5/3,3/2,?
2             1,2,5,26,?
3              2,12,30,?
4          2,1,2/3,1/2,?
              ...       
1071    20 22 25 30 37 ?
1072          0 1 3 10 ?
1073         5 15 10 215
1074          1 2 5 29 ?
1075           2 12 30 ?
Name: stem, Length: 1076, dtype: object


In [62]:
# Pull out the 'id' column so each sequence can be keyed by its question id.
ids = seqdataIn['id']
print(ids)

0          0
1          1
2          2
3          3
4          4
        ... 
1071    1090
1072    1091
1073    1092
1074    1093
1075    1094
Name: id, Length: 1076, dtype: int64


In [63]:
# Build an id -> raw sequence text lookup and list every pair.
a = dict(zip(ids, sequences))
for k, v in a.items():
    print(f"{k} : {v}")

0 : 7,9,-1,5,?
1 : 3,2,5/3,3/2,?
2 : 1,2,5,26,?
3 : 2,12,30,?
4 : 2,1,2/3,1/2,?
5 : 4,2,2,3,6,?
6 : 1,7,8,57,?
7 : 4,12,8,10,?
8 : 1/2,1,1,?,9/11,11/13
9 : 95,88,71,61,50,?
10 : 2,6,13,39,15,45,23,?
11 : 1,3,3,5,7,9,13,15,?,?
12 : 1,2,8,28,?
13 : 0,4,18,?,100
14 : 1,1, 2, 2, 3, 4, 3, 5, ?
15 : 1,52, 313, 174,?
16 : 5, 15, 10, 215, ?
17 : -7, 0, 1, 2, 9, ?
18 : 0,1,3,10,?
19 : 9/2,14,65/2,?,217/2
20 : 1,1,2,6,24,?
21 : 3,4,8,24,88,?
22 : 20,22,25,30,37,?
23 : 1/9,2/27,1/27
24 : √2,3,√28,√65,?
25 : 1,2,4,8,16,?
26 : 2,1,2/3,1/2,?
27 : 1,1,3,7,17,41,?
28 : 5/2,5,25/2,75/2,?
29 : 6,15,35,77,?
30 : 1,3,3,6,7,12,15,?
31 : 2/3,1/2,3/7,7/18,?
32 : 63,26,7,0,-1,-2,?
33 : 1,2,3,6,11,20,?
34 : 1,2,3,7,16,?
35 : 2,15,7,40,77,?
36 : 2,6,12,20,?
37 : 0,6,24,60,120,?
38 : 2,12,30,?
39 : 1,2,3,6,12,?
40 : 1,3,6,12,?
41 : -2,-8,0,64,?
42 : 129,107,73,17,-73,?
43 : 0,4,18,48,100,?
44 : 65,35,17,3,?
45 : 1,6,13,?
46 : -2,1,-1/2,1/4,-1/8,?
47 : 1,5,9,14,21,?
48 : 1,2,4,6,9,?,18
49 : 120,20,?,-4
50 : 6,13,

In [64]:
# Same listing, but straight from the Series (keyed by its index labels).
for k, v in sequences.items():
    print(f"{k} : {v}")

0 : 7,9,-1,5,?
1 : 3,2,5/3,3/2,?
2 : 1,2,5,26,?
3 : 2,12,30,?
4 : 2,1,2/3,1/2,?
5 : 4,2,2,3,6,?
6 : 1,7,8,57,?
7 : 4,12,8,10,?
8 : 1/2,1,1,?,9/11,11/13
9 : 95,88,71,61,50,?
10 : 2,6,13,39,15,45,23,?
11 : 1,3,3,5,7,9,13,15,?,?
12 : 1,2,8,28,?
13 : 0,4,18,?,100
14 : 1,1, 2, 2, 3, 4, 3, 5, ?
15 : 1,52, 313, 174,?
16 : 5, 15, 10, 215, ?
17 : -7, 0, 1, 2, 9, ?
18 : 0,1,3,10,?
19 : 9/2,14,65/2,?,217/2
20 : 1,1,2,6,24,?
21 : 3,4,8,24,88,?
22 : 20,22,25,30,37,?
23 : 1/9,2/27,1/27
24 : √2,3,√28,√65,?
25 : 1,2,4,8,16,?
26 : 2,1,2/3,1/2,?
27 : 1,1,3,7,17,41,?
28 : 5/2,5,25/2,75/2,?
29 : 6,15,35,77,?
30 : 1,3,3,6,7,12,15,?
31 : 2/3,1/2,3/7,7/18,?
32 : 63,26,7,0,-1,-2,?
33 : 1,2,3,6,11,20,?
34 : 1,2,3,7,16,?
35 : 2,15,7,40,77,?
36 : 2,6,12,20,?
37 : 0,6,24,60,120,?
38 : 2,12,30,?
39 : 1,2,3,6,12,?
40 : 1,3,6,12,?
41 : -2,-8,0,64,?
42 : 129,107,73,17,-73,?
43 : 0,4,18,48,100,?
44 : 65,35,17,3,?
45 : 1,6,13,?
46 : -2,1,-1/2,1/4,-1/8,?
47 : 1,5,9,14,21,?
48 : 1,2,4,6,9,?,18
49 : 120,20,?,-4
50 : 6,13,

In [65]:
# Spot-check one stem by index label, then confirm the container types of
# the id-keyed dict `a` and the original Series `sequences`.
print(sequences[409])
print(type(a))
print(type(sequences))

1,2,3,5,4,4,?,2,2
<class 'dict'>
<class 'pandas.core.series.Series'>


In [68]:
#cleans up sequences
def txt2seq(seq):
    """Split each raw sequence string into a list of string tokens.

    An empty pair of parentheses (with any amount of whitespace inside) is
    treated as a blank to fill in and rewritten to "?". The text is then split
    on commas and whitespace, and empty tokens are discarded.

    Parameters:
        seq: any object with an ``.items()`` method yielding ``(key, text)``
            pairs, e.g. a dict or a pandas Series of strings.

    Returns:
        dict mapping each key to its list of string tokens.
    """
    # Raw strings: "\(" and "\s" are invalid escapes in ordinary string
    # literals and trigger warnings on modern Python versions.
    blank = re.compile(r"\(\s*\)")     # "()", "( )", "(   )" -> placeholder
    delim = re.compile(r"[,\s]+")      # any run of commas and/or whitespace
    s2 = {}
    for i, s in seq.items():
        si = blank.sub("?", s)         # replace empty parentheses with question mark
        # Leading/trailing delimiters leave empty strings behind; drop them.
        s2[i] = [e for e in delim.split(si) if e]
    return s2
# Tokenise every id-keyed stem and show the resulting dict.
split_seq = txt2seq(pd.Series(a))
print(f"Split: {split_seq}")

Split: {0: ['7', '9', '-1', '5', '?'], 1: ['3', '2', '5/3', '3/2', '?'], 2: ['1', '2', '5', '26', '?'], 3: ['2', '12', '30', '?'], 4: ['2', '1', '2/3', '1/2', '?'], 5: ['4', '2', '2', '3', '6', '?'], 6: ['1', '7', '8', '57', '?'], 7: ['4', '12', '8', '10', '?'], 8: ['1/2', '1', '1', '?', '9/11', '11/13'], 9: ['95', '88', '71', '61', '50', '?'], 10: ['2', '6', '13', '39', '15', '45', '23', '?'], 11: ['1', '3', '3', '5', '7', '9', '13', '15', '?', '?'], 12: ['1', '2', '8', '28', '?'], 13: ['0', '4', '18', '?', '100'], 14: ['1', '1', '2', '2', '3', '4', '3', '5', '?'], 15: ['1', '52', '313', '174', '?'], 16: ['5', '15', '10', '215', '?'], 17: ['-7', '0', '1', '2', '9', '?'], 18: ['0', '1', '3', '10', '?'], 19: ['9/2', '14', '65/2', '?', '217/2'], 20: ['1', '1', '2', '6', '24', '?'], 21: ['3', '4', '8', '24', '88', '?'], 22: ['20', '22', '25', '30', '37', '?'], 23: ['1/9', '2/27', '1/27'], 24: ['√2', '3', '√28', '√65', '?'], 25: ['1', '2', '4', '8', '16', '?'], 26: ['2', '1', '2/3', '1/2',

In [75]:
#checks for valid sequences (all numbers or fractions excluding ?)
def findBadSeq(seq):
    """Return the sequences that contain at least one non-numeric token.

    A token is accepted when it starts with a digit, '/', '?' or '-' and every
    following character is one of those or a decimal point. Anything else
    (letters, '√', '+', parentheses, an empty token, ...) marks the whole
    sequence as bad.

    Parameters:
        seq: dict mapping a key to an already-split list of string tokens.

    Returns:
        dict holding only the bad entries of ``seq``, with the same keys and
        the same token lists.
    """
    # fullmatch validates the WHOLE token. re.match with a negated class only
    # ever inspected the first character, so tokens such as '11√5' or '3x'
    # slipped through. '.' is allowed after the first character so decimals
    # like '7.5' (which already passed before) are still accepted.
    validToken = re.compile(r'[0-9/?\-][0-9/?.\-]*')
    badSeqs = {}
    for a, s in seq.items():     #check each sequence
        #check each item (assuming sequence is already split)
        if any(validToken.fullmatch(i) is None for i in s):
            badSeqs[a] = s
    return badSeqs

#remove bad sequences from good sequences
def remBadSeq(seq):
    """Return a copy of ``seq`` without the entries flagged by findBadSeq.

    The original keys are preserved so each surviving sequence can still be
    matched to its answer.

    Parameters:
        seq: dict mapping a key to an already-split list of string tokens.

    Returns:
        New dict with the good sequences only, in the original key order.
    """
    # findBadSeq returns a dict keyed by the bad indexes; testing membership
    # against it is O(1), unlike scanning a list of keys for every sequence.
    badSeqs = findBadSeq(seq)
    return {i: s for i, s in seq.items() if i not in badSeqs}


In [76]:
# List every sequence rejected as bad, followed by how many there are.
bs = findBadSeq(split_seq)
for k, v in bs.items():
    print(f"{k}:{v}")
print(f"# Bad sequences = {len(bs)}/{len(sequences)}")

24:['√2', '3', '√28', '√65', '?']
86:['A', 'F', 'H', 'K', 'N', '?', '?']
87:['A', 'D', 'I', 'P', 'Y', 'CF', 'DI', 'FD', '?']
119:['+1', '-1', '1', '-1', '?']
162:['19/13', '1', '19/13', '10/22', '?', 'a.7/24;b.7/25;c.5/26;d.7/26;']
300:['√5', '√55', '11√5', '11√55', '?']
371:['0', '3', '1', '6', '√2', '12', '?', '?', '2', '48']
393:['1/2', '√3/2', '√3/3', '?']
415:['(√5-1)/4', '1/3', '(√3', '-1)/2', '√2', '-1', '?']
479:['A', 'C', 'F', 'J', 'O', '?', 'What', 'letter', 'comes', 'next?']
497:['72', '(68)', '41', '28', '(98)', '16', '34', '?', '56']
502:['4342', '(3176)', '1726', '7995', '(7516)', '2162', '8418', '?', '1725']
504:['-3', '+6', '+2', '-3', '+7', '-12', '?']
622:['961', '(25)', '432', '932', '?', '731']
625:['16', '(96)', '12', '10', '?', '7.5']
629:['The', 'following', 'series', 'of', 'numbers', 'contains', 'one', 'number', 'that', 'does', 'not', 'fit', 'the', 'pattern', 'set', 'by', 'the', 'others.', 'What', 'number', 'does', 'not', 'fit?', '3', '5', '7', '11', '14', '17']

In [77]:
# Keep only the sequences that passed validation and list them with a count.
gs = remBadSeq(split_seq)
for k, v in gs.items():
    print(f"{k}:{v}")
print(f"# Good sequences = {len(gs)}/{len(sequences)}")

0:['7', '9', '-1', '5', '?']
1:['3', '2', '5/3', '3/2', '?']
2:['1', '2', '5', '26', '?']
3:['2', '12', '30', '?']
4:['2', '1', '2/3', '1/2', '?']
5:['4', '2', '2', '3', '6', '?']
6:['1', '7', '8', '57', '?']
7:['4', '12', '8', '10', '?']
8:['1/2', '1', '1', '?', '9/11', '11/13']
9:['95', '88', '71', '61', '50', '?']
10:['2', '6', '13', '39', '15', '45', '23', '?']
11:['1', '3', '3', '5', '7', '9', '13', '15', '?', '?']
12:['1', '2', '8', '28', '?']
13:['0', '4', '18', '?', '100']
14:['1', '1', '2', '2', '3', '4', '3', '5', '?']
15:['1', '52', '313', '174', '?']
16:['5', '15', '10', '215', '?']
17:['-7', '0', '1', '2', '9', '?']
18:['0', '1', '3', '10', '?']
19:['9/2', '14', '65/2', '?', '217/2']
20:['1', '1', '2', '6', '24', '?']
21:['3', '4', '8', '24', '88', '?']
22:['20', '22', '25', '30', '37', '?']
23:['1/9', '2/27', '1/27']
25:['1', '2', '4', '8', '16', '?']
26:['2', '1', '2/3', '1/2', '?']
27:['1', '1', '3', '7', '17', '41', '?']
28:['5/2', '5', '25/2', '75/2', '?']
29:['6', '1

In [78]:
#converts all sequence items to float values
def floatConv(s):
    """Convert one sequence token to a float.

    Plain numbers ("7", "-2", "7.5") are converted directly; a simple fraction
    "a/b" is evaluated as ``float(a) / float(b)``.

    Parameters:
        s: the token to convert (normally a string).

    Returns:
        The float value, or the sentinel string 'X' when the token is neither
        a number nor a valid two-part fraction (including a zero denominator).
    """
    try:
        return float(s)
    except ValueError:
        pass   # not a plain number; fall through and try it as a fraction

    try:
        # Unpacking raises ValueError unless there is exactly one '/'.
        num, denom = s.split('/')
        return float(num) / float(denom)
    except (ValueError, ZeroDivisionError):
        # "1/0" used to escape as an uncaught ZeroDivisionError; it is just
        # another unusable token.
        return 'X'   #bad

#converts all the sequences to floats
def seq2Float(seq):
    """Convert every token of every sequence to a float, keeping "?" as is.

    Parameters:
        seq: dict mapping a key to a list of string tokens.

    Returns:
        dict with the same keys whose values are lists of floats and "?"
        placeholders. A sequence is left out entirely when any of its tokens
        converts to the 'X' failure sentinel returned by floatConv.
    """
    converted = {}
    for key, tokens in seq.items():
        values = [tok if tok == "?" else floatConv(tok) for tok in tokens]
        if "X" not in values:   # only keep sequences where every token converted
            converted[key] = values
    return converted

In [79]:
# Convert the good sequences to floats and list them with a count.
fs = seq2Float(gs)
for k, v in fs.items():
    print(f"{k}:{v}")
print(f"# Float sequences = {len(fs)}/{len(sequences)}")

0:[7.0, 9.0, -1.0, 5.0, '?']
1:[3.0, 2.0, 1.6666666666666667, 1.5, '?']
2:[1.0, 2.0, 5.0, 26.0, '?']
3:[2.0, 12.0, 30.0, '?']
4:[2.0, 1.0, 0.6666666666666666, 0.5, '?']
5:[4.0, 2.0, 2.0, 3.0, 6.0, '?']
6:[1.0, 7.0, 8.0, 57.0, '?']
7:[4.0, 12.0, 8.0, 10.0, '?']
8:[0.5, 1.0, 1.0, '?', 0.8181818181818182, 0.8461538461538461]
9:[95.0, 88.0, 71.0, 61.0, 50.0, '?']
10:[2.0, 6.0, 13.0, 39.0, 15.0, 45.0, 23.0, '?']
11:[1.0, 3.0, 3.0, 5.0, 7.0, 9.0, 13.0, 15.0, '?', '?']
12:[1.0, 2.0, 8.0, 28.0, '?']
13:[0.0, 4.0, 18.0, '?', 100.0]
14:[1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 3.0, 5.0, '?']
15:[1.0, 52.0, 313.0, 174.0, '?']
16:[5.0, 15.0, 10.0, 215.0, '?']
17:[-7.0, 0.0, 1.0, 2.0, 9.0, '?']
18:[0.0, 1.0, 3.0, 10.0, '?']
19:[4.5, 14.0, 32.5, '?', 108.5]
20:[1.0, 1.0, 2.0, 6.0, 24.0, '?']
21:[3.0, 4.0, 8.0, 24.0, 88.0, '?']
22:[20.0, 22.0, 25.0, 30.0, 37.0, '?']
23:[0.1111111111111111, 0.07407407407407407, 0.037037037037037035]
25:[1.0, 2.0, 4.0, 8.0, 16.0, '?']
26:[2.0, 1.0, 0.6666666666666666, 0.5, '?']
2

In [80]:
# Load the answer file (JSON in 'index' orientation) and keep its 'answer' column.
ansDatIn = pd.read_json("../data/seq-public.answer.json", orient='index')
answers = ansDatIn['answer']

In [81]:
# Sanity check: a negative integer token should convert to a negative float.
floatConv("-2")

-2.0

In [82]:
# Sanity check: negative tokens survive conversion of a whole sequence.
seq2Float({"1":["-2","-4","8"]})

{'1': [-2.0, -4.0, 8.0]}

In [83]:
# Inspect the converted form of the sequence stored under key 41.
fs[41]

[-2.0, -8.0, 0.0, 64.0, '?']

In [84]:
# Compare with the raw stem at index label 41 of the original Series.
sequences[41]

'-2,-8,0,64,?'

KeyError: 82

NameError: name 'opts' is not defined