In [58]:
from sklearn.ensemble import RandomForestClassifier
from scipy.sparse import csc_matrix, bmat, vstack, save_npz, csc_matrix, load_npz
import numpy as np

import time
import pickle
import subprocess

In [59]:
DATA = 'data/'

# Read data: every input is a text file with one record per line.
with open(DATA + 'statements', 'r') as f:
    statements = f.read().splitlines()
    # Name = text before the first comma, minus its first len('fof(') characters.
    stmts_names = [st.split(',')[0][len('fof('):] for st in statements]

with open(DATA + 'chronology', 'r') as f:
    chronology = f.read().splitlines()

with open(DATA + 'features', 'r') as f:
    # Each line is '<name>:<feature>, <feature>, ...'; split it only once.
    features = {}
    for line in f.read().splitlines():
        fields = line.split(':')
        features[fields[0]] = fields[1].split(', ')

with open(DATA + 'dependencies_train', 'r') as f:
    train_dep_lines = f.read().splitlines()
    # Each line is '<statement>:<premise> <premise> ...'. A statement may appear
    # on several lines; its premises are concatenated (duplicates are kept).
    train_dep = {}
    for line in train_dep_lines:
        fields = line.split(':')
        stmt = fields[0]
        prms = fields[1].split(' ')
        train_dep.setdefault(stmt, []).extend(prms)

with open(DATA + 'conjectures_test', 'r') as f:
    conj_test = f.read().splitlines()

# Position of every statement name in the chronology.
map_chrono = {f : i for i, f in enumerate(chronology)}

flatten = lambda t: [item for sublist in t for item in sublist]
# Element-wise `and` of two equally long 0/1 vectors.
land = lambda l1, l2: [a and b for a, b in zip(l1, l2)]

# Prepare features sparse vectors
# Sorted so the feature -> column mapping is identical on every kernel start:
# the iteration order of a set of strings changes between interpreter runs
# (hash randomization), which made the column order irreproducible.
# NOTE(review): cached training matrices must be built with this same ordering.
fts = sorted(set(flatten(features.values())))
map_fts = {f : i for i, f in enumerate(fts)}

# Now I will convert statements to vectors
# One 0/1 list per statement, indexed like `fts`. Filling the vector through
# `map_fts` avoids a list-membership scan per (statement, feature) pair.
stmts = {}
for st in stmts_names:
    vec = [0] * len(fts)
    for ft in features[st]:
        vec[map_fts[ft]] = 1
    stmts[st] = vec

In [60]:
# Number of training labels.
# NOTE(review): `trainY` is only assigned in a later cell (the one that unpickles
# 'train_data/fullTrainY200'); on a fresh top-to-bottom run this line raises
# NameError unless it is defined elsewhere.
len(trainY)

206639

In [84]:
# NOTE(review): `trainX` is produced by scipy's `load_npz` in a later cell; scipy
# sparse matrices provide `todense()` / `toarray()`, so `to_dense` looks like a
# typo — confirm. The recorded output reports a 383851x10241 float64 matrix,
# which is presumably far too large to densify — verify before running.
dense = trainX.to_dense()

<383851x10241 sparse matrix of type '<class 'numpy.float64'>'
	with 2361988 stored elements in COOrdinate format>

In [97]:
# Cached training set: pickled labels plus the sparse feature matrix.
# NOTE: pickle.load can execute arbitrary code from the file; only load caches
# that were produced locally and are trusted.
with open('train_data/fullTrainY200', 'rb') as labels_file:
    trainY = pickle.load(labels_file)
trainX = load_npz('train_data/fullTrainX200.npz')

In [98]:
# Train the premise-selection classifier on the cached training set.
# Tree depth is capped at 20 and the random seed is fixed.
forest = RandomForestClassifier(max_depth=20, random_state=2137)
# Left as the last expression so the notebook shows the fitted estimator.
forest.fit(trainX, trainY)

RandomForestClassifier(max_depth=20, random_state=2137)

In [72]:
def run_proof (conj, premises):
    """Run the external E prover wrapper on one conjecture.

    The premise names are written to 'premises.txt' (one per line, the file is
    overwritten), then 'run_E_prover.py' is started and its stdout is scanned
    for the success marker.

    Parameters
    ----------
    conj : str or any
        Name of the conjecture to prove. For backward compatibility any
        non-string value (existing callers pass the feature vector here) is
        ignored and the module-level `conj_name` is used instead, exactly as
        before.
    premises : iterable of str
        Names of the premises handed to the prover.

    Returns
    -------
    int
        1 if the prover output contains 'FOUND' (and not 'NOT FOUND'), else 0.
    """
    import os

    name = conj if isinstance(conj, str) else conj_name

    with open('premises.txt', 'w') as p_file:
        p_file.writelines(p + '\n' for p in premises)

    # The former `export EPROVER=...` ran in its own throwaway shell, so the
    # variable never reached the prover process. Pass it through `env` instead;
    # a value already present in the notebook's environment takes precedence.
    env = dict(os.environ)
    env.setdefault('EPROVER',
                   os.path.expanduser('~/EProver/Ecompiledlinux/E/PROVER/eprover'))

    # Argument list without a shell: the conjecture name comes from a data file
    # and must not be interpreted as shell syntax.
    res = subprocess.run(['python3', 'run_E_prover.py',
                          '--conjecture', name,
                          '--premises', 'premises.txt',
                          '--statements', 'data/statements'],
                         capture_output=True, env=env)
    output = str(res.stdout)
    found = 'FOUND' in output and 'NOT FOUND' not in output
    print(res.stdout)
    return int(found)

In [73]:
def score_and_prove(forest, conj, threshold=0.9):
    """Select premises for a conjecture with `forest` and try to prove it.

    Every statement that comes before the conjecture in `chronology` is a
    candidate. Each candidate is scored on the element-wise `land` of its
    feature vector with `conj`; candidates whose second `predict_proba`
    column exceeds `threshold` are passed to `run_proof`.

    NOTE: the conjecture is identified through the module-level `conj_name`,
    not through an argument. Callers must set `conj_name` to the statement
    whose vector is passed as `conj` before calling.

    Parameters
    ----------
    forest : fitted classifier exposing `predict_proba`
    conj : list of int
        Feature vector of the conjecture (an entry of `stmts`).
    threshold : float, optional
        Minimum score (exclusive) for a candidate to be kept. Defaults to
        0.9, the value that used to be hard-coded.

    Returns
    -------
    int
        Whatever `run_proof` returns (1 when a proof was found, else 0).
    """
    # Sliced once; the slice used to be rebuilt for every selected premise.
    candidates = chronology[:map_chrono[conj_name]]

    premises = []
    # A conjecture at the very start of the chronology has no candidates, and
    # predict_proba does not accept an empty batch.
    if candidates:
        scores = forest.predict_proba([land(conj, stmts[dep_name]) for dep_name in candidates])
        premises = [dep_name for dep_name, s in zip(candidates, scores) if s[1] > threshold]

    return run_proof(conj, premises)

In [95]:
# Score the candidate premises of a single conjecture by hand.
# A training conjecture is used here; the former `conj_name = conj_test[10]`
# line was overwritten immediately and has been dropped. Assign that instead
# to inspect a test conjecture.
conj_name = list(train_dep.keys())[20]
conj = stmts[conj_name]
# One row of class probabilities per statement that precedes the conjecture.
scores = forest.predict_proba([land(conj, stmts[dep_name]) for dep_name in chronology[:map_chrono[conj_name]]])

In [96]:
# Display the `predict_proba` result computed in the preceding cell.
# NOTE(review): this prints the whole array; a slice such as `scores[:10]`
# would keep the notebook output short.
scores

array([[0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.53760982, 0.46239018],
       [0.

In [86]:
# Print a marker line for every candidate whose second-column score exceeds 0.6.
for row in scores:
    positive = row[1]
    if positive > 0.6:
        print("hura")

In [43]:
# Number of active features of the candidate at position 100 before the conjecture.
# NOTE(review): this value is computed but never shown; a notebook cell only
# displays its last expression.
sum(stmts[chronology[:map_chrono[conj_name]][100]])
# Chronology index of the current conjecture, i.e. how many candidates precede it.
map_chrono[conj_name]

232

In [42]:
# Number of scored candidates; `scores` has one row per statement that precedes
# the conjecture, so this should equal map_chrono[conj_name].
len(scores)

232

In [25]:
# Try to prove every test conjecture and report the share that succeeded.
proven = 0
# The loop variable must be the global `conj_name`: score_and_prove and
# run_proof read that global rather than taking the name as an argument.
for conj_name in conj_test:
    print(conj_name)
    # score_and_prove returns 1 when a proof was found and 0 otherwise.
    proven += score_and_prove(forest, stmts[conj_name])
# NOTE(review): raises ZeroDivisionError if conj_test is empty.
print("PROVED: ", 100 * proven / len(conj_test), "% of test set")

t34_waybel_0
b'Proof of conjecture t34_waybel_0 NOT found. Output saved in file t34_waybel_0.E_output \n'
t5_funct_2
b'Proof of conjecture t5_funct_2 NOT found. Output saved in file t5_funct_2.E_output \n'
t57_tmap_1


KeyboardInterrupt: 

In [16]:
# Sanity check: try to prove every conjecture that has training dependencies
# and report the share that succeeded.
proven = 0
# The loop variable must be the global `conj_name`: score_and_prove and
# run_proof read that global rather than taking the name as an argument.
for conj_name in list(train_dep.keys()):
    print(conj_name)
    proven += score_and_prove(forest, stmts[conj_name])
# The percentage is relative to the conjectures iterated above. It used to be
# divided by len(conj_test) and labelled "test set", a copy-paste slip from the
# test-set evaluation cell.
print("PROVED: ", 100 * proven / len(train_dep), "% of train set")

t3_xboole_0
b'Proof of conjecture t3_xboole_0 FOUND. Output in file t3_xboole_0.E_output\n'
t7_xboole_0
b'Proof of conjecture t7_xboole_0 FOUND. Output in file t7_xboole_0.E_output\n'
t6_xboole_1
b'Proof of conjecture t6_xboole_1 FOUND. Output in file t6_xboole_1.E_output\n'
t9_xboole_1
b'Proof of conjecture t9_xboole_1 FOUND. Output in file t9_xboole_1.E_output\n'
t10_xboole_1
b'Proof of conjecture t10_xboole_1 FOUND. Output in file t10_xboole_1.E_output\n'
t11_xboole_1
b'Proof of conjecture t11_xboole_1 FOUND. Output in file t11_xboole_1.E_output\n'
t12_xboole_1
b'Proof of conjecture t12_xboole_1 FOUND. Output in file t12_xboole_1.E_output\n'
t15_xboole_1
b'Proof of conjecture t15_xboole_1 FOUND. Output in file t15_xboole_1.E_output\n'
t17_xboole_1
b'Proof of conjecture t17_xboole_1 FOUND. Output in file t17_xboole_1.E_output\n'
t18_xboole_1
b'Proof of conjecture t18_xboole_1 FOUND. Output in file t18_xboole_1.E_output\n'
t20_xboole_1
b'Proof of conjecture t20_xboole_1 FOUND. Output 

KeyboardInterrupt: 

In [21]:
# Find the statement with the longest premise list in the training dependencies.
# On ties the first statement encountered wins; `that_thing` stays None when
# no list is non-empty.
that_thing, m = None, 0
for name in train_dep:
    size = len(train_dep[name])
    if size > m:
        that_thing, m = name, size
print(m)

3692


In [23]:
# Name of the statement with the longest premise list (set by the search cell).
that_thing

't36_funct_2'

In [22]:
# Premises recorded for that statement; repeated names are kept because lists
# from several dependency lines are concatenated.
# NOTE(review): this prints the entire list; consider showing a slice instead.
train_dep[that_thing]

['t31_funct_2',
 't22_funct_2',
 'redefinition_r2_relset_1',
 't35_funct_2',
 'redefinition_k1_partfun1',
 'redefinition_r2_relset_1',
 'cc1_relset_1',
 'dt_k2_funct_1',
 'dt_k1_partfun1',
 't78_relat_1',
 't55_funct_1',
 't24_funct_2',
 'fc4_funct_1',
 't30_funct_2',
 'redefinition_k2_relset_1',
 't35_funct_2',
 't61_funct_1',
 'dt_k6_partfun1',
 'redefinition_k6_partfun1',
 't55_relat_1',
 'redefinition_k1_partfun1',
 't80_relat_1',
 't71_relat_1',
 't31_funct_2',
 'redefinition_r2_relset_1',
 'd1_funct_2',
 'cc1_relset_1',
 'dt_k2_funct_1',
 'dt_k1_partfun1',
 't64_funct_1',
 'd19_relat_1',
 't24_funct_2',
 't65_funct_1',
 't3_subset',
 't46_funct_1',
 't30_funct_2',
 'redefinition_k2_relset_1',
 'd10_xboole_0',
 't35_funct_2',
 'dt_k6_partfun1',
 'redefinition_k6_partfun1',
 'redefinition_k1_relset_1',
 'redefinition_k1_partfun1',
 'fc6_funct_1',
 'cc2_relset_1',
 'redefinition_r2_relset_1',
 'cc1_relset_1',
 'dt_k2_funct_1',
 'dt_k1_partfun1',
 't78_relat_1',
 't55_funct_1',
 't65

In [52]:
# Number of distinct features, i.e. the length of every statement vector.
len(fts)

10241

In [53]:
# Single run on the first test conjecture.
# `conj_name` has to be assigned as a global first: score_and_prove and
# run_proof read it instead of receiving the name as an argument.
conj_name = conj_test[0]
score_and_prove(forest, stmts[conj_name])

CompletedProcess(args='python3 run_E_prover.py --conjecture t34_waybel_0 --premises premises.txt --statements data/statements', returncode=0)


In [54]:
# Show which conjecture is currently selected.
conj_name

't34_waybel_0'

In [55]:
# Single run on the first conjecture that has training dependencies.
# `conj_name` has to be assigned as a global first: score_and_prove and
# run_proof read it instead of receiving the name as an argument.
conj_name = list(train_dep.keys())[0]
score_and_prove(forest, stmts[conj_name])

CompletedProcess(args='python3 run_E_prover.py --conjecture t3_xboole_0 --premises premises.txt --statements data/statements', returncode=0)


In [68]:
# Manual re-run of the prover wrapper for the current `conj_name`, reusing
# whatever 'premises.txt' is on disk; the captured result is kept in `res`.
res = subprocess.run(
    ' '.join([
        "python3 run_E_prover.py --conjecture",
        conj_name,
        '--premises premises.txt',
        '--statements data/statements',
    ]),
    capture_output=True,
    shell=True,
)