In [12]:
import json
from math import factorial
from itertools import combinations

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression

In [13]:
def read_json(filepath):
    """Load a JSON document from disk and return the parsed Python object.

    Args:
        filepath: path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (dict, list, str, ...).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(filepath, encoding='utf-8') as f:
        return json.load(f)

def c(n, r):
    """Return the binomial coefficient "n choose r" as an exact integer.

    Computed as n! / ((n - r)! * r!) with integer division, so ``r > n``
    (or a negative argument) propagates ``factorial``'s ValueError.
    """
    ordered_picks = factorial(n)
    duplicates = factorial(n - r) * factorial(r)
    return ordered_picks // duplicates

In [14]:
# Number of base indices that get combined.
size = 3
size_range = range(size)

# Every non-empty combination of the indices, grouped by length (1, 2, ..., size).
combs = [
    subset
    for shorter_by_one in size_range
    for subset in combinations(size_range, shorter_by_one + 1)
]
combination_size = len(combs)

# One bundle of JSON files per combination; file number i is loaded into
# pgraphs[i] (presumably matching combs[i] -- verify against the exporter).
pgraphs = []
for i in range(combination_size):
    pgraphs += [{
        'graph': read_json(f'graph_{i}.json'),
        'graph_data': read_json(f'graph_data_{i}.json'),
        'graph_score': read_json(f'graph_score_{i}.json'),
        'graph_accuracy': read_json(f'graph_accuracy_{i}.json'),
    }]

In [15]:
# Sanity check: how many bundles were loaded, next to the index combinations
# that determined that count.
len(pgraphs), combs

(7, [(0,), (1,), (2,), (0, 1), (0, 2), (1, 2), (0, 1, 2)])

In [16]:
# For each loaded bundle, look up one hand-picked key and print its stored
# accuracy together with the graph entry recorded under that key.
probe_keys = ['~', '7', '~', '~7', '~~', '7~', '~7~']
for i, ch in enumerate(probe_keys):
    bundle = pgraphs[i]
    print(i, ch, bundle['graph_accuracy'][ch], bundle['graph'][ch])

0 ~ 0.8782051282051282 {'~': 138, 'u': 10, '6': 1, '7': 1, '8': 2, '9': 2, '1': 2}
1 7 0.25806451612903225 {'t': 2, '~': 11, '_': 2, '5': 3, '8': 2, '1': 2, '2': 2, '3': 2, '4': 2, 'c': 1, '6': 1, '7': 1}
2 ~ 0.10256410256410256 {'8': 10, '9': 7, 'c': 10, '5': 21, '6': 19, '7': 12, '1': 17, '2': 20, '3': 20, '4': 20}
3 ~7 0.6666666666666666 {'8': 2, '~': 10}
4 ~~ 0.0 {'6': 1, '7': 1, '8': 2, '9': 2, '1': 2}
5 7~ 0.125 {'5': 3, '8': 2, '1': 2, '2': 2, '3': 2, '4': 2, 'c': 1, '6': 1, '7': 1}
6 ~7~ 0.5 {'8': 2}


In [17]:
# Dump the raw data stored under '~' in the first bundle. The recorded output
# shows a list of lists of short context strings; it is long, so consider
# previewing a slice instead when sharing the notebook.
print(pgraphs[0]['graph_data']['~'])

[['~co', '~co', '~co', '~co', '~5~', '~6~', '~7~', '~8~', '~9~', '~co', '~co', '~co', '~co', '~co', '~co', '~7~', '~8~', '~9~'], ['~89', '~90', '~91', '~92', '~93', '~94', '~10', '~11', '~12', '~13', '~14', '~15', '~16', '~17', '~18', '~19', '~20', '~21', '~22', '~23', '~24', '~25', '~26', '~27', '~28', '~29', '~30', '~31', '~32', '~33', '~34', '~35', '~36', '~37', '~38', '~39', '~40', '~41', '~42', '~43', '~44', '~45', '~46', '~47', '~48', '~49', '~50', '~51', '~52', '~53', '~54', '~55', '~56', '~57', '~58', '~59', '~60', '~61', '~62', '~63', '~64', '~65', '~66', '~67', '~14', '~15', '~16', '~17', '~18', '~19', '~20', '~21', '~22', '~23', '~24', '~25', '~26', '~27', '~28', '~29', '~30', '~31', '~32', '~33', '~34', '~35', '~36', '~37', '~38', '~39', '~40', '~41', '~42', '~43', '~44', '~45', '~46', '~47', '~48', '~49', '~50', '~51', '~52', '~53', '~54', '~55', '~56', '~57', '~58', '~59', '~60', '~61', '~62', '~63', '~64', '~65', '~66', '~67', '~68', '~69', '~70', '~71', '~72', '~73', '~

In [41]:
class Modeler:
    """Collect (context, label) samples and fit a logistic-regression classifier.

    Each context string becomes one feature row holding the Unicode code point
    of every character. Contexts are expected to share one length so the rows
    stack into a 2-D array when training.
    """

    def __init__(self):
        # the input data: one list of code points per sample
        self.x = []

        # the output data: one label per sample, aligned with self.x
        self.y = []

        # initialize model for classification and prediction
        self.initialize_classifier()

    def initialize_classifier(self):
        """Create a fresh, unfitted classifier on ``self.classifier``."""
        # define model
        self.classifier = LogisticRegression(solver='liblinear')

    def add_data(self, context, label):
        """Append one training sample.

        Args:
            context: string, stored as ``[ord(ch) for ch in context]``.
            label: target value, stored unchanged.
        """
        self.x.append(list(map(ord, context)))
        self.y.append(label)

    def train(self, test_size=0.2, random_state=None):
        """Fit the classifier on a random split and report held-out accuracy.

        Prints one line per held-out sample; a trailing ``*`` marks a
        misclassified sample.

        Args:
            test_size: forwarded to ``train_test_split`` (default 0.2, as before).
            random_state: forwarded to ``train_test_split``; pass an int to
                make the split, and therefore the result, reproducible.

        Returns:
            The classifier's score on the held-out split.

        Raises:
            ValueError: if no samples have been added yet.
        """
        if not self.x:
            raise ValueError("no samples to train on; call add_data() first")

        x, y = np.array(self.x), np.array(self.y)
        X_train, X_test, y_train, y_test = train_test_split(
            x, y, test_size=test_size, random_state=random_state
        )

        # fit the scikit-learn classifier on the training split
        self.classifier.fit(X_train, y_train)

        # make class predictions for the held-out split
        y_pred = self.classifier.predict(X_test)

        for features, expected, predicted in zip(X_test, y_test, y_pred):
            score = '*' if expected != predicted else ''
            print(f"{str(features.tolist()):15s} expected => {expected}, predicted => {predicted} {score}")

        # calc accuracy
        accuracy = self.classifier.score(X_test, y_test)

        return accuracy

In [34]:
# Contexts stored under '~' in the first bundle: a list of lists of strings.
data = pgraphs[0]['graph_data']['~']

modeler = Modeler()

# Every string is labelled with the index of the sub-list it came from.
for index, li in enumerate(data):
    for value in li:
        modeler.add_data(value, index)
        
# One extra hand-written sample, labelled 0, on top of the loaded data.
modeler.add_data('~x9', 0)
# train() splits randomly, so the printed rows and score vary between runs.
modeler.train()

[126, 50, 50]   expected => 1, predicted => 1 
[126, 49, 49]   expected => 1, predicted => 1 
[126, 53, 57]   expected => 1, predicted => 1 
[126, 57, 48]   expected => 1, predicted => 1 
[126, 52, 56]   expected => 1, predicted => 1 
[126, 51, 50]   expected => 1, predicted => 1 
[126, 56, 51]   expected => 1, predicted => 1 
[126, 53, 56]   expected => 1, predicted => 1 
[126, 52, 54]   expected => 1, predicted => 1 
[126, 52, 52]   expected => 1, predicted => 1 
[126, 49, 57]   expected => 1, predicted => 1 
[126, 49, 55]   expected => 1, predicted => 1 
[126, 57, 49]   expected => 1, predicted => 1 
[126, 55, 126]  expected => 0, predicted => 0 
[126, 51, 53]   expected => 1, predicted => 1 
[126, 50, 53]   expected => 1, predicted => 1 
[126, 51, 49]   expected => 1, predicted => 1 
[126, 49, 52]   expected => 1, predicted => 1 
[126, 50, 52]   expected => 1, predicted => 1 
[126, 54, 57]   expected => 1, predicted => 1 
[126, 55, 126]  expected => 0, predicted => 0 
[126, 53, 52]

1.0

In [43]:
# Classify one unseen context and show whether it occurs in either labelled list.
# NOTE(review): the input built here is a 2-D array with a single row, yet the
# recorded traceback below complains about an empty 1-D array -- the output
# looks stale (execution counts are also out of order); re-run on a fresh kernel.
trial = '~z9'
modeler.classifier.predict(np.array([list(map(ord, trial))])), trial in data[0], trial in data[1]

ValueError: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [37]:
# for i in range(len(modeler.x)):
#     print(modeler.x[i], modeler.y[i])

In [None]:
class Modeler:
    """Collect (context, label) samples and fit a classifier and a regressor.

    Each context string becomes one feature row holding the Unicode code point
    of every character. Contexts are expected to share one length so the rows
    stack into a 2-D array when training.
    """

    def __init__(self):
        # the input data: one list of code points per sample
        self.x = []

        # the output data: one label per sample, aligned with self.x
        self.y = []

        # initialize models for classification and prediction
        self.initialize_classifier()
        self.initialize_predictor()

    def initialize_classifier(self):
        """Create a fresh, unfitted classifier on ``self.classifier``."""
        # define model
        self.classifier = LogisticRegression(solver='liblinear')

    def initialize_predictor(self):
        """Create a fresh, unfitted regressor on ``self.predictor``."""
        # define model
        self.predictor = LinearRegression()

    def add_data(self, context, label):
        """Append one training sample.

        Args:
            context: string, stored as ``[ord(ch) for ch in context]``.
            label: target value, stored unchanged.
        """
        self.x.append(list(map(ord, context)))
        self.y.append(label)

    def train(self, test_size=0.2, random_state=None):
        """Fit both models on one random split and score them on the held-out part.

        Prints one line per held-out sample comparing the expected label with
        the classifier's prediction; a trailing ``*`` marks a misclassification.

        Args:
            test_size: forwarded to ``train_test_split`` (default 0.2, as before).
            random_state: forwarded to ``train_test_split``; pass an int to
                make the split, and therefore the result, reproducible.

        Returns:
            tuple: ``(classifier_accuracy, predictor_score)``. The second value
            comes from ``LinearRegression.score``, which scikit-learn documents
            as the coefficient of determination (R^2), not an accuracy.

        Raises:
            ValueError: if no samples have been added yet.
        """
        if not self.x:
            raise ValueError("no samples to train on; call add_data() first")

        x, y = np.array(self.x), np.array(self.y)
        X_train, X_test, y_train, y_test = train_test_split(
            x, y, test_size=test_size, random_state=random_state
        )

        # fit the classifier on the dataset
        self.classifier.fit(X_train, y_train)

        # fit the predictor on the dataset
        self.predictor.fit(X_train, y_train)

        # make class predictions with the classifier
        y_class_pred = self.classifier.predict(X_test)

        for features, expected, predicted in zip(X_test, y_test, y_class_pred):
            score = '*' if expected != predicted else ''
            print(f"{str(features.tolist()):15s} expected => {expected}, predicted => {predicted} {score}")

        # score both models on the held-out split
        classifier_accuracy = self.classifier.score(X_test, y_test)
        predictor_score = self.predictor.score(X_test, y_test)

        # The original returned an undefined name `accuracy` (NameError);
        # return both values that were actually computed.
        return classifier_accuracy, predictor_score