# In [2]:
import numpy as np
import os
import json

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import TruncatedSVD
from sklearn import metrics as mtr
import pickle

# In [6]:
class QPC_model:
    '''
    Predicts a category label for each question using previously saved
    (pickled) components: a vectorizer, a classifier and, optionally, a
    dimensionality reducer.

    The components are read from the current working directory, from files
    named 'qpccv-<conf>.sav', 'qpcmlpc-<conf>.sav' and 'qpctsvd-<conf>.sav'.
    '''

    def __init__(self, conf=None):
        '''
        Arguments
            conf:         Configuration ID selecting which saved model files
                          to load. Must be one of conf_opts. Defaults to
                          conf_opts[1] ('tc3').

        Attributes
            ngram_range:  Stored on the instance; not read by this class.
            min_df:       Stored on the instance; not read by this class.
            classes:      Array of category label names; only its dtype is
                          used, for the empty result returned by predict().
            conf_opts:    Known configuration IDs.
            conf:         Configuration ID currently selected.
            cv, mlpc, tsvd:
                          Loaded components, None until model() has run
                          (tsvd stays None when its file does not exist).

        Raises
            ValueError:   If conf is given and is not one of conf_opts.
        '''
        self.ngram_range = (1, 2)
        self.min_df = 1
        self.classes = np.array(['resource', 'date', 'number', 'string', 'boolean'])
        self.conf_opts = ['tc1', 'tc3', 'tc4']
        if conf is None:
            conf = self.conf_opts[1]
        elif conf not in self.conf_opts:
            raise ValueError(
                'Unknown configuration {!r}; expected one of {}'.format(conf, self.conf_opts)
            )
        self.conf = conf
        self.cv = None
        self.mlpc = None
        self.tsvd = None
        # Remembers which configuration the loaded components belong to, so
        # predict() can skip re-reading the files on every call.
        self._loaded_conf = None

    def model(self):
        '''
        Method for loading the saved vectorizer, classifier and (optional)
        truncated SVD for the current configuration. Nothing is trained here.
        '''
        self.cv, self.mlpc, self.tsvd = self._load_model(self.conf)
        self._loaded_conf = self.conf

    def predict(self, query_list):
        '''
        Method for predicting category labels.
        Pass query_list as list of dictionaries with queries.

        Each dictionary is read through its 'question' key. Entries whose
        question is None (or missing) are skipped, so the result can be
        shorter than query_list and is aligned with the kept questions only.

        Returns the classifier's predictions, an empty array when there is
        nothing to classify, or None when no classifier could be loaded.
        '''
        # Load lazily: only when nothing is loaded yet, or when self.conf was
        # changed since the last load. getattr() tolerates instances created
        # before _loaded_conf existed.
        if self.mlpc is None or getattr(self, '_loaded_conf', None) != self.conf:
            self.model()
        if self.mlpc is None:
            print('Model not loaded')
            return None

        queries = [q['question'] for q in query_list if q.get('question') is not None]
        if not queries:
            # Avoid handing zero samples to the vectorizer/classifier.
            return np.array([], dtype=self.classes.dtype)

        vec = self.cv.transform(queries)
        if self.tsvd is not None:
            vec = self.tsvd.transform(vec)
        return self.mlpc.predict(vec)

    def _load_model(self, conf):
        '''
        Read the pickled components for configuration conf.

        Returns a tuple (cv, mlpc, tsvd). The vectorizer and classifier files
        are required (a missing file raises FileNotFoundError); the SVD file
        is optional and yields None when it does not exist.

        NOTE: pickle can execute arbitrary code while loading. Only use model
        files that come from a trusted source.
        '''
        cv = self._read_pickle('qpccv-' + conf + '.sav')
        mlpc = self._read_pickle('qpcmlpc-' + conf + '.sav')
        try:
            tsvd = self._read_pickle('qpctsvd-' + conf + '.sav')
        except FileNotFoundError:
            # Only a missing file means "no SVD step"; a corrupt or unreadable
            # file is a real error and is allowed to propagate.
            tsvd = None
        return (cv, mlpc, tsvd)

    @staticmethod
    def _read_pickle(path):
        '''Unpickle and return the single object stored in path.'''
        with open(path, 'rb') as f:
            return pickle.load(f)