Skip to content

Commit

Permalink
Merge pull request #107 from golastmile/test-percentage-setting
Browse files: browse the repository at this point in the history
Allow specifying the percentage of test data used during intent classification
  • Loading branch information
amn41 committed Jan 18, 2017
2 parents 63452b7 + feaead4 commit 5f28e23
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
9 changes: 5 additions & 4 deletions src/classifiers/sklearn_intent_classifier.py
Expand Up @@ -42,18 +42,19 @@ def transform_labels_num2str(self, y):
labels = self.le.inverse_transform(y)
return labels

def train(self, X, y):
def train(self, X, y, test_split_size=0.1):
"""Train the intent classifier on a data set.
:param test_split_size: fraction of the examples (e.g. 0.1 for 10%) to reserve for testing; when 0.0, no test score is logged
:param X: Train data set
:param y: Train labels (numeric)"""

X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.1, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split_size, random_state=0)
self.clf.fit(X_train, y_train)

# Test the trained model
logging.info("Score of intent model on test data: %s " % self.clf.score(X_test, y_test))
if test_split_size != 0.0:
logging.info("Score of intent model on test data: %s " % self.clf.score(X_test, y_test))

def predict_prob(self, X):
"""Given a bow vector of an input text, predict the intent label. Returns probabilities for all labels.
Expand Down
8 changes: 4 additions & 4 deletions src/trainers/spacy_sklearn_trainer.py
Expand Up @@ -24,20 +24,20 @@ def __init__(self, config, language_name):
self.intent_classifier = SklearnIntentClassifier()
self.entity_extractor = SpacyEntityExtractor()

def train(self, data):
def train(self, data, test_split_size=0.1):
self.training_data = data
self.train_entity_extractor(data.entity_examples)
self.train_intent_classifier(data.intent_examples)
self.train_intent_classifier(data.intent_examples, test_split_size)

def train_entity_extractor(self, entity_examples):
self.entity_extractor.train(self.nlp, entity_examples)

def train_intent_classifier(self, intent_examples):
def train_intent_classifier(self, intent_examples, test_split_size=0.1):
labels = [e["intent"] for e in intent_examples]
sentences = [e["text"] for e in intent_examples]
y = self.intent_classifier.transform_labels_str2num(labels)
X = self.featurizer.create_bow_vecs(sentences)
self.intent_classifier.train(X, y)
self.intent_classifier.train(X, y, test_split_size)

def persist(self, path, persistor=None, create_unique_subfolder=True):
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
Expand Down

0 comments on commit 5f28e23

Please sign in to comment.