In [None]:
# Labelled two-word phrases as (text, label) pairs; the text is lower-cased
# while the list is being built, so `data` is bound exactly once.
data = [
    (phrase.lower(), label)
    for phrase, label in (
        ("Great fun", 1),
        ("Great time", 1),
        ("Happy fun", 1),
        ("Brilliant time", 1),
        ("Brilliant fun", 1),
        ("Terrible time", 0),
        ("Bad times", 0),
        ("Great mistake", 0),
        ("Terrible mistake", 0),
        ("Bad experience", 0),
    )
]

In [None]:
import pandas as pd 
import numpy as np
import logging

In [None]:
# Two-column frame built from the (text, label) pairs:
# column "x" holds the text, column "y" holds the integer label.
df = pd.DataFrame(data, columns=["x", "y"])
x, y = df["x"], df["y"]

To make the vectorizer → transformer → classifier chain easier to work with, scikit-learn provides the `Pipeline` class. A pipeline behaves like a single compound classifier.

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

# Chain the three stages: token counts -> tf-idf weighting -> naive Bayes.
text_clf = Pipeline(steps=[
    ("vect", CountVectorizer()),
    ("tfidf", TfidfTransformer()),
    ("clf", MultinomialNB()),
])

# A single fit call trains every stage of the pipeline.
text_clf.fit(x, y)
# `clf` is the same (now fitted) pipeline object; later cells refer to it by this name.
clf = text_clf
predicted = text_clf.predict(x)

In [None]:
from sklearn import metrics
# Note: `predicted` was produced from the same rows the model was fitted on,
# so this report describes training-set fit, not held-out performance.
print(metrics.classification_report(y_true=y, y_pred=predicted))

In [None]:
# Get a listing of all of the parameters we can control e.g. in a GridSearchCV
import pprint as pp
# pp.pprint(sorted(clf.get_params().keys()))

In [None]:
from sklearn.model_selection import GridSearchCV

# Search grid; each key is "<pipeline step name>__<parameter of that step>".
parameters = dict(
    vect__ngram_range=[(1, 1), (1, 2)],
    tfidf__use_idf=(True, False),
    clf__alpha=(1e-2, 1e-3),
)

In [None]:
# Wrap the pipeline in a cross-validated search over `parameters`;
# n_jobs=-1 asks scikit-learn to run the candidate fits in parallel on all cores.
gs_clf = GridSearchCV(
    estimator=clf,
    param_grid=parameters,
    n_jobs=-1,
)

In [None]:
# Run the search (fit returns the search object itself, so no rebinding is needed),
# then show the per-candidate cross-validation results as a table.
gs_clf.fit(x, y)
pd.DataFrame(gs_clf.cv_results_)

In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual

def predict(text):
    """Print the class probabilities the fitted pipeline `clf` assigns to `text`.

    The first probability column is reported as "negative" and the second
    as "positive".
    """
    probabilities = clf.predict_proba([text])[0]
    negative_pct = probabilities[0]*100
    positive_pct = probabilities[1]*100
    print("Probability negative: {0:.1f}%, Probability positive: {1:.1f}%".format(negative_pct, positive_pct))

# Text-box widget that re-runs predict() whenever the input changes.
i = interact(predict, text="fun")

In [None]:
# IPython help lookup: shows the GridSearchCV docstring (development aid only).
?GridSearchCV

In [None]:
# Want to try decision trees and maybe logit too?
# Add code that uses a pipeline

In [None]:
# Can we get more information about the context 
# e.g. the word 'great' here is either in the context of 'great mistake' or 'great fun'

In [None]:
# Equivalent to CountVectorizer followed by TfidfTransformer.
?TfidfVectorizer