In [315]:
import pickle
import spacy
import pandas as pd
import numpy as np
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB, CategoricalNB
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [316]:
# spaCy's small English pipeline with the parser, text-categorizer and NER
# components switched off; custom_tokenizer() calls it for tokens and lemmas.
nlp = spacy.load(
    "en_core_web_sm",
    disable=["parser", "textcat", "ner"],
)

In [317]:
# Read the four CSV tables, using each file's first column as the index.
# NOTE(review): none of these frames is referenced by the other cells visible
# in this notebook -- confirm they are still needed.
df_red, df_w_r, df_white, df_all = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/df_red.csv',
        '../data/df_w_r.csv',
        '../data/df_white.csv',
        '../data/df_nonans.csv',
    )
)

In [318]:
def custom_tokenizer(text):
    '''
    Run ``text`` through the module-level spaCy pipeline ``nlp`` and return
    the lemmas of the tokens that survive filtering.

    A token is kept only when it is at least two characters long, is purely
    alphabetic, and is not flagged as a stop word, number-like, or
    punctuation.

    Parameters
    ----------
    text : str
        Raw text to tokenize.

    Returns
    -------
    list
        ``lemma_`` of every kept token, in document order.
    '''
    def keep(tok):
        # De Morgan form of "drop when short, stop word, numeric,
        # punctuation, or non-alphabetic"; checks run in the same order.
        return (len(tok) >= 2 and not tok.is_stop and not tok.like_num
                and not tok.is_punct and tok.is_alpha)

    return [tok.lemma_ for tok in nlp(text) if keep(tok)]

In [319]:
# Unpickle the four `bow_*` objects (one per wine subset) in a single loop.
# pickle.load can execute arbitrary code, so only load files produced by this
# project. NOTE(review): presumably these pickles reference custom_tokenizer,
# which would be why it is defined before this cell -- confirm.
_bow_objects = []
for _bow_path in (
    '../models/bern/bow.p',
    '../models/bern/bow_red.p',
    '../models/bern/bow_w_r.p',
    '../models/bern/bow_white.p',
):
    with open(_bow_path, 'rb') as f:
        _bow_objects.append(pickle.load(f))
bow_all, bow_red, bow_w_r, bow_white = _bow_objects

In [320]:
# Unpickle the four `m_*` model objects (one per wine subset) in a single loop.
# pickle.load can execute arbitrary code, so only load files produced by this
# project.
_model_objects = []
for _model_path in (
    '../models/bern/m.p',
    '../models/bern/m_red.p',
    '../models/bern/m_w_r.p',
    '../models/bern/m_white.p',
):
    with open(_model_path, 'rb') as f:
        _model_objects.append(pickle.load(f))
m_all, m_red, m_w_r, m_white = _model_objects

In [321]:
# Fresh accumulators: `query` collects descriptor words from the questionnaire,
# `model` collects the key naming which trained model to use.
query, model = [], []

In [322]:
# Questionnaire, step 1: ask for red, white or either; the raw answer is kept
# in `wine_type`.
wine_type = input('Would you like red, white or either: ')

Would you like red, white or either: red


In [324]:
# Translate the wine-colour answer into the key that names a trained model.
# Answers are trimmed and lower-cased, and each question is re-asked until the
# answer is one of the accepted values, so exactly one key is always recorded.
wine_type = wine_type.strip().lower()
while wine_type not in ('red', 'white', 'either'):
    wine_type = input('Would you like red, white or either:').strip().lower()

if wine_type == 'red':
    model_key = 'red'
elif wine_type == 'white':
    # Only a "white" answer leads to the follow-up question about rosé.
    rose = input('Should we include Rosé wine too? ').strip().lower()
    while rose not in ('yes', 'no'):
        rose = input('Should we include Rosé wine too? ').strip().lower()
    model_key = 'white_rose' if rose == 'yes' else 'white'
else:
    model_key = 'all'

# Replace the list contents rather than appending: later cells read model[0],
# so a re-run of this cell must not leave an earlier choice in front.
model[:] = [model_key]

In [325]:
# Questionnaire, step 2: sweet or dry; the raw answer is kept in `style`.
style = input('Do you prefer sweet or dry wine: ')

Do you prefer sweet or dry wine: dry


In [326]:
# Descriptor words contributed by the sweetness preference. Any answer other
# than 'sweet' or 'dry' contributes nothing.
_SWEETNESS_TERMS = {
    'sweet': ['sweet', 'sweetness', 'sugar'],
    'dry': ['dry', 'savory', 'bitter'],
}
query.extend(_SWEETNESS_TERMS.get(style, []))

In [327]:
# Questionnaire, step 3: print the three-option menu, then read the pick into
# `style_2`.
for _menu_line in (
    'Which best describes your preferred wine style:',
    '1. Light and refreshing',
    '2. Smooth and balanced',
    '3. Full and rich',
):
    print(_menu_line)
style_2 = input('Enter 1, 2, or 3: ')

Which best describes your preferred wine style:
1. Light and refreshing
2. Smooth and balanced
3. Full and rich
Enter 1, 2, or 3: 2


In [328]:
# Descriptor words added to the query for each menu option (1, 2 or 3).
_BODY_TERMS = {
    '1': ['light', 'refreshing', 'bright', 'racy'],
    '2': ['smooth', 'balanced', 'polished', 'silky'],
    '3': ['full', 'rich', 'dense', 'tannin', 'tannic', 'chewy', 'heavy'],
}

# Keep asking until the answer is a valid option. A single re-prompt whose
# answer is never acted on would leave the query without any of these terms.
style_2 = style_2.strip()
while style_2 not in _BODY_TERMS:
    style_2 = input('Enter 1, 2, or 3: ').strip()
query.extend(_BODY_TERMS[style_2])

In [329]:
# Questionnaire, step 4: fruity or earthy; the raw answer is kept in
# `appealing`.
appealing = input('Which is more appealing fruity or earthy: ')

Which is more appealing fruity or earthy: fruity


In [330]:
# Descriptor words added to the query for the fruity / earthy preference.
_PROFILE_TERMS = {
    'fruity': ['fruity', 'fruitiness', 'jam', 'jammy'],
    'earthy': ['earthy', 'earth', 'soil', 'minerality', 'graphite'],
}

# Normalise the answer and keep asking until it is one of the two options, so
# a re-entered answer is actually applied to the query.
appealing = appealing.strip().lower()
while appealing not in _PROFILE_TERMS:
    appealing = input('Which is more appealing fruity or earthy: ').strip().lower()
query.extend(_PROFILE_TERMS[appealing])

In [108]:
#print('Which do you prefer: ')
#print('1. Wines that finish soft and light.')
#print('2. Wines that linger on your palate after they have finished.')
#finish = input('Enter 1 or 2: ')

In [109]:
#if finish == '1':
#    query.append('soft')
#    query.append('light')
#    query.append('fine')
#    query.append('round')
#elif finish == '2':
#    query.append('long')
#    query.append('linger')
#    query.append('texture')  
    
# Answering 1 to this question seems to boost Pinot Noir. Is that good or bad?

In [331]:
# Questionnaire, step 5: yes/no on caramel, toast and spice flavors; the raw
# answer is kept in `flavor`.
flavor = input('Do you like flavors that remind you of caramel, toast and spice (yes or no)? ')

Do you like flavors that remind you of caramel, toast and spice (yes or no)? yes


In [304]:
# A "yes" adds the caramel / toast / spice descriptors; any other answer adds
# nothing. The answer is trimmed and lower-cased before comparison so that
# "Yes" or "yes " still counts.
if flavor.strip().lower() == 'yes':
    query.extend([
        'caramel',  # was misspelled 'carmel'; the question asks about caramel
        'toast',
        'spice',
        'oak',
        'oaky',
        # NOTE(review): 'smokey' may be a misspelling of 'smoky' -- check it
        # against the vectorizer's vocabulary before changing it.
        'smokey',
        'toasty',
        'smoke',
        'cedar',
        'tea',
    ])

In [332]:
# Show the descriptor terms collected so far.
query

['dry',
 'savory',
 'bitter',
 'smooth',
 'balanced',
 'polished',
 'silky',
 'fruity',
 'fruitiness',
 'jam',
 'jammy']

In [333]:
# Collapse the collected terms into one space-separated string and wrap it in
# a one-element list; `query` is passed to bow.transform() in this form.
# Re-running this cell is harmless: joining a one-element list returns that
# element unchanged.
query = [' '.join(query)]

In [334]:
# Confirm that `query` is now a one-element list holding the joined terms.
query

['dry savory bitter smooth balanced polished silky fruity fruitiness jam jammy']

In [336]:
# Keep only the most recently recorded wine-type key, and fall back to 'red'
# when none was recorded. Unconditionally assigning ['red'] here would discard
# the user's answer to the wine-type question.
model = model[-1:] or ['red']

In [337]:
# Show the model key list; the selection cell below reads model[0].
model

['red']

In [338]:
# Pair every model key with its model object and the matching `bow` object
# loaded earlier, then bind the selected pair to `m` and `bow`.
_MODEL_REGISTRY = {
    'all': (m_all, bow_all),
    'red': (m_red, bow_red),
    'white': (m_white, bow_white),
    'white_rose': (m_w_r, bow_w_r),
}

# Fail loudly on a missing or unknown key: falling through silently would
# leave `m` / `bow` unbound, or still bound to whatever a previous run chose.
_model_key = model[0] if model else None
if _model_key not in _MODEL_REGISTRY:
    raise ValueError(
        f"Unknown model key {_model_key!r}; expected one of {sorted(_MODEL_REGISTRY)}"
    )
m, bow = _MODEL_REGISTRY[_model_key]

In [339]:
# Predicted label for the questionnaire query.
m.predict(bow.transform(query))

array(['Zinfandel'], dtype='<U29')

In [340]:
# Largest value in the probability array returned for the query.
m.predict_proba(bow.transform(query)).max()

0.37626135477582134

In [177]:
# Full probability array returned for the query.
m.predict_proba(bow.transform(query))

array([[1.65737998e-02, 1.36292992e-06, 8.92422950e-03, 5.99728419e-02,
        2.43910737e-04, 5.38968395e-05, 1.17237618e-16, 2.62148630e-05,
        7.70855580e-04, 2.10191172e-02, 3.69972912e-07, 4.98679816e-05,
        6.85724405e-01, 2.35331054e-13, 8.23810511e-16, 1.38336548e-01,
        8.50677492e-06, 1.27856904e-04, 2.60546425e-05, 1.45811794e-04,
        6.66655573e-02, 9.73681881e-04, 2.77341454e-04, 7.77688081e-05]])

In [341]:
# One row per entry of m.classes_, holding that class's predicted probability
# for the query rounded to four decimals.
df_pred = pd.DataFrame(
    m.predict_proba(bow.transform(query)).T.round(4),
    index=m.classes_,
    columns=['probability'],
)

In [342]:
# Rank the classes from most to least probable.
df_pred.sort_values(by='probability', ascending=False)
# TODO: if the user chose "full and rich", negate Pinot Noir

Unnamed: 0,probability
Zinfandel,0.3763
Pinot Noir,0.3665
Bordeaux-style Red Blend,0.0792
Merlot,0.0399
"Corvina, Rondinella, Molinara",0.0374
Syrah,0.0251
Port,0.0171
Cabernet Franc,0.0154
Barbera,0.0138
Cabernet Sauvignon,0.0126
