In [1]:
import pickle
import pandas as pd
import numpy as np

In [2]:
# Load the pre-computed artifacts that the recommender cells below rely on.
#
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only run this cell against files you produced yourself or otherwise trust.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so the handle is closed even when
    unpickling raises.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


wine_word2vec_model = _load_pickle('wine_word2vec_model')
model_knn = _load_pickle('model_knn')
dict_of_tfidf_weightings = _load_pickle('dict_of_tfidf_weightings')
wine_reviews_mincount = _load_pickle('wine_reviews_mincount')

In [3]:
def descriptors_to_best_match_wines(list_of_descriptors, number_of_suggestions=10):
    """Print the wines whose neighbours in ``model_knn`` are closest to a descriptor list.

    Every usable descriptor is looked up in ``wine_word2vec_model``, multiplied
    by its weight from ``dict_of_tfidf_weightings`` and the weighted vectors are
    summed into a single query vector that is passed to ``model_knn.kneighbors``.

    The function reads these notebook-level names: ``dict_of_tfidf_weightings``,
    ``descriptor_mapping``, ``wine_word2vec_model``, ``model_knn`` and
    ``wine_reviews_mincount``.

    NOTE(review): ``descriptor_mapping`` is not created by any cell visible in
    this notebook. On a fresh kernel a descriptor that is missing from
    ``dict_of_tfidf_weightings`` will raise ``NameError`` unless that object is
    loaded first.

    Args:
        list_of_descriptors: iterable of descriptor strings, e.g.
            ``['apple', 'pear', 'wood']``.
        number_of_suggestions: how many wines to print.

    Returns:
        None. The suggestions are printed.
    """
    weighted_review_terms = []
    for term in list_of_descriptors:
        if term not in dict_of_tfidf_weightings:
            if term not in descriptor_mapping.index:
                print('choose a different descriptor from', term)
                continue
            normalized_term = descriptor_mapping['normalized'][term]
            # The mapping may point at a term that has no TF-IDF weight;
            # reject it the same way instead of failing with a KeyError.
            if normalized_term not in dict_of_tfidf_weightings:
                print('choose a different descriptor from', term)
                continue
            term = normalized_term
        tfidf_weighting = dict_of_tfidf_weightings[term]
        # reshape(1, -1) yields one row per query without hard-coding the
        # embedding width.
        word_vector = wine_word2vec_model.wv.get_vector(term).reshape(1, -1)
        weighted_review_terms.append(tfidf_weighting * word_vector)

    if not weighted_review_terms:
        # sum([]) is the integer 0, which is not a valid query vector.
        print('no usable descriptors were given, so no wines can be suggested')
        return

    review_vector = sum(weighted_review_terms)

    distance, indice = model_knn.kneighbors(review_vector, n_neighbors=number_of_suggestions+1)
    # NOTE(review): the nearest neighbour is dropped here (and one extra is
    # requested above to compensate). That is the usual trick when the query is
    # itself a row of the fitted data; for a free-form descriptor query it
    # presumably discards the best match -- confirm before changing.
    distance_list = distance[0].tolist()[1:]
    indice_list = indice[0].tolist()[1:]

    for n, (d, i) in enumerate(zip(distance_list, indice_list), start=1):
        wine_name = wine_reviews_mincount['title'][i]
        wine_descriptors = wine_reviews_mincount['normalized_descriptors'][i]
        print('Suggestion', str(n), ':', wine_name, 'with a cosine distance of', "{:.3f}".format(d))
        print('This wine has the following descriptors:', wine_descriptors)
        print('')

In [4]:
descriptors = ['apple','pear','wood']
descriptors_to_best_match_wines(list_of_descriptors=descriptors, number_of_suggestions=5)

Suggestion 1 : Domaine Faiveley 2011 Les Villeranges  (Rully) with a cosine distance of 0.086
This wine has the following descriptors: ['fresh', 'fruit', 'citrus', 'pear', 'apple', 'wood', 'round']

Suggestion 2 : Domaine Ostertag 2010 Barriques Pinot Blanc (Alsace) with a cosine distance of 0.088
This wine has the following descriptors: ['oak', 'wood', 'toast', 'pear', 'honey', 'apple']

Suggestion 3 : A.R. Lenoble  NV Intense Brut  (Champagne) with a cosine distance of 0.111
This wine has the following descriptors: ['wood', 'rich', 'ripe', 'fruit', 'pear', 'crisp', 'apple', 'apricot']

Suggestion 4 : Domaine de la Garenne 2008  Mâcon-Azé with a cosine distance of 0.118
This wine has the following descriptors: ['wood', 'fruit', 'fresh', 'grapefruit', 'pear', 'bright', 'lemon', 'honey', 'wood']

Suggestion 5 : Wheelhouse 2011 Chardonnay (Sonoma County) with a cosine distance of 0.130
This wine has the following descriptors: ['ripe', 'apple', 'pear', 'tropical_fruit', 'fruit', 'oak']



In [5]:
wine_reviews_mincount

Unnamed: 0,level_0,index,country,description,designation,points,price,province,region_1,title,variety,winery,normalized_descriptors,review_vector,descriptor_count
0,0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,"[tropical_fruit, fruit, dry, herb, apple, citr...","[[-1.1824377, -4.7284617, -1.5751477, -1.71426...",9
1,1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,"[ripe, fruit, smooth, firm, juicy, berry, frui...","[[-1.5244689, -3.2667427, 0.5174258, -0.595197...",8
2,2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,"[tart, snappy, lime, green, pineapple, crisp, ...","[[-5.0686564, -2.5592537, -1.0183173, 2.063115...",7
3,3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,"[pineapple, rind, lemon_pith, orange_blossom, ...","[[0.2353328, -3.7015162, 2.524659, -0.20941763...",6
4,5,5,Spain,Blackberry and raspberry aromas show a typical...,Ars In Vitro,87,15.0,Northern Spain,Navarra,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem,"[blackberry, raspberry, green, herb, full_bodi...","[[-1.9060161, -1.6312248, 0.7486735, -1.922590...",13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108872,129965,129965,France,"While it's rich, this beautiful dry wine also ...",Seppi Landmann Vallée Noble,90,28.0,Alsace,Alsace,Domaine Rieflé-Landmann 2013 Seppi Landmann Va...,Pinot Gris,Domaine Rieflé-Landmann,"[rich, dry, fresh, ripe, white, fruit, pear, a...","[[-1.7249664, -3.4387279, -1.2478399, 0.017392...",9
108873,129966,129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef),"[honeysuckle, cataloupe, sweet, juicy, tart, t...","[[-3.650302, -4.2121887, -1.7558427, 2.5154157...",9
108874,129967,129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation,"[baked, cherry, cocoa, coconut, soft, fruit, c...","[[-0.41930154, -0.7994918, 1.3697039, -2.10915...",7
108875,129969,129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,"[dry, crisp, weight, powerful, spice, baked_ap...","[[-1.3772521, -1.0705342, 2.4167092, 1.6873336...",6


In [6]:
rv = wine_reviews_mincount.review_vector

In [7]:
rv[0][0].dtype

dtype('float32')

In [8]:
def descriptors_to_best_match_wines_v2(list_of_descriptors, wineset=wine_reviews_mincount, number_of_suggestions=10):
    """Print the closest wines to a descriptor list, restricted to ``wineset``.

    The query vector is built as the sum of TF-IDF-weighted word2vec vectors of
    the usable descriptors. All ``len(wine_reviews_mincount)`` neighbours are
    requested from ``model_knn`` and walked in the order returned; only those
    whose index value is present in ``wineset.index`` are printed.

    The function reads these notebook-level names: ``dict_of_tfidf_weightings``,
    ``descriptor_mapping``, ``wine_word2vec_model``, ``model_knn`` and
    ``wine_reviews_mincount``.

    NOTE(review): ``descriptor_mapping`` is not created by any cell visible in
    this notebook. On a fresh kernel a descriptor that is missing from
    ``dict_of_tfidf_weightings`` will raise ``NameError`` unless that object is
    loaded first.

    Args:
        list_of_descriptors: iterable of descriptor strings.
        wineset: DataFrame with ``title`` and ``normalized_descriptors``
            columns whose index labels are compared against the neighbour
            indices. The default is bound once, when this cell is run.
        number_of_suggestions: maximum number of wines to print; zero or a
            negative value prints nothing.

    Returns:
        None. The suggestions are printed.
    """
    weighted_review_terms = []
    for term in list_of_descriptors:
        if term not in dict_of_tfidf_weightings:
            if term not in descriptor_mapping.index:
                print('choose a different descriptor from', term)
                continue
            normalized_term = descriptor_mapping['normalized'][term]
            # The mapping may point at a term that has no TF-IDF weight;
            # reject it the same way instead of failing with a KeyError.
            if normalized_term not in dict_of_tfidf_weightings:
                print('choose a different descriptor from', term)
                continue
            term = normalized_term
        tfidf_weighting = dict_of_tfidf_weightings[term]
        # reshape(1, -1) yields one row per query without hard-coding the
        # embedding width.
        word_vector = wine_word2vec_model.wv.get_vector(term).reshape(1, -1)
        weighted_review_terms.append(tfidf_weighting * word_vector)

    if not weighted_review_terms:
        # sum([]) is the integer 0, which is not a valid query vector.
        print('no usable descriptors were given, so no wines can be suggested')
        return

    review_vector = sum(weighted_review_terms)

    # Every neighbour is requested because the subset filter below may reject
    # an arbitrary number of the closest ones.
    distance, indice = model_knn.kneighbors(review_vector, n_neighbors=len(wine_reviews_mincount))
    # NOTE(review): the nearest neighbour is dropped here. For a free-form
    # descriptor query this presumably discards the best match -- confirm
    # before changing.
    distance_list = distance[0].tolist()[1:]
    indice_list = indice[0].tolist()[1:]

    shown = 0
    for d, i in zip(distance_list, indice_list):
        # Checking the quota first means a request for zero (or fewer)
        # suggestions prints nothing instead of the whole subset.
        if shown >= number_of_suggestions:
            break
        if i not in wineset.index:
            continue
        shown += 1
        wine_name = wineset['title'][i]
        wine_descriptors = wineset['normalized_descriptors'][i]
        print('Suggestion', str(shown), ':', wine_name, 'with a cosine distance of', "{:.3f}".format(d))
        print('This wine has the following descriptors:', wine_descriptors)
        print('')

In [9]:
descriptors_to_best_match_wines_v2(list_of_descriptors=descriptors, number_of_suggestions=5)

Suggestion 1 : Domaine Faiveley 2011 Les Villeranges  (Rully) with a cosine distance of 0.086
This wine has the following descriptors: ['fresh', 'fruit', 'citrus', 'pear', 'apple', 'wood', 'round']

Suggestion 2 : Domaine Ostertag 2010 Barriques Pinot Blanc (Alsace) with a cosine distance of 0.088
This wine has the following descriptors: ['oak', 'wood', 'toast', 'pear', 'honey', 'apple']

Suggestion 3 : A.R. Lenoble  NV Intense Brut  (Champagne) with a cosine distance of 0.111
This wine has the following descriptors: ['wood', 'rich', 'ripe', 'fruit', 'pear', 'crisp', 'apple', 'apricot']

Suggestion 4 : Domaine de la Garenne 2008  Mâcon-Azé with a cosine distance of 0.118
This wine has the following descriptors: ['wood', 'fruit', 'fresh', 'grapefruit', 'pear', 'bright', 'lemon', 'honey', 'wood']

Suggestion 5 : Wheelhouse 2011 Chardonnay (Sonoma County) with a cosine distance of 0.130
This wine has the following descriptors: ['ripe', 'apple', 'pear', 'tropical_fruit', 'fruit', 'oak']



In [10]:
# Restrict the candidate wines to a single country. Filtering first and copying
# the result avoids duplicating the whole review frame just to throw most of it
# away; the explicit copy keeps `wineset` independent of the source frame.
# Other filters that were tried here: winery == 'Concha y Toro', and titles
# containing 'Beaujolais Nouveau' (via wineset['title'].str.contains).
wineset = wine_reviews_mincount.loc[wine_reviews_mincount['country'] == 'France'].copy()

wineset


Unnamed: 0,level_0,index,country,description,designation,points,price,province,region_1,title,variety,winery,normalized_descriptors,review_vector,descriptor_count
7,9,9,France,This has great depth of flavor with its fresh ...,Les Natures,87,27.0,Alsace,Alsace,Jean-Baptiste Adam 2012 Les Natures Pinot Gris...,Pinot Gris,Jean-Baptiste Adam,"[depth, fresh, tree_fruit, fruit, spice, dry, ...","[[-2.2948205, -3.429711, -0.07856291, 0.952319...",7
9,11,11,France,"This is a dry wine, very spicy, with a tight, ...",,87,30.0,Alsace,Alsace,Leon Beyer 2012 Gewurztraminer (Alsace),Gewürztraminer,Leon Beyer,"[dry, spice, tight, minerality, citrus, pepper...","[[-1.4436648, -3.5878768, 0.45282426, -1.14376...",7
40,49,49,France,"Soft and fruity, this is a generous, ripe wine...",Eté Indien,86,14.0,Beaujolais,Brouilly,Vignerons de Bel Air 2011 Eté Indien (Brouilly),Gamay,Vignerons de Bel Air,"[soft, fruit, ripe, juicy, cherry, fruit, rich...","[[-1.9427114, -1.7901807, 1.4188309, -1.123458...",8
52,63,63,France,"This fat, yeasty Champagne is comprised predom...",Brut Rosé,86,58.0,Champagne,Champagne,Roland Champion NV Brut Rosé (Champagne),Champagne Blend,Roland Champion,"[fat, yeast, ripe, berry, fruit, light_bodied,...","[[-1.3036878, -1.3379736, -1.5018443, -0.77713...",8
53,65,65,France,"From the warm 2015 vintage, this is a soft and...",,86,24.0,Burgundy,Chablis,Simonnet-Febvre 2015 Chablis,Chardonnay,Simonnet-Febvre,"[warm, soft, fruit, white, fruit, light_bodied...","[[-1.5123539, -1.6475304, 0.54588705, -0.83126...",7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108867,129959,129959,France,The granite soil of the Brand Grand Cru vineya...,Brand Grand Cru,90,57.0,Alsace,Alsace,Cave de Turckheim 2010 Brand Grand Cru Pinot G...,Pinot Gris,Cave de Turckheim,"[rich, ripe, fruit, dry, honey, yellow, fruit,...","[[-2.218728, -4.1326113, -1.3310887, 0.6956551...",9
108871,129964,129964,France,"Initially quite muted, this wine slowly develo...",Domaine Saint-Rémy Herrenweg,90,,Alsace,Alsace,Domaine Ehrhart 2013 Domaine Saint-Rémy Herren...,Gewürztraminer,Domaine Ehrhart,"[rich, spice, sweet, dry, spice, dry, honey]","[[-1.5443115, -3.4082165, 0.5985926, -0.977315...",7
108872,129965,129965,France,"While it's rich, this beautiful dry wine also ...",Seppi Landmann Vallée Noble,90,28.0,Alsace,Alsace,Domaine Rieflé-Landmann 2013 Seppi Landmann Va...,Pinot Gris,Domaine Rieflé-Landmann,"[rich, dry, fresh, ripe, white, fruit, pear, a...","[[-1.7249664, -3.4387279, -1.2478399, 0.017392...",9
108875,129969,129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,"[dry, crisp, weight, powerful, spice, baked_ap...","[[-1.3772521, -1.0705342, 2.4167092, 1.6873336...",6


In [11]:
descriptors_to_best_match_wines_v2(list_of_descriptors=descriptors, wineset=wineset, number_of_suggestions=5)

Suggestion 1 : Domaine Faiveley 2011 Les Villeranges  (Rully) with a cosine distance of 0.086
This wine has the following descriptors: ['fresh', 'fruit', 'citrus', 'pear', 'apple', 'wood', 'round']

Suggestion 2 : Domaine Ostertag 2010 Barriques Pinot Blanc (Alsace) with a cosine distance of 0.088
This wine has the following descriptors: ['oak', 'wood', 'toast', 'pear', 'honey', 'apple']

Suggestion 3 : A.R. Lenoble  NV Intense Brut  (Champagne) with a cosine distance of 0.111
This wine has the following descriptors: ['wood', 'rich', 'ripe', 'fruit', 'pear', 'crisp', 'apple', 'apricot']

Suggestion 4 : Domaine de la Garenne 2008  Mâcon-Azé with a cosine distance of 0.118
This wine has the following descriptors: ['wood', 'fruit', 'fresh', 'grapefruit', 'pear', 'bright', 'lemon', 'honey', 'wood']

Suggestion 5 : Château Haut-Brion 2007  Pessac-Léognan with a cosine distance of 0.137
This wine has the following descriptors: ['citrus', 'wood', 'rich', 'fruit', 'toast', 'apricot', 'pear', '

In [12]:
wine_reviews_mincount.country.unique()

array(['Italy', 'Portugal', 'US', 'Spain', 'Germany', 'France',
       'Argentina', 'Chile', 'Australia', 'Austria', 'South Africa',
       'New Zealand', 'Israel', 'Hungary', 'Greece', 'Romania', 'Mexico',
       'Canada', nan, 'Turkey', 'Czech Republic', 'Slovenia',
       'Luxembourg', 'Croatia', 'Georgia', 'Uruguay', 'England',
       'Lebanon', 'Serbia', 'Brazil', 'Moldova', 'Morocco', 'Peru',
       'India', 'Bulgaria', 'Cyprus', 'Armenia', 'Switzerland',
       'Bosnia and Herzegovina', 'Ukraine', 'Slovakia', 'Macedonia',
       'China', 'Egypt'], dtype=object)

In [13]:
wine_reviews_mincount.winery.value_counts()


Williams Selyem       202
Testarossa            199
Wines & Winemakers    168
Concha y Toro         159
Louis Latour          150
                     ... 
Casir Dos Santos        1
Huntaway Reserve        1
Onward                  1
Vriesenhof              1
Penedo Borges           1
Name: winery, Length: 15603, dtype: int64

In [14]:
descriptors = ['bright','sharp','pineapple','grapefruit']

In [15]:
descriptors_to_best_match_wines_v2(list_of_descriptors=descriptors, wineset=wineset, number_of_suggestions=5)

Suggestion 1 : Francis Blanchet 2012 Cuvée Silice  (Pouilly-Fumé) with a cosine distance of 0.136
This wine has the following descriptors: ['crisp', 'apple', 'citrus', 'minerality', 'bright', 'grapefruit']

Suggestion 2 : Vignobles Berthier 2013 Domaine de Montbenoit Sauvignon Blanc (Coteaux du Giennois) with a cosine distance of 0.139
This wine has the following descriptors: ['crisp', 'fresh', 'soft', 'pineapple', 'apricot', 'herbacious', 'lemon', 'grapefruit', 'ripe', 'fruit', 'bright']

Suggestion 3 : Château Lauretan 2010  Bordeaux Blanc with a cosine distance of 0.140
This wine has the following descriptors: ['grapefruit', 'pineapple', 'fresh', 'herbacious', 'crisp', 'clean']

Suggestion 4 : Château Lamothe-Vincent 2012  Bordeaux Blanc with a cosine distance of 0.145
This wine has the following descriptors: ['soft', 'light_bodied', 'tropical_fruit', 'grapefruit', 'pineapple', 'crisp', 'fruit']

Suggestion 5 : Jeeper NV Brut Grand Assemblage  (Champagne) with a cosine distance of 0

In [16]:
def descriptors_to_best_match_wines_v3(list_of_descriptors, wineset=wine_reviews_mincount, number_of_suggestions=10, show_description=False):
    """Print the closest wines to a descriptor list, restricted to ``wineset``.

    The query vector is built as the sum of TF-IDF-weighted word2vec vectors of
    the usable descriptors. All ``len(wine_reviews_mincount)`` neighbours are
    requested from ``model_knn`` and walked in the order returned; only those
    whose index value is present in ``wineset.index`` are printed. Each
    suggestion is followed by two blank lines.

    The function reads these notebook-level names: ``dict_of_tfidf_weightings``,
    ``descriptor_mapping``, ``wine_word2vec_model``, ``model_knn`` and
    ``wine_reviews_mincount``.

    NOTE(review): ``descriptor_mapping`` is not created by any cell visible in
    this notebook. On a fresh kernel a descriptor that is missing from
    ``dict_of_tfidf_weightings`` will raise ``NameError`` unless that object is
    loaded first.

    Args:
        list_of_descriptors: iterable of descriptor strings.
        wineset: DataFrame with ``title`` and ``normalized_descriptors``
            columns (and ``description`` when ``show_description`` is set)
            whose index labels are compared against the neighbour indices.
            The default is bound once, when this cell is run.
        number_of_suggestions: maximum number of wines to print; zero or a
            negative value prints nothing.
        show_description: when true, also print the wine's ``description``
            text between the two blank lines. Off by default, which matches
            the output of the earlier version of this function.

    Returns:
        None. The suggestions are printed.
    """
    weighted_review_terms = []
    for term in list_of_descriptors:
        if term not in dict_of_tfidf_weightings:
            if term not in descriptor_mapping.index:
                print('choose a different descriptor from', term)
                continue
            normalized_term = descriptor_mapping['normalized'][term]
            # The mapping may point at a term that has no TF-IDF weight;
            # reject it the same way instead of failing with a KeyError.
            if normalized_term not in dict_of_tfidf_weightings:
                print('choose a different descriptor from', term)
                continue
            term = normalized_term
        tfidf_weighting = dict_of_tfidf_weightings[term]
        # reshape(1, -1) yields one row per query without hard-coding the
        # embedding width.
        word_vector = wine_word2vec_model.wv.get_vector(term).reshape(1, -1)
        weighted_review_terms.append(tfidf_weighting * word_vector)

    if not weighted_review_terms:
        # sum([]) is the integer 0, which is not a valid query vector.
        print('no usable descriptors were given, so no wines can be suggested')
        return

    review_vector = sum(weighted_review_terms)

    # Every neighbour is requested because the subset filter below may reject
    # an arbitrary number of the closest ones.
    distance, indice = model_knn.kneighbors(review_vector, n_neighbors=len(wine_reviews_mincount))
    # NOTE(review): the nearest neighbour is dropped here. For a free-form
    # descriptor query this presumably discards the best match -- confirm
    # before changing.
    distance_list = distance[0].tolist()[1:]
    indice_list = indice[0].tolist()[1:]

    shown = 0
    for d, i in zip(distance_list, indice_list):
        # Checking the quota first means a request for zero (or fewer)
        # suggestions prints nothing instead of the whole subset.
        if shown >= number_of_suggestions:
            break
        if i not in wineset.index:
            continue
        shown += 1
        wine_name = wineset['title'][i]
        wine_descriptors = wineset['normalized_descriptors'][i]
        print('Suggestion', str(shown), ':', wine_name, 'with a cosine distance of', "{:.3f}".format(d))
        print('This wine has the following descriptors:', wine_descriptors)
        print('')
        if show_description:
            print(wineset['description'][i])
        print("")

In [17]:
descriptors_to_best_match_wines_v3(list_of_descriptors=descriptors, wineset=wineset, number_of_suggestions=5)

Suggestion 1 : Francis Blanchet 2012 Cuvée Silice  (Pouilly-Fumé) with a cosine distance of 0.136
This wine has the following descriptors: ['crisp', 'apple', 'citrus', 'minerality', 'bright', 'grapefruit']


Suggestion 2 : Vignobles Berthier 2013 Domaine de Montbenoit Sauvignon Blanc (Coteaux du Giennois) with a cosine distance of 0.139
This wine has the following descriptors: ['crisp', 'fresh', 'soft', 'pineapple', 'apricot', 'herbacious', 'lemon', 'grapefruit', 'ripe', 'fruit', 'bright']


Suggestion 3 : Château Lauretan 2010  Bordeaux Blanc with a cosine distance of 0.140
This wine has the following descriptors: ['grapefruit', 'pineapple', 'fresh', 'herbacious', 'crisp', 'clean']


Suggestion 4 : Château Lamothe-Vincent 2012  Bordeaux Blanc with a cosine distance of 0.145
This wine has the following descriptors: ['soft', 'light_bodied', 'tropical_fruit', 'grapefruit', 'pineapple', 'crisp', 'fruit']


Suggestion 5 : Jeeper NV Brut Grand Assemblage  (Champagne) with a cosine distance 

In [18]:
descriptors

['bright', 'sharp', 'pineapple', 'grapefruit']

In [19]:
wineset

Unnamed: 0,level_0,index,country,description,designation,points,price,province,region_1,title,variety,winery,normalized_descriptors,review_vector,descriptor_count
7,9,9,France,This has great depth of flavor with its fresh ...,Les Natures,87,27.0,Alsace,Alsace,Jean-Baptiste Adam 2012 Les Natures Pinot Gris...,Pinot Gris,Jean-Baptiste Adam,"[depth, fresh, tree_fruit, fruit, spice, dry, ...","[[-2.2948205, -3.429711, -0.07856291, 0.952319...",7
9,11,11,France,"This is a dry wine, very spicy, with a tight, ...",,87,30.0,Alsace,Alsace,Leon Beyer 2012 Gewurztraminer (Alsace),Gewürztraminer,Leon Beyer,"[dry, spice, tight, minerality, citrus, pepper...","[[-1.4436648, -3.5878768, 0.45282426, -1.14376...",7
40,49,49,France,"Soft and fruity, this is a generous, ripe wine...",Eté Indien,86,14.0,Beaujolais,Brouilly,Vignerons de Bel Air 2011 Eté Indien (Brouilly),Gamay,Vignerons de Bel Air,"[soft, fruit, ripe, juicy, cherry, fruit, rich...","[[-1.9427114, -1.7901807, 1.4188309, -1.123458...",8
52,63,63,France,"This fat, yeasty Champagne is comprised predom...",Brut Rosé,86,58.0,Champagne,Champagne,Roland Champion NV Brut Rosé (Champagne),Champagne Blend,Roland Champion,"[fat, yeast, ripe, berry, fruit, light_bodied,...","[[-1.3036878, -1.3379736, -1.5018443, -0.77713...",8
53,65,65,France,"From the warm 2015 vintage, this is a soft and...",,86,24.0,Burgundy,Chablis,Simonnet-Febvre 2015 Chablis,Chardonnay,Simonnet-Febvre,"[warm, soft, fruit, white, fruit, light_bodied...","[[-1.5123539, -1.6475304, 0.54588705, -0.83126...",7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108867,129959,129959,France,The granite soil of the Brand Grand Cru vineya...,Brand Grand Cru,90,57.0,Alsace,Alsace,Cave de Turckheim 2010 Brand Grand Cru Pinot G...,Pinot Gris,Cave de Turckheim,"[rich, ripe, fruit, dry, honey, yellow, fruit,...","[[-2.218728, -4.1326113, -1.3310887, 0.6956551...",9
108871,129964,129964,France,"Initially quite muted, this wine slowly develo...",Domaine Saint-Rémy Herrenweg,90,,Alsace,Alsace,Domaine Ehrhart 2013 Domaine Saint-Rémy Herren...,Gewürztraminer,Domaine Ehrhart,"[rich, spice, sweet, dry, spice, dry, honey]","[[-1.5443115, -3.4082165, 0.5985926, -0.977315...",7
108872,129965,129965,France,"While it's rich, this beautiful dry wine also ...",Seppi Landmann Vallée Noble,90,28.0,Alsace,Alsace,Domaine Rieflé-Landmann 2013 Seppi Landmann Va...,Pinot Gris,Domaine Rieflé-Landmann,"[rich, dry, fresh, ripe, white, fruit, pear, a...","[[-1.7249664, -3.4387279, -1.2478399, 0.017392...",9
108875,129969,129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,"[dry, crisp, weight, powerful, spice, baked_ap...","[[-1.3772521, -1.0705342, 2.4167092, 1.6873336...",6


In [20]:
dict_of_tfidf_weightings

{'abrasive': 7.675207893799539,
 'acacia': 7.119082510803884,
 'acid_driven': 8.021484130517372,
 'aggressive': 6.711289112936129,
 'aid': 8.57038170223277,
 'airy': 8.309166202969154,
 'allspice': 7.1296274239805,
 'almond': 5.2139527320934995,
 'alpine_herbs': 7.930887235165146,
 'american_oak': 7.833431899014433,
 'angular': 7.147453207933101,
 'anise': 5.196928849204271,
 'apple': 3.403380699289796,
 'apple_sauce': 8.664200457450427,
 'apricot': 4.456088196231678,
 'ash': 7.840600388493045,
 'asian_spice': 7.108647633511305,
 'asparagus': 7.598924589049908,
 'asphalt': 6.771187254517198,
 'assertive': 6.5212455100482645,
 'astringent': 5.298601940459832,
 'austere': 5.980487740747238,
 'bacon': 6.12350244803401,
 'baked': 4.948631186167724,
 'baked_apple': 6.5135826373026955,
 'baked_bread': 7.663086533267194,
 'baking_spices': 4.919529643708074,
 'balsamic': 5.942042588837537,
 'banana': 6.214043655727164,
 'band': 8.57038170223277,
 'barbecue': 6.639509430541998,
 'bark': 8.05657

In [21]:
wine_reviews_mincount.columns

Index(['level_0', 'index', 'country', 'description', 'designation', 'points',
       'price', 'province', 'region_1', 'title', 'variety', 'winery',
       'normalized_descriptors', 'review_vector', 'descriptor_count'],
      dtype='object')

In [22]:
wine_reviews_mincount.isnull().sum()

level_0                       0
index                         0
country                      62
description                   0
designation               30201
points                        0
price                      7132
province                     62
region_1                      0
title                         0
variety                       1
winery                        0
normalized_descriptors        0
review_vector                 0
descriptor_count              0
dtype: int64

In [23]:
# Rows of the review frame whose country is exactly "France"; rows with a
# missing country compare unequal and are left out.
is_french = wine_reviews_mincount['country'].eq("France")
french_wines = wine_reviews_mincount[is_french]
french_wines

Unnamed: 0,level_0,index,country,description,designation,points,price,province,region_1,title,variety,winery,normalized_descriptors,review_vector,descriptor_count
7,9,9,France,This has great depth of flavor with its fresh ...,Les Natures,87,27.0,Alsace,Alsace,Jean-Baptiste Adam 2012 Les Natures Pinot Gris...,Pinot Gris,Jean-Baptiste Adam,"[depth, fresh, tree_fruit, fruit, spice, dry, ...","[[-2.2948205, -3.429711, -0.07856291, 0.952319...",7
9,11,11,France,"This is a dry wine, very spicy, with a tight, ...",,87,30.0,Alsace,Alsace,Leon Beyer 2012 Gewurztraminer (Alsace),Gewürztraminer,Leon Beyer,"[dry, spice, tight, minerality, citrus, pepper...","[[-1.4436648, -3.5878768, 0.45282426, -1.14376...",7
40,49,49,France,"Soft and fruity, this is a generous, ripe wine...",Eté Indien,86,14.0,Beaujolais,Brouilly,Vignerons de Bel Air 2011 Eté Indien (Brouilly),Gamay,Vignerons de Bel Air,"[soft, fruit, ripe, juicy, cherry, fruit, rich...","[[-1.9427114, -1.7901807, 1.4188309, -1.123458...",8
52,63,63,France,"This fat, yeasty Champagne is comprised predom...",Brut Rosé,86,58.0,Champagne,Champagne,Roland Champion NV Brut Rosé (Champagne),Champagne Blend,Roland Champion,"[fat, yeast, ripe, berry, fruit, light_bodied,...","[[-1.3036878, -1.3379736, -1.5018443, -0.77713...",8
53,65,65,France,"From the warm 2015 vintage, this is a soft and...",,86,24.0,Burgundy,Chablis,Simonnet-Febvre 2015 Chablis,Chardonnay,Simonnet-Febvre,"[warm, soft, fruit, white, fruit, light_bodied...","[[-1.5123539, -1.6475304, 0.54588705, -0.83126...",7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108867,129959,129959,France,The granite soil of the Brand Grand Cru vineya...,Brand Grand Cru,90,57.0,Alsace,Alsace,Cave de Turckheim 2010 Brand Grand Cru Pinot G...,Pinot Gris,Cave de Turckheim,"[rich, ripe, fruit, dry, honey, yellow, fruit,...","[[-2.218728, -4.1326113, -1.3310887, 0.6956551...",9
108871,129964,129964,France,"Initially quite muted, this wine slowly develo...",Domaine Saint-Rémy Herrenweg,90,,Alsace,Alsace,Domaine Ehrhart 2013 Domaine Saint-Rémy Herren...,Gewürztraminer,Domaine Ehrhart,"[rich, spice, sweet, dry, spice, dry, honey]","[[-1.5443115, -3.4082165, 0.5985926, -0.977315...",7
108872,129965,129965,France,"While it's rich, this beautiful dry wine also ...",Seppi Landmann Vallée Noble,90,28.0,Alsace,Alsace,Domaine Rieflé-Landmann 2013 Seppi Landmann Va...,Pinot Gris,Domaine Rieflé-Landmann,"[rich, dry, fresh, ripe, white, fruit, pear, a...","[[-1.7249664, -3.4387279, -1.2478399, 0.017392...",9
108875,129969,129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,"[dry, crisp, weight, powerful, spice, baked_ap...","[[-1.3772521, -1.0705342, 2.4167092, 1.6873336...",6


In [24]:
french_wines.region_1.unique()

array(['Alsace', 'Brouilly', 'Champagne', 'Chablis',
       'Mâcon-Milly Lamartine', 'Vin de France', 'Saint-Émilion',
       'Moulin-à-Vent', 'Madiran', 'St.-Romain', 'Côtes du Rhône',
       'Vacqueyras', 'Vin de Pays des Côtes de Gascogne', 'Cahors',
       'Saint-Estèphe', 'Arbois', 'Bordeaux Blanc', 'Montrachet',
       'Clos de Vougeot', 'Chassagne-Montrachet',
       'Criots-Bâtard-Montrachet', 'Graves', 'Muscadet Sèvre et Maine',
       'Meursault', 'Chambolle-Musigny', 'Côtes du Rhône Villages',
       'Côtes de Provence', "Coteaux d'Aix-en-Provence",
       'Côtes de Provence Sainte-Victoire', 'Bordeaux Supérieur',
       'Costières de Nîmes', 'Bordeaux', '', 'Mâcon-Villages',
       'Côte Chalonnaise', 'Chorey-lès-Beaune', 'Haut-Médoc', 'Médoc',
       'Margaux', 'Beaujolais-Villages', 'Côtes de Gascogne', 'Marsannay',
       'Gevrey-Chambertin', 'Beaune', 'Minervois',
       'Blaye Côtes de Bordeaux', 'Gaillac', 'Sancerre', 'Montravel',
       'Quincy', 'Pouilly-Fumé', "Vin

In [25]:
french_wines.columns

Index(['level_0', 'index', 'country', 'description', 'designation', 'points',
       'price', 'province', 'region_1', 'title', 'variety', 'winery',
       'normalized_descriptors', 'review_vector', 'descriptor_count'],
      dtype='object')

In [26]:
wine_reviews_mincount.region_1

0                        Etna
1                            
2           Willamette Valley
3         Lake Michigan Shore
4                     Navarra
                 ...         
108872                 Alsace
108873                       
108874                 Oregon
108875                 Alsace
108876                 Alsace
Name: region_1, Length: 108877, dtype: object

In [33]:
def separator2(text: str):
    '''
    Split an API input string into a list of terms.

    Both ``+`` and a literal space act as term separators. After splitting,
    every ``*`` inside a term is turned into a space, so ``*`` can be used to
    keep a multi-word term together.
    '''
    pieces = text.replace('+', ' ').split(' ')
    return [piece.replace('*', ' ') for piece in pieces]


In [34]:
separator2('pro+me+dio*bonito')

['pro', 'me', 'dio bonito']