In [1]:
import pickle
import spacy
import pandas as pd
import numpy as np
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB, CategoricalNB
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [3]:
nlp = spacy.load("en_core_web_sm", disable=["parser", "textcat", "ner"])

In [4]:
df = pd.read_csv('winemag-data-130k-v2.csv', index_col = 0)

In [5]:
df.columns

Index(['country', 'description', 'designation', 'points', 'price', 'province',
       'region_1', 'region_2', 'taster_name', 'taster_twitter_handle', 'title',
       'variety', 'winery'],
      dtype='object')

In [6]:
df

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef)
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss


In [7]:
counts = df['variety'].value_counts()

In [8]:
counts[counts > 500]

Pinot Noir                       13272
Chardonnay                       11753
Cabernet Sauvignon                9472
Red Blend                         8946
Bordeaux-style Red Blend          6915
Riesling                          5189
Sauvignon Blanc                   4967
Syrah                             4142
Rosé                              3564
Merlot                            3102
Nebbiolo                          2804
Zinfandel                         2714
Sangiovese                        2707
Malbec                            2652
Portuguese Red                    2466
White Blend                       2360
Sparkling Blend                   2153
Tempranillo                       1810
Rhône-style Red Blend             1471
Pinot Gris                        1455
Champagne Blend                   1396
Cabernet Franc                    1353
Grüner Veltliner                  1345
Portuguese White                  1159
Bordeaux-style White Blend        1066
Pinot Grigio             

In [9]:
df_trim = df[df['variety'].isin(counts[counts > 500].index)]

In [10]:
df_trim

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef)
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss


In [11]:
df_trim['type'] = 'red'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [12]:
df_trim.loc[df.variety == 'White Blend', 'type'] = "white"
df_trim.loc[df.variety == 'Chardonnay', 'type'] = "white"
df_trim.loc[df.variety == 'Riesling', 'type'] = "white"
df_trim.loc[df.variety == 'Sauvignon Blanc', 'type'] = "white"
df_trim.loc[df.variety == 'Sparkling Blend', 'type'] = "white"
df_trim.loc[df.variety == 'Pinot Gris', 'type'] = "white"
df_trim.loc[df.variety == 'Champagne Blend', 'type'] = "white"
df_trim.loc[df.variety == 'Grüner Veltliner ', 'type'] = "white"
df_trim.loc[df.variety == 'Portuguese White', 'type'] = "white"
df_trim.loc[df.variety == 'Bordeaux-style White Blend', 'type'] = "white"
df_trim.loc[df.variety == 'Pinot Grigio', 'type'] = "white"
df_trim.loc[df.variety == 'Gewürztraminer', 'type'] = "white"
df_trim.loc[df.variety == 'Viognier', 'type'] = "white"
df_trim.loc[df.variety == 'Glera', 'type'] = "white"
df_trim.loc[df.variety == 'Chenin Blanc', 'type'] = "white"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [13]:
df_trim.loc[df.variety == 'Rosé', 'type'] = "rose"

In [14]:
df_trim['sparkling'] = 'no'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [15]:
df_trim.loc[df.variety == 'Sparkling Blend', 'sparkling'] = "yes"
df_trim.loc[df.variety == 'Champagne Blend', 'sparkling'] = "yes"

In [21]:
sparkling = input('Would you like you wine to be sparkling: ')

Would you like you wine to be sparkling: yes


In [22]:
if sparkling == 'yes':
    df_filter = df_trim.loc[df_trim['sparkling']=='yes']
else:
     df_filter = df_trim.loc[df_trim['sparkling']=='no']

In [23]:
wine_type = input('Would you like white, red, rose or any: ')

Would you like white, red, rose or any: white


In [24]:
if wine_type == 'red':
    df_filter = df_filter.loc[df_trim['type']=='red']
elif wine_type == 'white': 
    df_filter = df_filter.loc[df_trim['type']=='white']
else:
    df_filter = df_filter.loc[df_trim['type']=='rose']

In [25]:
df_filter

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,type,sparkling
63,France,"This fat, yeasty Champagne is comprised predom...",Brut Rosé,86,58.0,Champagne,Champagne,,Roger Voss,@vossroger,Roland Champion NV Brut Rosé (Champagne),Champagne Blend,Roland Champion,white,yes
69,France,"This cooperative, based in Aÿ, has benefited f...",Brut Rosé,86,55.0,Champagne,Champagne,,Roger Voss,@vossroger,Collet NV Brut Rosé (Champagne),Champagne Blend,Collet,white,yes
334,France,"A ripe, fruity wine that takes its raspberry f...",Mosaïque Rosé Brut,86,42.0,Champagne,Champagne,,Roger Voss,@vossroger,Jacquart NV Mosaïque Rosé Brut (Champagne),Champagne Blend,Jacquart,white,yes
339,Spain,"Red in color, with berry and apple aromas, thi...",1887 Rosado,82,13.0,Catalonia,Cava,,Michael Schachner,@wineschach,Cavas Hill NV 1887 Rosado Sparkling (Cava),Sparkling Blend,Cavas Hill,white,yes
342,Spain,"Funky, yeasty aromas of cinnamon, spent lees a...",,82,11.0,Catalonia,Cava,,Michael Schachner,@wineschach,Bellisco NV Sparkling (Cava),Sparkling Blend,Bellisco,white,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129695,US,"Honey-sweet flavors of raspberries, pink grape...",Rosé,87,22.0,California,California,California Other,,,Chandon NV Rosé Sparkling (California),Sparkling Blend,Chandon,white,yes
129706,US,"Mostly Pinot Noir, with a few drops of Chardon...",VA de VI Ultra Cuvée,87,24.0,California,Sonoma County,Sonoma,,,Gloria Ferrer NV VA de VI Ultra Cuvée Sparklin...,Sparkling Blend,Gloria Ferrer,white,yes
129719,France,In order to maintain freshness and crispness i...,Domaine Vieilles Vignes Brut,90,49.0,Champagne,Champagne,,Roger Voss,@vossroger,Bruno Gobillard NV Domaine Vieilles Vignes Bru...,Champagne Blend,Bruno Gobillard,white,yes
129743,France,"Crisp and fruity, this mature-tasting wine has...",Réserve Brut,90,70.0,Champagne,Champagne,,Roger Voss,@vossroger,Castelnau NV Réserve Brut (Champagne),Champagne Blend,Castelnau,white,yes
