# Import libraries

In [1]:
import pandas as pd
import numpy as np
import random
pd.set_option('display.max_columns', None)

import warnings
warnings.filterwarnings('ignore')

In [2]:
import nltk
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.cluster import KMeans

In [4]:
# import pymysql
# from sqlalchemy import create_engine
# import getpass 
# password = getpass.getpass()

In [5]:
# connection_string = 'mysql+pymysql://root:' + password + '@localhost/clean_winery'
# engine = create_engine(connection_string)

# Import dataframes

In [6]:
clustered_df = pd.read_csv('clustered.csv')

In [7]:
clustered_df.shape

(18389, 35)

In [8]:
all_data = pd.read_csv('all_data.csv')

In [9]:
all_data.shape

(21294, 34)

In [10]:
varietal_df = all_data[['wine_id','type','wine_name','country','region','varietal_name','winery','year','grapes_1_name','grapes_2_name','grapes_3_name']]

In [11]:
varietal_df.isna().sum()

wine_id             0
type                0
wine_name           0
country             0
region              0
varietal_name       0
winery              0
year                0
grapes_1_name    2829
grapes_2_name    2829
grapes_3_name    2829
dtype: int64

In [12]:
varietal_df = varietal_df.fillna('x')

In [13]:
wine_names = varietal_df[['wine_id','wine_name','winery']]

In [14]:
# Attach the wine name and winery to every clustered wine; the inner join on
# wine_id keeps only the wines present in both frames.
clustered_df = wine_names.merge(clustered_df, how='inner', on='wine_id')

In [15]:
# Store the 'Grenache.' varietal under the name 'Garnacha' in both frames.
varietal_df['varietal_name'] = varietal_df['varietal_name'].replace('Grenache.', 'Garnacha')
clustered_df['varietal_name'] = clustered_df['varietal_name'].replace('Grenache.', 'Garnacha')

In [21]:
clustered_df['body_description'].unique()

array(['High', 'Medium', 'Low', 'Very low'], dtype=object)

In [23]:
# Rename the body labels ('Medium' keeps its name). `replace` leaves values
# that are not in the mapping untouched, so re-running this cell is harmless;
# with `map` a second run turned the already renamed labels into NaN.
clustered_df['body_description'] = clustered_df['body_description'].replace(
    {'High': 'Strong', 'Low': 'Weak', 'Very low': 'Very Weak'}
)

In [24]:
clustered_df['body_description'].value_counts()

Strong       11688
Medium        4763
Weak          1783
Very Weak      155
Name: body_description, dtype: int64

In [25]:
clustered_df.to_csv('clustered_df.csv',index=False)
varietal_df.to_csv('varietal_df.csv',index=False)

# Case-folded copies for case-insensitive matching

In [26]:
def case_folding(df,cols):
    """Return a copy of ``df`` in which the text of ``cols`` is case-folded.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    cols : iterable of str
        Names of the columns to fold.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` where every string in the listed columns has been
        passed through ``str.casefold``.

    Raises
    ------
    KeyError
        If a name in ``cols`` is not a column of ``df``.
    """
    df_folded = df.copy()
    for col in cols:
        # Only strings are folded: missing values (NaN/None) and any other
        # non-string entries are kept as they are instead of raising
        # AttributeError on ``.casefold()``.
        df_folded[col] = df[col].map(
            lambda value: value.casefold() if isinstance(value, str) else value
        )
    return df_folded

In [27]:
# Text columns to case-fold in both frames.
cols = [
    'wine_name', 'winery', 'region', 'country', 'varietal_name', 'type',
    'grapes_1_name', 'grapes_2_name', 'grapes_3_name',
]
clustered_folded = case_folding(clustered_df, cols)
varietal_folded = case_folding(varietal_df, cols)

# Tokenizing columns for later

In [28]:
def tokenizing_col(df,col):
    """Add a ``'tokenized'`` column holding the word tokens of ``df[col]``.

    Each value of ``df[col]`` is passed to ``word_tokenize`` and the resulting
    token list is stored in the same row of the new column. The frame is
    modified in place and also returned.
    """
    df['tokenized'] = [word_tokenize(text) for text in df[col]]
    return df

In [29]:
clustered_folded = tokenizing_col(clustered_folded,'wine_name')
varietal_folded = tokenizing_col(varietal_folded,'wine_name')

In [30]:
clustered_folded.to_csv('clustered_folded.csv',index=False)
varietal_folded.to_csv('varietal_folded.csv',index=False)

# Variety Recommender

In [None]:
def basic_recommender():
    """Ask for a wine variety and print one random wine of that variety.

    The answer is case-folded and compared with
    ``varietal_folded['varietal_name']``. One ``wine_id`` is drawn at random
    from the matching rows, and its name, region and country are read from
    ``varietal_df`` (original casing). An apology is printed when no wine
    has that variety. Nothing is returned.
    """
    text = input("what variety you like?   ").casefold()
    print(' ')

    same_variety = varietal_folded[varietal_folded['varietal_name'] == text]
    if same_variety.empty:
        print("Sorry I don't have any suggestions for that")
        return

    recommended = random.choice(same_variety['wine_id'].values)

    # Read plain strings from the first row with that id. Concatenating the
    # `.values` arrays produced an array, so the sentence was printed as
    # "['...']" and once per row when a wine_id occurred more than once.
    picked = varietal_df[varietal_df['wine_id'] == recommended].iloc[0]

    print("If you like " + text + ", " + picked['wine_name'] + ' from ' + picked['region']
          + ' in ' + picked['country'] + ' is from the same variety.')
    return

In [None]:
basic_recommender()

# Grape Recommender

In [37]:
def grape_recommender():
    """Ask for a grape and print one random wine that lists it.

    The case-folded answer is compared with the three grape-name columns of
    ``varietal_folded``. If any row matches, a random ``wine_id`` among the
    matches is looked up in ``varietal_df`` and its name, region and country
    are printed; otherwise an apology is printed. Nothing is returned.
    """
    text = input("What variety of grape do you like?   ").casefold()
    print(' ')

    grape_columns = ['grapes_1_name', 'grapes_2_name', 'grapes_3_name']
    # True for every wine that has the grape in any of the three slots.
    has_grape = varietal_folded[grape_columns].eq(text).any(axis=1)

    if not has_grape.any():
        print("Sorry I don't have any suggestions for that.")
        return

    recommended = random.choice(varietal_folded[has_grape]['wine_id'].values)

    picked = varietal_df[varietal_df['wine_id'] == recommended]
    wine, region, country = (
        picked[field].values[0] for field in ('wine_name', 'region', 'country')
    )

    print("Maybe you would like to try " + wine + ' from ' + region + ' in ' + country)
    return

In [38]:
grape_recommender()

What variety of grape do you like?   garnacha
 
['Maybe you would like to try Acustic Celler Plaer Vinyes Velles  from Priorato in spain']


# Recommender by wine name

In [46]:
def name_recommender():
    """Ask for a wine name, confirm the best match, and recommend a wine from its cluster.

    Steps:
      1. The typed name is case-folded and tokenized, and a ``CountVectorizer``
         is fitted on those tokens.
      2. Every wine in ``clustered_folded`` is scored by how many vocabulary
         words occur in its ``'tokenized'`` name; the wines sharing the highest
         score are the candidates.
      3. The candidates are offered one by one (Y/N) until one is confirmed.
      4. A random wine with the same ``'clusters'`` value as the confirmed wine
         is printed with its region and country.

    Any answer other than Y or N ends the dialogue without a recommendation.
    Nothing is returned.
    """
    not_found = "Sorry, I didn't get the name right. Please, try again!"

    text = input("Write a wine name and I will recommend a similar one:   ").casefold()
    print(' ')
    print('Checking my database...')
    print(' ')

    tokens = word_tokenize(text)

    bow_vect = CountVectorizer()
    try:
        bow_vect.fit(tokens)
    except ValueError:
        # Raised when no usable word is left to build a vocabulary from
        # (empty answer, punctuation only, single characters).
        print(not_found)
        return

    # Score all wines with a single transform: one space-joined document per
    # wine gives the same word counts as transforming each token list on its
    # own, without one vectorizer call per row.
    documents = [' '.join(name_tokens) for name_tokens in clustered_folded['tokenized']]
    scores = np.asarray(bow_vect.transform(documents).sum(axis=1)).ravel()

    best_score = scores.max() if scores.size else 0
    if best_score == 0:
        # No wine shares a single word with the answer.
        print(not_found)
        return

    # Row positions of the wines with the top score (computed once, not per row).
    candidates = np.flatnonzero(scores == best_score)

    chosen = None
    for position in candidates:
        wine_name = clustered_df.iloc[position]['wine_name']
        confirm = input('Is ' + wine_name + ' the one you wrote?    Y/N    ').casefold()
        print(' ')

        if confirm == 'y':
            # Remember exactly the wine that was confirmed; the old counter
            # pointed at the *next* candidate when the first one was accepted.
            chosen = position
            break
        if confirm != 'n':
            return

    if chosen is None:
        # Every candidate was rejected.
        print(not_found)
        return

    cluster = clustered_df.iloc[chosen]['clusters']
    same_cluster = clustered_df[clustered_df['clusters'] == cluster]

    recommended = random.choice(same_cluster['wine_id'].values)

    # First row with that id, as plain strings (not `.values` arrays).
    picked = same_cluster[same_cluster['wine_id'] == recommended].iloc[0]

    print("My recommendation is " + picked['wine_name'] + ' from ' + picked['region'] + ' in ' + picked['country'])
    return

In [48]:
name_recommender()

Write a wine name and I will recommend a similar one:   paco y lola
 
Checking my database...
 
Is Paco & Lola Vintage Albariño  the one you wrote?    Y/N    n
 
Is Paco & Lola Follas Novas Albarino  the one you wrote?    Y/N    n
 
Is Paco & Lola Albariño  the one you wrote?    Y/N    n
 
Is Paco & Lola Prime Albariño Lias  the one you wrote?    Y/N    n
 
Is Paco & Lola No. 12 Albariño  the one you wrote?    Y/N    n
 
Is Paco & Lola LoLo Albariño  the one you wrote?    Y/N    y
 
['My recommendation is Altos de Torona Maremel Albariño  from Rías Baixas in spain']


# Recommender by features 

### (it has some problems, sometimes it doesn't work)

In [None]:
def wine_feature_recommender():
    """Ask for colour, body and acidity and recommend a random matching wine.

    Answers:
      * colour  - anything starting with "r" means red, everything else white;
      * body    - a number (1-5) compared with the ``'body'`` column;
      * acidity - starting with "h" -> 'High', "m" -> 'Medium', otherwise 'Low'.

    Returns
    -------
    str or None
        The recommendation sentence, or ``None`` (after printing an apology)
        when no wine matches or the body answer is not a number.
    """
    no_match = "Sorry, I don't have recomendations for those characteristics."

    red_white = input('Are you looking for a Red or White Wine?   R/W      ').casefold()
    print(' ')
    body = input('How do you like the body of your wine?  from 1-5 (Low-High)      ').casefold()
    print(' ')
    acidity = input('How do you like the acidity?   High(H) - Medium(M) - Low(L)    ').casefold()
    print(' ')

    # Look at the first letter only, so "r" and "red" (or "h" and "high")
    # are understood the same way.
    red_white = 'red' if red_white.startswith('r') else 'white'

    if acidity.startswith('h'):
        acidity = 'High'
    elif acidity.startswith('m'):
        acidity = 'Medium'
    else:
        acidity = 'Low'

    try:
        body_level = float(body)
    except ValueError:
        # A non-numeric body answer used to crash the cell.
        print(no_match)
        return None

    values_list = clustered_folded.loc[
        (clustered_folded['body'] == body_level)
        & (clustered_folded['type'] == red_white)
        & (clustered_folded['acidity_description'] == acidity)
    ]['wine_id'].values

    if len(values_list) == 0:
        print(no_match)
        return None

    recomendation = random.choice(values_list)
    picked = clustered_df[clustered_df['wine_id'] == recomendation].iloc[0]

    return 'I recommend you to try:    ' + picked['wine_name'] + ' from ' + picked['region'] + ' in ' + picked['country']
    

In [None]:
wine_feature_recommender()

# Recommender with clustering inside

#### work in progress

In [None]:
def new_recommender():
    """Cluster the wines of one colour and country, then recommend from the cluster of a wine the user likes.

    Steps:
      1. Ask for colour and country and keep the matching wines.
      2. Min-max scale their numeric columns (without ``wine_id``, ``clusters``
         and ``style_id``) and cluster them with KMeans (at most 12 clusters).
      3. Ask for a liked wine, score the selected wines by shared name words and
         let the user confirm one of the best-scoring candidates (Y/N).
      4. Print a random wine name from the confirmed wine's new cluster.

    Any answer other than Y or N ends the dialogue without a recommendation.
    Nothing is returned.
    """
    not_found = "Sorry, I didn't get the name right. Please, try again!"

    red_white = input('Red or White?   ').casefold()
    print(' ')
    country = input('Which country?    ').casefold()
    print(' ')

    selection = (clustered_folded['type']==red_white)&(clustered_folded['country']==country)
    df = clustered_folded[selection].reset_index(drop=True)
    # Same rows in the same order, with the original (not case-folded) text.
    df_original = clustered_df[selection].reset_index(drop=True)

    if df.empty:
        # Scaling and clustering cannot run on zero rows.
        print("Sorry I don't have any suggestions for that")
        return

    df_num = df.select_dtypes(np.number)
    df_num = df_num.drop(['wine_id','clusters','style_id'],axis=1)

    # Fit and transform the numerical columns with min-max scaling.
    X_num = MinMaxScaler().fit_transform(df_num)
    X_num = pd.DataFrame(X_num,columns=df_num.columns)

    # KMeans cannot build more clusters than there are rows, so cap the
    # default of 12 for small selections.
    kmeans = KMeans(n_clusters=min(12, len(df)))
    kmeans.fit(X_num)

    # Cluster label of every selected wine, in row order.
    new_clusters = kmeans.predict(X_num)

    # Choosing a similar wine.
    wine = input('One similar that you like?    ').casefold()
    print(' ')

    tokens = word_tokenize(wine)

    bow_vect = CountVectorizer()
    try:
        bow_vect.fit(tokens)
    except ValueError:
        # Raised when no usable word is left to build a vocabulary from.
        print(not_found)
        return

    # One space-joined document per wine: same word counts as transforming
    # every token list separately, but with a single vectorizer call.
    documents = [' '.join(name_tokens) for name_tokens in df['tokenized']]
    scores = np.asarray(bow_vect.transform(documents).sum(axis=1)).ravel()

    best_score = scores.max()
    if best_score == 0:
        # No selected wine shares a word with the answer.
        print(not_found)
        return

    candidates = np.flatnonzero(scores == best_score)

    chosen = None
    for position in candidates:
        wine_name = df_original.iloc[position]['wine_name']
        confirm = input('Is ' + wine_name + ' the one you wrote?    Y/N    ').casefold()
        print(' ')

        if confirm == 'y':
            # Keep the position of the wine that was actually confirmed.
            chosen = position
            break
        if confirm != 'n':
            return

    if chosen is None:
        # Every candidate was rejected.
        print(not_found)
        return

    cluster = new_clusters[chosen]
    recommended = random.choice(df_original.loc[new_clusters == cluster, 'wine_name'].values)
    print('My recommendation is:    ' + recommended)

    return

In [None]:
new_recommender()

In [54]:
clustered_folded['country'].unique()

array(['portugal', 'spain', 'italy', 'france', 'argentina', 'australia',
       'brazil', 'chile', 'new-zealand', 'south-africa', 'united-states',
       'israel', 'germany', 'switzerland'], dtype=object)

# Recommender with more questions

#### If fewer than 12 wines match the answers, one of them is picked at random instead of clustering

In [55]:
def new_recommender_2():
    """Narrow the wines down with several questions, then recommend one.

    Each answer (colour, country, region, variety, body, acidity) is applied
    as a filter only if at least one remaining wine matches it; otherwise it
    is ignored. With fewer than 12 wines left, one of them is recommended at
    random. Otherwise the remaining wines are min-max scaled and clustered
    with KMeans (12 clusters), the user names and confirms a wine they like
    (searched in the whole catalogue), and a random wine from the cluster
    predicted for that liked wine is recommended.

    Any answer other than Y or N during confirmation ends the dialogue
    without a recommendation. The result is printed; nothing is returned.
    """
    not_found = "Sorry, I didn't get the name right. Please, try again!"

    red_white = input('Red or White?   ').casefold()
    print(' ')

    country = input('Which country?    ').casefold()
    print(' ')

    region = input('Which region?    ').casefold()
    print(' ')

    variety = input('Which variety?    ').casefold()
    print(' ')

    # TODO: the grape answer is collected but not used as a filter yet
    # (work in progress).
    grape = input('Which grape?    ').casefold()
    print(' ')

    body = input('How do you like the body of your wine?  High(H) - Medium(M) - Low(L) - Very low (V)    ').casefold()
    print(' ')

    acidity = input('How do you like the acidity?   High(H) - Medium(M) - Low(L)    ').casefold()
    print(' ')

    # The description columns hold full labels, so translate the first letter
    # of the answer; comparing the raw letter never matched and these two
    # filters were silently skipped.
    body_labels = {'h': 'Strong', 'm': 'Medium', 'l': 'Weak', 'v': 'Very Weak'}
    acidity_labels = {'h': 'High', 'm': 'Medium', 'l': 'Low'}

    filters = [
        ('type', red_white),
        ('country', country),
        ('region', region),
        ('varietal_name', variety),
        ('body_description', body_labels.get(body[:1])),
        ('acidity_description', acidity_labels.get(acidity[:1])),
    ]

    df = clustered_folded
    for column, wanted in filters:
        if wanted is None:
            continue
        matches = df[column] == wanted
        # An answer that matches nothing is ignored rather than emptying
        # the selection.
        if matches.any():
            df = df[matches]

    # Work on an independent copy so a column can be added safely below.
    df = df.copy()

    if df.shape[0] < 12:
        # Too few wines to cluster: recommend one of them directly.
        recommended = random.choice(df['wine_id'].values)
        picked = clustered_df[clustered_df['wine_id']==recommended].iloc[0]

        print('My recommendation is:    ' + picked['wine_name'] +' from ' + picked['region']+ ' in '+ picked['country'].capitalize())
        return

    feature_cols = ['body','acidity_1','acidity_2','fizziness','intensity','sweetness','tannin','num_ratings','rate']
    df_num = df[feature_cols]

    # Fit the min-max scaler on the remaining wines; it is reused below for
    # the wine the user likes.
    transformer = MinMaxScaler().fit(df_num)
    X_num = pd.DataFrame(transformer.transform(df_num),columns=feature_cols)

    # TODO: how many clusters in relation to the number of variables?
    # Building clusters with the default of 12.
    kmeans = KMeans(n_clusters=12)
    kmeans.fit(X_num)

    # Assigning the clusters.
    df['new_clusters'] = kmeans.predict(X_num)

    similar_wine = input('Tell me a wine you like:      ').casefold()
    similar_token = word_tokenize(similar_wine)
    print(' ')

    bow_vect = CountVectorizer()
    try:
        bow_vect.fit(similar_token)
    except ValueError:
        # Raised when no usable word is left to build a vocabulary from.
        print(not_found)
        return

    # The liked wine is searched in the whole catalogue, not only in the
    # filtered selection. One space-joined document per wine gives the same
    # word counts as transforming every token list separately.
    documents = [' '.join(name_tokens) for name_tokens in clustered_folded['tokenized']]
    scores = np.asarray(bow_vect.transform(documents).sum(axis=1)).ravel()

    best_score = scores.max()
    if best_score == 0:
        # No wine shares a word with the answer.
        print(not_found)
        return

    candidates = np.flatnonzero(scores == best_score)

    chosen = None
    for position in candidates:
        candidate = clustered_df.iloc[position]
        wine_name = candidate['wine_name']
        region_name = candidate['region']

        confirm = input('Is ' + wine_name + ' from '+ region_name +' the one you wrote?    Y/N    ').casefold()
        print(' ')

        if confirm == 'y':
            # Keep the position of the wine that was actually confirmed.
            chosen = position
            break
        if confirm != 'n':
            return

    if chosen is None:
        # Every candidate was rejected.
        print(not_found)
        return

    # Scale the liked wine with the same scaler and predict its cluster.
    liked_num = clustered_df.iloc[[chosen]][feature_cols]
    X2_num = pd.DataFrame(transformer.transform(liked_num),columns=feature_cols)
    liked_cluster = kmeans.predict(X2_num)[0]

    values_list = df[df['new_clusters'] == liked_cluster]['wine_id'].values
    if len(values_list) == 0:
        # Defensive fallback: choose among all remaining wines.
        values_list = df['wine_id'].values

    recomendation = random.choice(values_list)
    picked = clustered_df[clustered_df['wine_id']==recomendation].iloc[0]

    print('My recommendation is:    ' + picked['wine_name'] +' from ' + picked['region'] + ' in ' + picked['country'].capitalize())
    return


In [85]:
new_recommender_2()

Red or White?   white
 
Which country?    portugal
 
Which region?    sdfg
 
Which variety?    sdf
 
Which grape?    sdf
 
How do you like the body of your wine?  High(H) - Medium(M) - Low(L) - Very low (V)    m
 
How do you like the acidity?   High(H) - Medium(M) - Low(L)    m
 
Tell me a wine you like:      albariño
 
Is Paco & Lola Vintage Albariño  from Rías Baixas the one you wrote?    Y/N    y
 
My recommendation is:    Alves de Sousa Branco da Gaivosa Reserva  from Douro in Portugal


# Recommender by country

In [None]:
def by_country():
    """Recommend a wine for a given country (not implemented yet).

    Placeholder body so that the cell is valid Python; the recommender logic
    is still to be written.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("by_country() has not been written yet")

In [80]:
random.choice(cluster_list)

18

In [87]:
indexes = [4368, 4369, 4370, 4372, 4373, 4375, 4378, 4379, 4380, 4381, 4382, 4383, 4384, 4385, 4386, 4387, 4388, 4389, 4391, 4393, 4394, 4396, 6051, 6052, 6054, 6058, 6060, 6061, 6062, 6063, 6065, 6066, 6067, 6069, 6070, 6071, 6074, 6075, 6076, 7807, 7808, 7810, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7830, 7831, 9495, 9496, 9497, 9498, 9500, 9501, 9502, 9505, 9506, 9507, 9508, 9510, 9511, 9512, 9514, 9515, 9517, 11210, 11211, 11212, 11213, 11214, 11216, 11218, 11219, 11220, 11221, 11222, 11223, 11224, 11225, 11226, 11227, 11228, 11229, 11230, 11231, 11232, 11233, 11234, 12965, 12967, 12968, 12970, 12971, 12973, 12974, 12975, 12976, 12977, 12978, 12979, 12980, 12984, 12986, 12987, 12988, 12990, 12992, 12993, 12994, 12995, 12996, 12998, 12999, 14735, 14736, 14738, 14739, 14740, 14741, 14742, 14744, 14745, 14748, 14749, 14750, 14752, 14754, 14755, 14756, 14757, 16516, 16517, 16518, 16520, 16523, 16524, 16525, 16527, 16528, 16529, 16530, 16532, 16533, 16534, 16535, 18320, 18330]

In [88]:
clustered_df.loc

Unnamed: 0,wine_id,wine_name,winery,type,country,region,varietal_name,year,style_id,body,body_description,acidity_1,acidity_description,acidity_2,fizziness,intensity,sweetness,tannin,num_ratings,rate,grapes_1_id,grapes_1_name,grapes_1_seoname,grapes_1_count,grapes_2_id,grapes_2_name,grapes_2_seoname,grapes_2_count,grapes_3_id,grapes_3_name,grapes_3_seoname,grapes_3_count,totals,percent_1,percent_2,percent_3,clusters
16374,7817,Louis Latour Bâtard-Montrachet Grand Cru,Louis Latour,White,france,Bâtard-Montrachet Grand Cru,Cote Beaune,2006,285,4.0,Strong,2.0,Medium,3.18,0.0,3.81,1.67,0.0,1254,4.4,14.0,Pinot Noir,pinot-noir,572334.0,10.0,Merlot,merlot,566719.0,5.0,Chardonnay,chardonnay,604208.0,1743261.0,0.33,0.33,0.35,8


In [90]:
clustered_df[clustered_df['country']=='germany']

Unnamed: 0,wine_id,wine_name,winery,type,country,region,varietal_name,year,style_id,body,body_description,acidity_1,acidity_description,acidity_2,fizziness,intensity,sweetness,tannin,num_ratings,rate,grapes_1_id,grapes_1_name,grapes_1_seoname,grapes_1_count,grapes_2_id,grapes_2_name,grapes_2_seoname,grapes_2_count,grapes_3_id,grapes_3_name,grapes_3_seoname,grapes_3_count,totals,percent_1,percent_2,percent_3,clusters
4709,167747408,Rudolf Fürst Astheimer Chardonnay,Rudolf Fürst,White,germany,Stetten (Franconia),Chardonnay,2019,58,4.0,Strong,3.0,High,3.50,0.0,3.03,1.64,0.0,190,4.0,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,32
4710,160221376,Zelt Kalkstein Chardonnay,Zelt,White,germany,Pfalz,Chardonnay,2018,58,4.0,Strong,3.0,High,3.72,0.0,2.80,1.04,0.0,104,3.8,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,4
4711,170519715,St. Antony Chardonnay,St. Antony,White,germany,Rheinhessen,Chardonnay,2021,58,4.0,Strong,3.0,High,3.42,0.0,2.96,1.55,0.0,201,3.6,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,32
4712,170885903,Alexander Laible Chardonnay Trocken,Alexander Laible,White,germany,Baden,Chardonnay,2021,58,4.0,Strong,3.0,High,3.33,0.0,3.15,2.28,0.0,458,4.0,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,32
4713,167778330,Keller Weisser Burgunder - Chardonnay Trocken,Keller,White,germany,Rheinhessen,Chardonnay,2020,58,4.0,Strong,3.0,High,3.57,0.0,2.90,1.27,0.0,428,3.8,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17503,160147835,K.Wechsler Westhofen Riesling Trocken,K.Wechsler,White,germany,Rheinhessen,Riesling,2019,186,2.0,Weak,3.0,High,4.72,0.0,2.82,1.26,0.0,783,3.8,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,6
17537,163130744,Schneider Kaitui Sauvignon Blanc,Schneider,White,germany,Pfalz,Sauvignon Blanc,2020,201,2.0,Weak,3.0,High,4.36,0.0,2.29,1.45,0.0,5528,4.1,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,6
17538,167981122,Weedenborn Sauvignon Blanc,Weedenborn,White,germany,Rheinhessen,Sauvignon Blanc,2021,201,2.0,Weak,3.0,High,4.33,0.0,2.31,1.34,0.0,860,4.0,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,6
17539,143589067,Oliver Zeter Steingebiss Sauvignon Blanc,Oliver Zeter,White,germany,Pfalz,Sauvignon Blanc,2015,201,2.0,Weak,3.0,High,4.36,0.0,2.15,1.09,0.0,139,3.6,15.0,Riesling,riesling,262136.0,299.0,Spätburgunder,spatburgunder,50164.0,355.0,Weissburgunder,weissburgunder,33464.0,345764.0,0.76,0.15,0.1,36


In [84]:
type(set(clusters_list).intersection(clusters_list_2))

set