<b><font size="5">Import relevant libraries and dataframes</font></b><a class="anchor" id="toc"></a>

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import warnings
warnings.simplefilter("ignore")
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.preprocessing import MinMaxScaler
import sompy
from sompy.sompy import SOMFactory

In [2]:
# Load the cleaned WineMatters data with WineID as the row index.
# The path uses a forward slash so it resolves on Windows, macOS and Linux;
# a backslash-separated path is only treated as a directory path on Windows.
wm_cleaned = pd.read_csv('data/WineMatters_cleaned.csv', index_col='WineID')
wm_cleaned.head()

Unnamed: 0_level_0,Aging_Time,Litters_Barrel,Magnesium,Residual_Sugar,Acidity,Floral,Wood,Sweetness,Red_Fruit,Citric,...,Alcohol,Astringency,Satisfaction_France,Satisfaction_Spain,Satisfaction_Portugal,Sulphites_Presence,Type_White,Barrel_Wooden,Grapes_Single,Satisfaction_General
WineID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,65,265,0,0.0,0,5,0,2,2,4,...,0,0,3,3,2,0,1,0,1,1
2,47,2464,310,305.0,4,2,0,3,0,2,...,0,0,4,3,2,1,0,0,1,1
3,15,2138,0,0.0,3,4,0,3,2,0,...,0,0,4,4,2,0,1,0,1,1
4,60,623,0,0.0,0,4,0,3,3,4,...,0,0,1,1,3,0,1,0,1,0
5,70,354,0,0.0,0,4,0,3,4,3,...,0,0,2,2,5,0,1,0,1,1


In [3]:
# Load the scaled WineMatters data. The first CSV column has no header
# (pandas would label it 'Unnamed: 0'); it is used as the row index and
# named WineID.
# The path uses a forward slash so it resolves on Windows, macOS and Linux;
# a backslash-separated path is only treated as a directory path on Windows.
wm_scaled = (
    pd.read_csv('data/WineMatters_scaled.csv', index_col=0)
    .rename_axis('WineID')
)
wm_scaled.head()

Unnamed: 0_level_0,Aging_Time,Litters_Barrel,Magnesium,Residual_Sugar,Acidity,Floral,Wood,Sweetness,Red_Fruit,Citric,...,Cloudiness,Alcohol,Astringency,Satisfaction_France,Satisfaction_Spain,Satisfaction_Portugal,Sulphites_Presence,Type_White,Barrel_Wooden,Grapes_Single
WineID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.74359,0.034003,0.0,0.0,0.0,5.0,0.0,2.0,2.0,4.0,...,3.0,0.0,0.0,3.0,3.0,2.0,0.0,1.0,0.0,1.0
2,0.512821,0.381781,0.707763,0.693182,4.0,2.0,0.0,3.0,0.0,2.0,...,4.0,0.0,0.0,4.0,3.0,2.0,1.0,0.0,0.0,1.0
3,0.102564,0.330223,0.0,0.0,3.0,4.0,0.0,3.0,2.0,0.0,...,3.0,0.0,0.0,4.0,4.0,2.0,0.0,1.0,0.0,1.0
4,0.679487,0.090622,0.0,0.0,0.0,4.0,0.0,3.0,3.0,4.0,...,1.0,0.0,0.0,1.0,1.0,3.0,0.0,1.0,0.0,1.0
5,0.807692,0.048078,0.0,0.0,0.0,4.0,0.0,3.0,4.0,3.0,...,2.0,0.0,0.0,2.0,2.0,5.0,0.0,1.0,0.0,1.0


<b><font size="5">Segmentation of Variables after wm_scaled</font></b><a class="anchor" id="toc"></a>

From now on, two segmentations of the wm_scaled variables are used:
- flavor/feeling
- production characteristics

In [4]:
# Flavor/feeling segmentation: wm_flavfeel is an independent copy of the
# columns of wm_scaled listed below.
wm_flavfeel = wm_scaled.loc[:, [
    'Residual_Sugar',
    'Acidity',
    'Floral',
    'Wood',
    'Sweetness',
    'Red_Fruit',
    'Citric',
    'Density',
    'Cloudiness',
    'Alcohol',
    'Astringency',
]].copy()

In [5]:
# Production-characteristics segmentation: wm_prodchar is an independent copy
# of the columns of wm_scaled listed below.
wm_prodchar = wm_scaled.loc[:, [
    'Aging_Time',
    'Litters_Barrel',
    'Type_White',
    'Magnesium',
    'Sulphites_Presence',
    'Barrel_Wooden',
    'Grapes_Single',
    'Color_Intensity',
]].copy()

<b><font size="5">SOM - wm_flavfeel</font></b><a class="anchor" id="toc"></a>

In [6]:
# Preview the first five rows of the flavor/feeling segment.
wm_flavfeel.head(n=5)

Unnamed: 0_level_0,Residual_Sugar,Acidity,Floral,Wood,Sweetness,Red_Fruit,Citric,Density,Cloudiness,Alcohol,Astringency
WineID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.0,0.0,5.0,0.0,2.0,2.0,4.0,2.0,3.0,0.0,0.0
2,0.693182,4.0,2.0,0.0,3.0,0.0,2.0,2.0,4.0,0.0,0.0
3,0.0,3.0,4.0,0.0,3.0,2.0,0.0,2.0,3.0,0.0,0.0
4,0.0,0.0,4.0,0.0,3.0,3.0,4.0,3.0,1.0,0.0,0.0
5,0.0,0.0,4.0,0.0,3.0,4.0,3.0,4.0,2.0,0.0,0.0


In [7]:
# Min-max scale every wm_flavfeel column (the scaler is fit on all of them),
# rebuild a frame with the same index and column labels, then keep every
# feature except Residual_Sugar as an independent copy.
scaler = MinMaxScaler()
wm_scal = (
    pd.DataFrame(
        scaler.fit_transform(wm_flavfeel),
        index=wm_flavfeel.index,
        columns=wm_flavfeel.columns,
    )
    .loc[:, [
        'Acidity',
        'Floral',
        'Wood',
        'Sweetness',
        'Red_Fruit',
        'Citric',
        'Density',
        'Cloudiness',
        'Alcohol',
        'Astringency',
    ]]
    .copy()
)
wm_scal.head()

Unnamed: 0_level_0,Acidity,Floral,Wood,Sweetness,Red_Fruit,Citric,Density,Cloudiness,Alcohol,Astringency
WineID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0.0,1.0,0.0,0.4,0.4,0.8,0.4,0.6,0.0,0.0
2,0.8,0.4,0.0,0.6,0.0,0.4,0.4,0.8,0.0,0.0
3,0.6,0.8,0.0,0.6,0.4,0.0,0.4,0.6,0.0,0.0
4,0.0,0.8,0.0,0.6,0.6,0.8,0.6,0.2,0.0,0.0
5,0.0,0.8,0.0,0.6,0.8,0.6,0.8,0.4,0.0,0.0


In [8]:
# Convert the scaled frame to a float32 NumPy array, which is the input
# passed to the SOM.
wm_flavfeel_som = wm_scal.values.astype(np.float32)

# Fail fast with an actionable message if the SOM input holds NaN or infinite
# values (including values that overflow float32), naming the affected
# columns instead of letting a later step fail with a generic error.
_non_finite_cols = wm_scal.columns[~np.isfinite(wm_flavfeel_som).all(axis=0)].tolist()
if _non_finite_cols:
    raise ValueError(
        "SOM input contains NaN or infinite values in columns: {}".format(_non_finite_cols)
    )

In [9]:
# SOM grid dimensions: a 25 x 25 map, passed to the SOM as [rows, cols].
rows, cols = 25, 25
mapsize = [rows, cols]

In [10]:
# Build the flavor/feeling SOM through SOMFactory.
# Alternatives for the options chosen below:
#   lattice        -> 'rect' or 'hexa'
#   initialization -> 'pca' or 'random'
#   neighborhood   -> 'gaussian' or 'bubble'
#   training       -> 'seq' or 'batch'
flavfeel_som = SOMFactory().build(
    wm_flavfeel_som,
    mapsize,
    mask=None,
    mapshape='planar',
    lattice='rect',           # grid topology
    normalization='var',
    initialization='pca',     # weight initialization
    neighborhood='gaussian',  # neighborhood function
    training='batch',         # training mode
)

In [11]:
# Fit the flavor/feeling SOM with one job, verbose output switched off,
# a rough-training length of 3 and a fine-tuning length of 5.
# NOTE(review): the recorded run of this cell raised
# "ValueError: Input contains NaN, infinity or a value too large" --
# confirm the SOM input has no NaN/inf values (and, possibly, no constant
# columns) before training.
flavfeel_som.train(
    n_job=1,
    verbose=False,
    train_rough_len=3,
    train_finetune_len=5,
)

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

<b><font size="5">SOM - wm_prodchar</font></b><a class="anchor" id="toc"></a>