# Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import display

# Define plotting colors

In [None]:
# Palette used when plotting: the cluster with index k is drawn in color_list[k].
color_list = [
    'b', 'r', 'g', 'magenta', 'cyan', 'blueviolet',
    'orange', 'yellow', 'palegreen', 'grey', 'lime', 'peru',
    'teal', 'hotpink', 'cornflowerblue', 'lightcoral', 'darkgray',
    'whitesmoke', 'rosybrown', 'firebrick', 'salmon', 'chocolate',
    'bisque', 'tan', 'gold', 'olive', 'honeydew', 'thistle', 'k',
]

# Function to read .csv file data into a Pandas data frame

In [None]:
def df_read(datafile):
    """Read a comma-delimited data file into a Pandas data frame.

    Parameters
    ----------
    datafile
        Passed unchanged to ``pandas.read_csv`` as the source to read.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``datafile``.
    """
    return pd.read_csv(datafile, delimiter=',')

# Function to apply z-score normalisation to a Pandas dataframe

In [None]:
def z_score(df):
    """Return a z-score normalised copy of ``df``.

    Each column is shifted by its own mean and divided by its own standard
    deviation (as computed by ``Series.std``). The input frame is left
    untouched; the work is done on a copy.
    """
    standardised = df.copy()
    for name in standardised.columns:
        values = standardised[name]
        centred = values - values.mean()
        standardised[name] = centred / values.std()
    return standardised

# Hard k-means in 3 dimensions: Wine chemical composition data

# Read the wine data into a Pandas dataframe

In [None]:
# Load the file 'ul_wine.data' (path relative to the working directory)
# through the df_read helper.
df_wine = df_read('ul_wine.data')

# Display df_wine contents in a table

In [None]:
# Show the raw, un-normalised frame using IPython's display().
display(df_wine)

# Apply z-score normalisation to df_wine

In [None]:
# z_score returns a new frame, so df_wine itself keeps its original values.
df_wine_standardized = z_score(df_wine)
# Show the normalised frame for comparison with the raw table.
display(df_wine_standardized)


# ✔ !STUDENTS TO ALTER! Choose data parameters for x, y, z components

In [None]:
# Column labels looked up in df_wine_standardized to supply the x, y and z
# coordinates for clustering and plotting; each must be a column of that frame.
x_param = 'Alcohol'
y_param = 'Malic_acid'
z_param = 'Alcalinity_of_ash'

# ✔ !STUDENTS TO ALTER! Select number of clusters

In [None]:
# Number of clusters; passed to KMeans as n_clusters.
K_clust = 2

# Run k-means using the students' parameters and display metrics

In [None]:
# Pull the three student-selected columns out of the standardised frame.
# x, y and z are kept as Pandas Series because the plotting cell reads them.
x = df_wine_standardized.loc[:, x_param]
y = df_wine_standardized.loc[:, y_param]
z = df_wine_standardized.loc[:, z_param]

# Stack the columns side by side into a float array with one row per sample
# and one column per chosen parameter. Stacking is positional, so it does not
# depend on the frame having a 0..n-1 index the way element-wise x[i] lookups
# would.
array_wine_standardised = np.column_stack([x, y, z]).astype(float)

# random_state=0 fixes the centroid initialisation so reruns are repeatable.
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases - confirm whether it should be pinned.
kmeans = KMeans(n_clusters=K_clust, random_state=0).fit(array_wine_standardised)
# k_tracker[i] is the zero-based cluster label assigned to sample i.
k_tracker = kmeans.labels_

unique, counts = np.unique(k_tracker, return_counts=True)
# Report clusters as 1..K and use plain ints so the printed dictionary reads
# the same whichever NumPy version formats it.
count_dict = {int(label) + 1: int(size) for label, size in zip(unique, counts)}
# Sum of squared distances of the samples to their closest cluster centre.
ssd = kmeans.inertia_

separator = '******************************************************************************'
print(separator)
print('Number of points in each cluster:')
print(count_dict)
print(separator)
print('CLUSTER CENTRES:\n', kmeans.cluster_centers_)
print(separator)
print('k TRACKER ARRAY:\n', k_tracker)
print(separator)
print('SSD:\n', ssd)
print(separator)

# Plot 3D chart showing clusters

In [None]:
# Cluster-centre coordinates, one array per plotted axis.
x_k = kmeans.cluster_centers_[:, 0]
y_k = kmeans.cluster_centers_[:, 1]
z_k = kmeans.cluster_centers_[:, 2]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Centres are drawn as black stars to set them apart from the data points.
ax.scatter(x_k, y_k, z_k, marker='*', color='k')

# Take the cluster count from the fitted model rather than a fixed upper
# bound, so every cluster is drawn however many were requested.
n_clusters = kmeans.n_clusters
for k in range(n_clusters):
    # Boolean mask selecting the samples assigned to cluster k; one scatter
    # call per cluster instead of one per point.
    in_cluster = k_tracker == k
    # Wrap around the palette if there are more clusters than colours.
    ax.scatter(x[in_cluster], y[in_cluster], z[in_cluster],
               color=color_list[k % len(color_list)])

# Name the image after the number of clusters actually plotted.
plt.savefig(str(n_clusters) + '_clusters_wine_plot.png')
plt.show()