In [None]:

from google.colab import files
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram

# Ask the user for a CSV through the Colab upload widget.
uploaded = files.upload()

# The first uploaded entry is taken as the data file.
file_name = list(uploaded)[0]

# Read the CSV, then strip stray leading/trailing whitespace from the headers
# so columns can be addressed by their plain names below.
whisky_data = pd.read_csv(file_name)
whisky_data.columns = whisky_data.columns.str.strip()

# Per-column count of missing cells before any rows are removed.
print("Missing Values Before Cleaning:")
print(whisky_data.isnull().sum())

# Cleaning: discard incomplete rows first, then keep only the first row seen
# for each 'Distillery' value.
whisky_data = (
    whisky_data
    .dropna()
    .drop_duplicates(subset='Distillery', keep='first')
)

# Repeat the missing-value report on the cleaned frame.
print("\nMissing Values After Cleaning:")
print(whisky_data.isnull().sum())

# Number of rows still flagged as a repeated 'Distillery' after cleaning.
print("\nDuplicates After Cleaning:")
print(whisky_data.duplicated(subset='Distillery').sum())

# Lay the sweetness scores out as an (n, 1) array of observations; linkage()
# works out the pairwise distances itself and merges with average linkage.
sweetness_matrix = whisky_data[['Sweetness']].values
linkage_matrix = linkage(sweetness_matrix, method='average')

# Draw the dendrogram, one leaf per distillery.
plt.figure(figsize=(10, 6))
dendrogram(
    linkage_matrix,
    orientation='top',
    labels=whisky_data['Distillery'].tolist(),
    leaf_font_size=10,
)
plt.xlabel('Whiskies')
plt.ylabel('Distance')
plt.title('Hierarchical Clustering Dendrogram - Sweetness')
plt.show()


In [None]:
# Identify clusters based on sweetness.
# fcluster is imported in this cell so the cell works without changes to the
# notebook's import cell.
from scipy.cluster.hierarchy import fcluster

# Upper bound on the number of flat clusters cut from the dendrogram.
# NOTE(review): this value is an analysis choice, not derived from the data;
# adjust it after inspecting the dendrogram.
MAX_SWEETNESS_CLUSTERS = 4

# Cut the tree that was already built from the sweetness scores into flat
# clusters. The result holds one integer label per observation, in the same
# order as the rows that were passed to linkage(). (Running linkage() again on
# the linkage matrix would cluster the merge records, not the whiskies.)
sweetness_labels = fcluster(
    linkage_matrix,
    t=MAX_SWEETNESS_CLUSTERS,
    criterion='maxclust',
)

# Assign cluster labels to whiskies.
# Rows were removed during cleaning, so the frame's index can have gaps; the
# Series is built on that same index so each label lands on its own row
# instead of being aligned against a fresh 0..n-1 index.
sweetness_clusters = pd.Series(sweetness_labels, index=whisky_data.index)
whisky_data['Sweetness_Cluster'] = sweetness_clusters

# Display up to ten whiskies from each sweetness cluster (the first ten in
# row order), joined into one comma-separated string per cluster.
top_sweetness_whiskies = (
    whisky_data.groupby('Sweetness_Cluster')['Distillery']
    .apply(lambda names: ', '.join(names.head(10).tolist()))
    .reset_index(name='Top 10 Sweetness Whiskies')
)

print("Top 10 Whiskies Similar in Sweetness:")
print(top_sweetness_whiskies)


