In [None]:
#import dependencies:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

Part 1: Prepare the Data

In [None]:
# Read the myopia dataset from the Resources folder and preview the first rows
file = Path('Resources/myopia.csv')
df = pd.read_csv(file)
df.head()

In [None]:
# Keep the MYOPIC column aside; it is used later to colour the scatter plot
classes = df.loc[:, 'MYOPIC']

In [None]:
# Remove the MYOPIC column so that only the remaining columns are used as features
df1 = df.drop(columns=['MYOPIC'])
df1.head()

In [None]:
# Standardise every column so that features with large magnitudes do not
# influence the outcome more than features with small magnitudes
from sklearn.preprocessing import StandardScaler

# Learn the per-column scaling parameters, then apply them to the same data
scaler = StandardScaler().fit(df1)
scaler_data = scaler.transform(df1)

In [None]:
# Wrap the scaled array in a DataFrame, reusing the feature column names of df1
new_df1 = pd.DataFrame(data=scaler_data, columns=df1.columns)
new_df1.head()

In [None]:
# Shape of the scaled feature table as (rows, columns); the column count is
# the number of features before any dimensionality reduction is applied
new_df1.shape

Part 2: Apply Dimensionality Reduction

In [None]:
# Dimensionality reduction with PCA
from sklearn.decomposition import PCA

# A fractional n_components asks PCA to keep as many components as are
# needed to preserve that share of the explained variance (here 90%)
pca = PCA(n_components=0.90)
new_df1_pca = pca.fit_transform(new_df1)

In [None]:
# Total share of the variance explained by the components PCA kept;
# the model was created with n_components=0.90, so this is expected to be >= 0.90
pca.explained_variance_ratio_.sum()

In [None]:
# Shape of the PCA output; compare the column count with new_df1.shape to see
# how many features were removed by the reduction
new_df1_pca.shape

In [None]:
# t-SNE is used to reduce the dataset dimensions further
from sklearn.manifold import TSNE

# Configure the t-SNE model: three output dimensions and a fixed random seed
tsne = TSNE(
    n_components=3,
    learning_rate=400,
    random_state=42,
)

In [None]:
# Fit t-SNE on the PCA-reduced data and keep the resulting embedding
# (the model was configured with n_components=3)
tsne_features = tsne.fit_transform(new_df1_pca)

In [None]:
# Shape of the t-SNE output; compare with new_df1_pca.shape to see the
# change in the number of dimensions
tsne_features.shape

In [None]:
# Preview the first few embedded points rather than echoing the whole array
tsne_features[:5]

In [None]:
# Scatter plot of the t-SNE embedding, coloured by the MYOPIC column.
# Only the first two of the embedded dimensions are drawn.
plt.scatter(tsne_features[:, 0], tsne_features[:, 1], c=classes)
plt.xlabel('t-SNE dimension 1')
plt.ylabel('t-SNE dimension 2')
plt.title('Myopia t-SNE Projection')
plt.show()

Part 3: Perform a Cluster Analysis with K-means

In [None]:
# Build the data for an elbow plot to identify the best number of clusters:
# fit K-means for each k between 1 and 10 and record the inertia of each fit
from sklearn.cluster import KMeans

inertia = []
k = list(range(1, 11))

for i in k:
    # n_init is set explicitly because the library default changed from 10 to
    # 'auto' in scikit-learn 1.4; pinning it keeps the inertia values the same
    # across versions and avoids the FutureWarning raised by 1.2/1.3
    km = KMeans(n_clusters=i, n_init=10, random_state=0)
    km.fit(new_df1_pca)
    inertia.append(km.inertia_)

# Collect k and inertia in a DataFrame; the elbow curve itself is drawn
# from this frame with matplotlib
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.head()

In [None]:
# Draw the elbow curve: inertia as a function of the number of clusters
fig, ax = plt.subplots()
ax.plot(df_elbow['k'], df_elbow['inertia'])
ax.set_xticks(range(1, 11))
ax.set_xlabel('Number of clusters (K)')
ax.set_ylabel('Inertia')
ax.set_title('Myopia Elbow Curve')
plt.show()