In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

**Loading the dataset from the downloaded CSV file**

In [None]:
# Read the survey CSV into a DataFrame.
mcdonalds_path = '../input/mcdonalds/mcdonalds.csv'
mcdonalds = pd.read_csv(mcdonalds_path)

# Quick look at the data: column names, (rows, columns), and the first three rows.
for summary in (mcdonalds.columns.tolist(), mcdonalds.shape, mcdonalds.head(3)):
    print(summary)

**Encoding the first 11 columns as binary (Yes = 1) and displaying their column means**

In [None]:
# Take the first 11 columns and encode them as 1 where the answer is "Yes", 0 otherwise.
MD_x = mcdonalds.iloc[:, :11].eq("Yes").astype(int)

# Per-column mean of the 0/1 indicators, rounded to two decimals.
column_means = MD_x.mean().round(2)
print(column_means)


**Fitting PCA and displaying the importance of components**

In [None]:
# Fit a PCA with all components on the binary indicator matrix.
MD_pca = PCA()
MD_pca.fit(MD_x)

# Summary table with one row per principal component.
print("Importance of components:")
print(pd.DataFrame({
    # explained_variance_ holds variances; the standard deviation is its square root.
    "Standard deviation": np.round(np.sqrt(MD_pca.explained_variance_), 4),
    "Proportion of Variance": np.round(MD_pca.explained_variance_ratio_, 4),
    "Cumulative Proportion": np.round(np.cumsum(MD_pca.explained_variance_ratio_), 4)
}))


**Printing the standard deviations and rotation matrix of the PCA object**

In [None]:
def print_pca(pca_obj, digits):
    """Print the standard deviations and rotation matrix of a fitted PCA.

    Args:
        pca_obj: Fitted PCA-like object exposing ``n_components_``,
            ``explained_variance_`` and ``components_`` (components in rows).
        digits: Number of decimal places to round the printed values to.

    Returns:
        None. The report is written to standard output.
    """
    n_comps, n_features = pca_obj.components_.shape

    print("Standard deviations (1, .., p={}):".format(pca_obj.n_components_))
    # explained_variance_ stores variances; take the square root so the
    # printed numbers really are standard deviations, as the header says.
    print(np.round(np.sqrt(pca_obj.explained_variance_), digits))

    print("Rotation (n x k) = ({} x {}):".format(n_features, n_comps))
    # components_ is laid out (k x n); transpose it so the printed matrix has
    # the (n x k) shape announced in the header (one column per component).
    print(np.round(pca_obj.components_.T, digits))
# Report the fitted PCA, rounded to one decimal place.
print_pca(pca_obj=MD_pca, digits=1)


**Plotting PCA**

In [None]:
# Refit the PCA and project the observations onto the principal components.
MD_pca = PCA().fit(MD_x)
transformed_data = MD_pca.transform(MD_x)

# Scatter the observations in the plane of the first two components.
pc1, pc2 = transformed_data[:, 0], transformed_data[:, 1]
pca_fig, pca_ax = plt.subplots()
pca_ax.scatter(pc1, pc2, color='grey')
pca_ax.set_xlabel("PC1")
pca_ax.set_ylabel("PC2")
pca_ax.set_title("PCA Plot")
plt.show()


**Printing Cluster Labels**

In [None]:
# Fit k-means for k = 2..8 and keep the solution with the highest silhouette score.
#
# Inertia (within-cluster sum of squares) generally keeps decreasing as k grows,
# so comparing inertia across different k would simply select the largest k.
# The silhouette score is comparable across k; higher means better-separated clusters.
np.random.seed(1234)
k_values = range(2, 9)
best_model = None
best_score = float('-inf')  # best silhouette seen so far (higher is better)
for k in k_values:
    model = KMeans(n_clusters=k, n_init=10, random_state=1234)
    model.fit(MD_x)
    score = silhouette_score(MD_x, model.labels_)
    if score > best_score:
        best_model = model
        best_score = score

# Cluster assignment of every observation under the selected model.
cluster_labels = best_model.labels_
print(cluster_labels)


**Plotting using Elbow method**

In [None]:
# Collect the k-means inertia for every candidate number of clusters (2..8).
k_values = range(2, 9)
inertia_values = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=1234).fit(MD_x)
    inertia_values.append(kmeans.inertia_)

# Elbow plot: inertia against k.
elbow_fig, elbow_ax = plt.subplots()
elbow_ax.plot(k_values, inertia_values, marker='o')
elbow_ax.set_xlabel("Number of Clusters (k)")
elbow_ax.set_ylabel("Inertia")
elbow_ax.set_title("Elbow Method to Choose Number of Clusters")
plt.show()
