## Converted R Code to Python Code

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset. The path is relative, so mcdonalds_data.csv must be
# present in the current working directory.
mcdonalds = pd.read_csv('mcdonalds_data.csv')

# Display basic structure: the first rows, then the column/dtype summary.
print(mcdonalds.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
mcdonalds.info()


In [None]:
# Recode the first 11 columns in place as 0/1 indicators: cells equal to
# 'Yes' become 1, every other value becomes 0.
binary_columns = mcdonalds.columns[:11]
mcdonalds[binary_columns] = (mcdonalds[binary_columns] == 'Yes').astype(int)

# Per-column mean of the 0/1 indicators (the share of 'Yes' cells in each
# column), printed with two decimals.
col_means = mcdonalds[binary_columns].mean()
print(col_means.round(2))


In [None]:
# Standardize the indicator columns, then fit a PCA with default settings
# on the standardized matrix.
scaler = StandardScaler()
indicator_frame = mcdonalds[binary_columns]
scaled_data = scaler.fit_transform(indicator_frame)
pca = PCA().fit(scaled_data)

# Report the share of variance carried by each component, followed by the
# running total across components.
variance_ratio = pca.explained_variance_ratio_
print(f'Explained variance ratio: {variance_ratio}')
print(f'Cumulative explained variance: {variance_ratio.cumsum()}')


In [None]:
# Partition the standardized observations into four k-means clusters.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' and warned about the upcoming change when the argument was omitted,
# so leaving it out makes the segmentation depend on the installed version.
# random_state fixes the centroid initialization for repeatable results.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=1234)
clusters = kmeans.fit_predict(scaled_data)
# Store each row's cluster assignment alongside the data.
mcdonalds['Cluster'] = clusters

# Visualize clusters using the first two principal components: project the
# standardized data with the already-fitted PCA and color points by cluster.
pca_data = pca.transform(scaled_data)
plt.scatter(pca_data[:, 0], pca_data[:, 1], c=clusters, cmap='viridis', alpha=0.5)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Clusters based on PCA')
plt.show()


In [None]:
# Agglomerative clustering of the standardized data using Ward linkage.
linkage_matrix = linkage(scaled_data, method='ward')

# Draw the dendrogram on an explicit 10x7 inch figure; each leaf is labeled
# with the corresponding entry of `clusters`.
fig, ax = plt.subplots(figsize=(10, 7))
dendrogram(linkage_matrix, labels=clusters, ax=ax)
ax.set_title('Hierarchical Clustering Dendrogram')
plt.show()
