/
k_means.R
53 lines (39 loc) · 1.29 KB
/
k_means.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
library(factoextra)
library(cluster)
#########################################
#LOAD DATA
#########################################
#start from the built-in USArrests data, discard any rows that contain missing
#values, then standardize every column (centered to mean 0, scaled to sd 1);
#scale() returns a numeric matrix, which is what the clustering calls consume
df <- scale(na.omit(USArrests))
#########################################
#DETERMINE HOW MANY CLUSTERS IS OPTIMAL
#########################################
#seed the RNG first: kmeans' random starting centers and clusGap's simulated
#reference data sets are both random, so without a seed the two diagnostic
#plots below change from run to run
set.seed(1)
#plot number of clusters vs. total within sum of squares
#(nstart is forwarded to kmeans so every k keeps the best of 25 random starts,
#the same setting used for the gap statistic below)
fviz_nbclust(df, kmeans, method = "wss", nstart = 25)
#calculate gap statistic based on number of clusters
#(K.max = largest number of clusters evaluated,
# B = number of simulated reference data sets)
gap_stat <- clusGap(df,
                    FUN = kmeans,
                    nstart = 25,
                    K.max = 10,
                    B = 50)
#plot number of clusters vs. gap statistic
fviz_gap_stat(gap_stat)
##########################################
#PERFORM K-MEANS CLUSTERING WITH OPTIMAL K
##########################################
#make this example reproducible (kmeans chooses its starting centers at random)
set.seed(1)
#perform k-means clustering with k = 4 clusters, keeping the best of 25 starts
km <- kmeans(df, centers = 4, nstart = 25)
#view results
km
#plot results of final k-means model
fviz_cluster(km, data = df)
#the clustering was run on na.omit()-ed data, so summarise the same complete
#rows of the raw data; pairing km$cluster with all of USArrests would misalign
#(or fail) as soon as any row had been dropped for missing values
clustered_rows <- na.omit(USArrests)
stopifnot(nrow(clustered_rows) == length(km$cluster))
#find mean of each cluster, in the original (unscaled) units
aggregate(clustered_rows, by = list(cluster = km$cluster), FUN = mean)
#add cluster assignment to original data
final_data <- cbind(clustered_rows, cluster = km$cluster)
#view final data
head(final_data)