Skip to content

Latest commit

ย 

History

History
351 lines (314 loc) · 12.6 KB

220507.md

File metadata and controls

351 lines (314 loc) · 12.6 KB

두 변수 사이의 산점도

산점도

  • scatter plot
  • 2개의 변수로 구성된 자료의 분포를 알아보는 그래프
  • 관측값들의 분포를 통해 2개의 변수 사이의 관계를 파악할 수 있는 기법
# Pull vehicle weight and fuel economy out of the built-in mtcars data set.
wt <- mtcars[["wt"]]
mpg <- mtcars[["mpg"]]

# Scatter plot of fuel economy (y axis) against weight (x axis).
plot(
  x = wt,
  y = mpg,
  pch = 19,                          # point symbol
  col = "red",                       # point colour
  xlab = "์ค‘๋Ÿ‰",                 # x-axis label
  ylab = "์—ฐ๋น„(MPG)",            # y-axis label
  main = "์ค‘๋Ÿ‰-์—ฐ๋น„ ๊ทธ๋ž˜ํ”„" # title
)
  • pch๊ฐ’์— ๋”ฐ๋ผ ๋‹ค๋ฅธ ์ ์˜ ๋ชจ์–‘์„ ์„ ํƒํ•  ์ˆ˜ ์žˆ์Œ
  • ๊ทธ๋ž˜ํ”„๋ฅผ ๋ณด๋ฉด ์ค‘๋Ÿ‰์ด ์ฆ๊ฐ€ํ• ์ˆ˜๋ก ์—ฐ๋น„๋Š” ๊ฐ์†Œํ•˜๋Š” ๊ฒฝํ–ฅ์„ ํ™•์ธ ๊ฐ€๋Šฅ

Rplot08

plotํ•จ์ˆ˜๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์ด ์‚ฌ์šฉํ•  ์ˆ˜๋„ ์žˆ์Œ

# Three equivalent ways to pass the same two columns to plot().
# `...` is a placeholder for the remaining arguments (main, xlab, ...).

# 1) two separate vectors
plot(mtcars$wt, mtcars$mpg,
...)

# 2) a two-column data frame (the first argument must end with a comma)
plot(mtcars[,c('wt', 'mpg')],
...)

# 3) a formula plus the data frame that holds the variables
plot(mpg~wt, data=mtcars,
...)

여러 변수들 간의 산점도

# Columns to compare pairwise.
vars <- c("mpg", "disp", "drat", "wt")
# Keep only those columns of mtcars.
target <- mtcars[vars]
head(target)
# Scatter-plot matrix of the selected columns.
pairs(x = target, main = "Multi Plots")
  • 다중 산점도는 대각선을 기준으로 오른쪽 위의 산점도들과 왼쪽 아래의 산점도들이 대칭을 이룸
  • disp, wt 산점도는 한쪽이 증가하면 다른 쪽도 증가
  • drat, wt 산점도는 한 쪽이 증가하면 다른 쪽이 감소

Rplot09

그룹 정보가 있는 두 변수의 산점도

  • 두 변수 간의 관계뿐만 아니라 그룹 간의 관계도 파악할 수 있음
# Data: the two petal measurements (columns 3 and 4 of iris).
iris.2 <- iris[, c("Petal.Length", "Petal.Width")]
# Turn the Species factor into numeric codes; used below as the plotting
# symbol and as the index into the colour vector.
point <- as.numeric(iris$Species)
point
color <- c("red", "green", "blue")
plot(
  iris.2,
  col = color[point],  # one colour per species
  pch = point,         # one symbol per species
  main = "Iris plot"
)
iris.2 <- iris[,3:4]              # ๋ฐ์ดํ„ฐ ์ค€๋น„

> point <- as.numeric(iris$Species) # ์ ์˜ ๋ชจ์–‘

> point
  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [31] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2
 [61] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 [91] 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[121] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3

Rplot10

์„ค๋ช…

  • ํ’ˆ์ข…๋ณ„๋กœ ์ ์˜ ๋ชจ์–‘์„ ์ง€์ •ํ•˜๊ธฐ ์œ„ํ•ด pch๋ฅผ ๋”ฐ๋กœ ์ง€์ •
  • as.numeric(iris$Species)๋Š” ํŒฉํ„ฐ ํƒ€์ž…์œผ๋กœ ๋˜์–ด ์žˆ๋Š” iris$Species๋ฅผ ์ˆซ์ž๋กœ ๋ฐ”๊พธ๋Š” ์—ญํ• 
  • ๊ฝƒ์žŽ์˜ ๊ธธ์ด๊ฐ€ ๊ธธ์ˆ˜๋ก ๊ฝƒ์žŽ์˜ ํญ๋„ ์ปค์ง
  • setosa ํ’ˆ์ข…์€ ๋‹ค๋ฅธ ๋‘ ํ’ˆ์ข…์— ๋น„ํ•ด ๊ฝƒ์žŽ์˜ ๊ธธ์ด์™€ ํญ์ด ํ™•์—ฐํžˆ ์ž‘์Œ
  • virginica ํ’ˆ์ข…์€ ๋‹ค๋ฅธ ๋‘ ํ’ˆ์ข…์— ๋น„ํ•ด ๊ฝƒ์žŽ์˜ ๊ธธ์ด์™€ ํญ์ด ์ œ์ผ ํผ

์ƒ๊ด€๋ถ„์„๊ณผ ์ƒ๊ด€๊ณ„์ˆ˜

  • ์ถ”์„ธ์˜ ๋ชจ์–‘์ด ์„ ์ด๋ฉด ์„ ํ˜•์  ๊ด€๊ณ„์— ์žˆ๋‹ค๊ณ  ํ‘œํ˜„
  • ์„ ํ˜•์  ๊ด€๊ณ„์—๋„ ๊ฐ•ํ•œ ์„ ํ˜•์  ๊ด€๊ณ„๊ฐ€ ์žˆ๊ณ  ์•ฝํ•œ ์„ ํ˜•์  ๊ด€๊ณ„๊ฐ€ ์žˆ์Œ
  • ์–ผ๋งˆ๋‚˜ ์„ ํ˜•์„ฑ์„ ๋ณด์ด๋Š”์ง€ ์ˆ˜์น˜์ƒ์œผ๋กœ ๋‚˜ํƒ€๋‚ด๋Š” ๋ฐฉ๋ฒ•์ด ์ƒ๊ด€๋ถ„์„

์ƒ๊ด€๊ณ„์ˆ˜

  • -1 <= r <= 1
  • r > 0: ์–‘์˜ ์ƒ๊ด€๊ด€๊ณ„ (x๊ฐ€ ์ฆ๊ฐ€ํ•˜๋ฉด y๋„ ์ฆ๊ฐ€)
  • r < 0: ์Œ์˜ ์ƒ๊ด€๊ด€๊ณ„ (x๊ฐ€ ์ฆ๊ฐ€ํ•˜๋ฉด y๋Š” ๊ฐ์†Œ)
  • r์ด 1์ด๋‚˜ -1์— ๊ฐ€๊นŒ์šธ์ˆ˜๋ก x, y์˜ ์ƒ๊ด€์„ฑ์ด ๋†’์Œ
# Ten paired observations: beers and bal.
beers <- c(5, 2, 9, 8, 3, 7, 3, 5, 3, 5)
bal <- c(0.1, 0.03, 0.19, 0.12, 0.04, 0.0095, 0.07, 0.06, 0.02, 0.05)
tbl <- data.frame(beers, bal)
tbl
# Scatter plot of bal against beers.
plot(bal ~ beers, data = tbl)
# Fit the regression line ...
res <- lm(bal ~ beers, data = tbl)
# ... and draw it on top of the scatter plot.
abline(res)
# Correlation coefficient between the two variables.
cor(beers, bal)
> beers <- c(5,2,9,8,3,7,3,5,3,5)

> bal <- c(0.1,0.03,0.19,0.12,0.04,0.0095,0.07,0.06,0.02,0.05)

> tbl <- data.frame(beers,bal)

> tbl
   beers    bal
1      5 0.1000
2      2 0.0300
3      9 0.1900
4      8 0.1200
5      3 0.0400
6      7 0.0095
7      3 0.0700
8      5 0.0600
9      3 0.0200
10     5 0.0500

> plot(bal~beers,data=tbl)

> res <- lm(bal~beers,data=tbl) # ํšŒ๊ท€์‹ ๋„์ถœ

> abline(res)                   # ํšŒ๊ท€์„  ๊ทธ๋ฆฌ๊ธฐ

> cor(beers,bal)                # ์ƒ๊ด€๊ณ„์ˆ˜ ๊ณ„์‚ฐ
[1] 0.6797025

Rplot11

설명

  • 음주정도가 높으면 혈중알콜농도도 높아지는 추세
  • lm()은 두 변수의 선형관계를 가장 잘 나타낼 수 있는 선의 식(회귀식)을 자동으로 찾는 역할
  • abline()은 회귀식으로 회귀선을 그리는 역할
  • cor()은 상관계수를 구하는 역할
  • 상관계수값은 약 0.68로 높은 상관성을 보임
  • cor()은 여러 개의 변수를 입력하면 여러 개의 변수 사이의 상관계수값을 테이블 형태로 나타냄
# Correlation matrix of the four numeric iris measurements.
cor(iris[
  ,
  c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
])
> cor(iris[,1:4])
             Sepal.Length Sepal.Width Petal.Length Petal.Width
Sepal.Length    1.0000000  -0.1175698    0.8717538   0.8179411
Sepal.Width    -0.1175698   1.0000000   -0.4284401  -0.3661259
Petal.Length    0.8717538  -0.4284401    1.0000000   0.9628654
Petal.Width     0.8179411  -0.3661259    0.9628654   1.0000000

선그래프

시계열 자료

  • 시간의 변화에 따라 자료를 수집한 경우
# Monthly counts of late students, months 1 to 12.
month <- seq_len(12)
late <- c(5, 8, 7, 9, 4, 6, 12, 13, 8, 6, 6, 4)
plot(
  x = month,
  y = late,
  type = "l",   # kind of graph: line
  lty = 1,      # line type
  lwd = 1,      # line width
  xlab = "Month",
  ylab = "Late cnt",
  main = "์ง€๊ฐ์ƒ ํ†ต๊ณ„"
)

Rplot12

์„ค๋ช…

  • type์„ ๋‹ค๋ฅด๊ฒŒํ•˜๋ฉด ๋‹ค์Œ๊ณผ ๊ฐ™์€ ๊ฒฐ๊ณผ๊ฐ€ ๋‚˜์˜ด
  • lty๋Š” 1~6๊นŒ์ง€๋กœ ์ง€์ •ํ•  ์ˆ˜ ์žˆ์Œ
  • ์ง€๊ฐ์ƒ ์ˆ˜๊ฐ€ 5์›”์— ๊ธ‰๊ฐํ–ˆ๋‹ค๊ฐ€ 7, 8์›”์— ๊ธ‰์ฆํ•˜๋Š” ๊ฒƒ์„ ์•Œ ์ˆ˜ ์žˆ์Œ

Rplot13

๋ณต์ˆ˜์˜ ์„ ๊ทธ๋ž˜ํ”„ ์ž‘์„ฑ

# Two monthly series of late-student counts drawn on the same axes.
month <- seq_len(12)
late1 <- c(5, 8, 7, 9, 4, 6, 12, 13, 8, 6, 6, 4)
late2 <- c(4, 6, 5, 8, 7, 8, 10, 11, 6, 5, 7, 3)

# First series sets up the axes; ylim fixes the y range as (lower, upper).
plot(
  x = month,
  y = late1,
  type = "b",
  lty = 1,
  col = "red",
  ylim = c(1, 15),
  xlab = "Month",
  ylab = "Late cnt",
  main = "Late Students"
)
# Second series is added to the existing plot.
lines(x = month, y = late2, type = "b", col = "blue")

Rplot14

설명

  • lines()는 plot()으로 작성한 그래프 위에 선을 겹쳐서 그리는 역할

BostonHousing 데이터셋

  • 미국 보스턴 지역의 주택 가격 정보와 주택 가격에 영향을 미치는 여러 요소들에 대한 정보를 담고 있음
  • 총 14개의 변수로 구성
  • 여기서는 5개만 사용
    • crim: 지역의 1인당 범죄율
    • rm: 주택 1가구당 방의 개수
    • dis: 보스턴의 5개 직업 센터까지의 거리
    • tax: 재산세율
    • medv: 주택 가격

ํƒ์ƒ‰์  ๋ฐ์ดํ„ฐ ๋ถ„์„ ๊ณผ์ •

๋ถ„์„ ๋Œ€์ƒ ๋ฐ์ดํ„ฐ์…‹ ์ค€๋น„

# BostonHousing ships with the mlbench package.
library(mlbench)
data("BostonHousing")
# Keep only the five columns explored in the rest of these notes.
myds <- BostonHousing[c("crim", "rm", "dis", "tax", "medv")]

grp ๋ณ€์ˆ˜ ์ถ”๊ฐ€

  • myds๊ฐ€ ์•ž์œผ๋กœ ํƒ์ƒ‰ํ•  ๋ฐ์ดํ„ฐ์…‹
  • grp๋Š” ์ฃผํƒ ๊ฐ€๊ฒฉ์„ ์ƒ(H), ์ค‘(M), ํ•˜(L)๋กœ ๋ถ„๋ฅ˜ํ•œ ๊ฒƒ
  • 25.0 ์ด์ƒ์ด๋ฉด ์ƒ, 17.0 ์ดํ•˜์ด๋ฉด ํ•˜, ๋‚˜๋จธ์ง€๋Š” ์ค‘์œผ๋กœ ๋ถ„๋ฅ˜
# Classify every row by its medv value:
#   "H" when medv >= 25.0, "L" when medv <= 17.0, "M" otherwise.
# Vectorised: all labels are computed in one step instead of growing a vector
# inside a 1:nrow() loop, so a zero-row data frame is handled as well.
grp <- ifelse(myds$medv >= 25.0, "H",
              ifelse(myds$medv <= 17.0, "L", "M"))
# Build the factor once, with the levels ordered H, M, L
# (the default alphabetical order would be H, L, M).
grp <- factor(grp, levels = c("H", "M", "L"))
myds <- data.frame(myds, grp)            # append grp as a new column of myds

๋ฐ์ดํ„ฐ์…‹์˜ ํ˜•ํƒœ์™€ ๊ธฐ๋ณธ์ ์ธ ๋‚ด์šฉ ํŒŒ์•…

  • myds๋Š” ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์ด๊ณ , 506๊ฐœ์˜ ์ฃผํƒ์— ๋Œ€ํ•œ ์ •๋ณด๋ฅผ ๋‹ด๊ณ  ์žˆ์œผ๋ฉฐ 6๊ฐœ์˜ ๋ณ€์ˆ˜๋กœ ๊ตฌ์„ฑ
  • grp๋งŒ ํŒฉํ„ฐ ํƒ€์ž…์ด๊ณ  ๋‚˜๋จธ์ง€๋Š” ์ˆซ์ž ํƒ€์ž… ๋ณ€์ˆ˜
  • table์„ ํ†ตํ•ด H๊ฐ€ 132์ฑ„, M์ด 247์ฑ„, L์ด 127์ฑ„ ์žˆ์Œ์„ ์•Œ ์ˆ˜ ์žˆ์Œ
# Structure of the data set: size and the type of every column.
utils::str(myds)
# First rows of the data set.
utils::head(myds)
# Number of rows in each price group.
table(myds[["grp"]])
> str(myds)
'data.frame':	506 obs. of  6 variables:
 $ crim: num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
 $ rm  : num  6.58 6.42 7.18 7 7.15 ...
 $ dis : num  4.09 4.97 4.97 6.06 6.06 ...
 $ tax : num  296 242 242 222 222 222 311 311 311 311 ...
 $ medv: num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
 $ grp : Factor w/ 3 levels "H","M","L": 2 2 1 1 1 1 2 1 3 2 ...

> head(myds)
     crim    rm    dis tax medv grp
1 0.00632 6.575 4.0900 296 24.0   M
2 0.02731 6.421 4.9671 242 21.6   M
3 0.02729 7.185 4.9671 242 34.7   H
4 0.03237 6.998 6.0622 222 33.4   H
5 0.06905 7.147 6.0622 222 36.2   H
6 0.02985 6.430 6.0622 222 28.7   H

> table(myds$grp) # ์ฃผํƒ ๊ฐ€๊ฒฉ ๊ทธ๋ฃน๋ณ„ ๋ถ„ํฌํฌ

  H   M   L 
132 247 127 

ํžˆ์Šคํ† ๊ทธ๋žจ์— ์˜ํ•œ ๊ด€์ธก๊ฐ’์˜ ๋ถ„ํฌ ํ™•์ธ

  • ๊ทธ๋ฃน ์ •๋ณด์ธ grp๋Š” ์ œ์™ธ
  • rm, mdev๋งŒ ์ •๊ทœ๋ถ„ํฌ์— ๊ฐ€๊น๊ณ , crim, dis๋Š” ๊ด€์ธก๊ฐ’๋“ค์ด ํ•œ์ชฝ์œผ๋กœ ์ ๋ ค์„œ ๋ถ„ํฌ
  • tax๋Š” ์ค‘๊ฐ„์— ๊ด€์ธก๊ฐ’์ด ์—†๋Š” ๋นˆ ๊ตฌ๊ฐ„์ด ์กด์žฌํ•˜๋Š” ํŠน์ง•์„ ๋ณด์ž„
  • ๊ด€์ธก๊ฐ’๋“ค์˜ ๋ถ„ํฌ๊ฐ€ ์ •๊ทœ๋ถ„ํฌ๊ฐ€ ์•„๋‹ˆ๋ฉด ๋ถ„ํฌ์— ๋Œ€ํ•œ ํ•ด์„์ด ํ•„์š”
# One histogram per numeric column of myds (the grp factor is skipped).
# Columns are picked by type rather than by the hard-coded positions 1:5.
num_cols <- names(myds)[vapply(myds, is.numeric, logical(1))]
# Switch to a 2 x 3 panel grid and remember the previous settings.
op <- par(mfrow = c(2, 3))
for (nm in num_cols) {
  # xlab is set explicitly; otherwise the axis would be labelled with the
  # indexing expression instead of the variable name.
  hist(myds[[nm]], main = nm, xlab = nm, col = "yellow")
}
par(op)                                  # restore the previous panel layout

Rplot15

์ƒ์ž๊ทธ๋ฆผ์— ์˜ํ•œ ๊ด€์ธก๊ฐ’์˜ ๋ถ„ํฌ ํ™•์ธ

  • crim์€ ๊ด€์ธก๊ฐ’๋“ค์ด ์ข์€ ์ง€์—ญ์— ๋ฐ€์ง‘๋˜์–ด ์žˆ๋Š” ๊ฒƒ(๊ด€์ธก๊ฐ’๋“ค์˜ ํŽธ์ฐจ๊ฐ€ ์ž‘์Œ)์„ ํ™•์ธ ๊ฐ€๋Šฅ
  • tax๋Š” ๋„“๊ฒŒ ํผ์ ธ ์žˆ๋Š” ๊ฒƒ(๊ด€์ธก๊ฐ’๋“ค์˜ ํŽธ์ฐจ๊ฐ€ ๋น„๊ต์  ํผ)์„ ํ™•์ธ ๊ฐ€๋Šฅ
# One box plot per column for the first five columns of myds,
# arranged on a 2 x 3 panel grid.
par(mfrow = c(2, 3))
for (i in seq_len(5)) {
  panel_title <- colnames(myds)[i]
  boxplot(myds[, i], main = panel_title)
}
# Back to a single panel per page.
par(mfrow = c(1, 1))

Rplot16

그룹별 관측값 분포의 확인

  • 주택 가격이 높은 지역이나 중간 지역은 범죄율은 낮고, 주택 가격이 낮은 지역의 범죄율이 높게 나타남
  • 주택 가격이 높으면 방의 개수도 많음
  • 중간 그룹의 방의 개수가 5.2~6.8 사이로 비교적 균일
  • 하위 그룹의 방 개수는 4.5~7.2 사이로 넓게 퍼져 있음
  • 하위 그룹의 재산세율은 넓게 분포하고 있음
  • 재산세율은 상위에서 하위로 갈 수록 높아짐
# crim by price group. Formula + data= keeps the axis labels as plain
# variable names ("grp", "crim") instead of "myds$grp" / "myds$crim".
boxplot(crim ~ grp, data = myds, main = "1์ธ๋‹น ๋ฒ”์ฃ„์œจ")

Rplot17

# rm by price group. Formula + data= keeps the axis labels as plain
# variable names ("grp", "rm") instead of "myds$grp" / "myds$rm".
boxplot(rm ~ grp, data = myds, main = "๋ฐฉ์˜ ๊ฐœ์ˆ˜")

Rplot18

# dis, tax and medv by price group, side by side on a 1 x 3 panel grid.
# par() returns the previous settings, which are restored afterwards.
op <- par(mfrow = c(1, 3))
# Formula + data= keeps the axis labels as plain variable names.
boxplot(dis ~ grp, data = myds, main = "dis")
boxplot(tax ~ grp, data = myds, main = "tax")
boxplot(medv ~ grp, data = myds, main = "medv")
par(op)                                  # restore the previous panel layout

Rplot19

๋‹ค์ค‘ ์‚ฐ์ ๋„๋ฅผ ํ†ตํ•œ ๋ณ€์ˆ˜ ๊ฐ„ ์ƒ๊ด€๊ด€๊ณ„์˜ ํ™•์ธ

  • medv์™€ rm์€ ์–‘์˜ ์ƒ๊ด€์„ฑ์ด ์žˆ๋Š” ๊ฒƒ์œผ๋กœ ๋ณด์ž„
  • crim์€ medv์™€ ์Œ์˜ ์ƒ๊ด€์„ฑ์ด ์žˆ๋Š” ๊ฒƒ์œผ๋กœ ๋ณด์ž„
# Scatter-plot matrix of every column except the 6th (grp).
pairs(x = myds[-6])

Rplot20

그룹 정보를 포함한 변수 간 상관관계의 확인

  • (crim-medv), (rm-medv), (dis-medv), (tax-medv) 산점도에서 그룹별로 분포 위치가 뚜렷하게 구분되는 것을 알 수 있음
  • 중간 그룹은 상위 그룹과 하위 그룹에 비해 주택 가격의 변동폭이 좁음
# Colour palette, indexed by the integer code of the grp factor.
color <- c("red", "green", "blue")
# Integer codes of the grp levels; reused as the plotting symbol.
point <- as.integer(myds$grp)
# Scatter-plot matrix of every column except the 6th (grp), with symbol and
# colour chosen per group.
pairs(myds[-6], col = color[point], pch = point)

Rplot21

๋ณ€์ˆ˜ ๊ฐ„ ์ƒ๊ด€๊ณ„์ˆ˜์˜ ํ™•์ธ

  • medv ๊ธฐ์ค€์œผ๋กœ ๋ณผ ๋•Œ ์ƒ๊ด€๊ณ„์ˆ˜๊ฐ€ ๊ฐ€์žฅ ๋†’์€ ๊ฒƒ์€ rm
  • ์‚ฐ์ ๋„ ์ƒ์œผ๋กœ ์Œ์˜ ์ƒ๊ด€์„ฑ์ด ๋†’์€ ๊ฒƒ์œผ๋กœ ๋ณด์˜€๋˜ crim์€ ์‹ค์ œ๋กœ๋Š” ๊ทธ๋ ‡์ง€ ์•Š์€ ๊ฒƒ์„ ์•Œ ์ˆ˜ ์žˆ์Œ
# Correlation matrix of every column except the 6th (grp).
cor(x = myds[-6])
> cor(myds[,-6])
           crim         rm        dis        tax       medv
crim  1.0000000 -0.2192467 -0.3796701  0.5827643 -0.3883046
rm   -0.2192467  1.0000000  0.2052462 -0.2920478  0.6953599
dis  -0.3796701  0.2052462  1.0000000 -0.5344316  0.2499287
tax   0.5827643 -0.2920478 -0.5344316  1.0000000 -0.4685359
medv -0.3883046  0.6953599  0.2499287 -0.4685359  1.0000000