/
lines-per-hour.R
44 lines (31 loc) · 1.16 KB
/
lines-per-hour.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#
# lines-per-hour.R, 27 Jul 16
#
# Data from:
# On the effectiveness of early life cycle defect prediction with Bayesian Nets
# Norman Fenton and Martin Neil and William Marsh and Peter Hearty and Łukasz Radliński and Paul Krause
#
# Example from:
# Empirical Software Engineering using R
# Derek M. Jones
# Project-wide settings; ESEUR_dir (used below) is expected to come from here.
source("ESEUR_config.r")

# Three colours; Hours_KLoC() uses the first for the fitted line and the
# third for the bounds around it.
brew_col <- rainbow(3)

# Load the measurements, keep only rows with a known KLoC value and put
# them in increasing order of effort.
loc_hour <- read.csv(
  paste0(ESEUR_dir, "regression/10.1.1.157.6206.csv.xz"),
  as.is = TRUE
)
loc_hour <- loc_hour[!is.na(loc_hour$KLoC), ]
loc_hour <- loc_hour[order(loc_hour$Hours), ]

# Axis limits taken from the complete data set.
x_bounds <- range(loc_hour$Hours)
y_bounds <- range(loc_hour$KLoC)
# Plot KLoC against Hours, overlay a loess smooth and a fitted square-root
# model with +/- 1.96 standard-error bounds, and return the fitted model.
#
# df: data frame with numeric columns Hours and KLoC.
#
# Returns the glm object for KLoC ~ I(Hours^0.5), fitted with glm()'s
# default family.
#
# Reads point_col, loess_col, brew_col, x_bounds and y_bounds from the
# enclosing environment. point_col and loess_col are not defined in this
# file (presumably set by ESEUR_config.r -- confirm).
Hours_KLoC <- function(df) {
  plot(df$Hours, df$KLoC, col = point_col,
       xlim = x_bounds, ylim = y_bounds,
       xlab = "Effort (hours)", ylab = "Lines of code (Kloc)\n")

  lines(loess.smooth(df$Hours, df$KLoC, span = 0.4), col = loess_col)

  plh_mod <- glm(KLoC ~ I(Hours^0.5), data = df)

  # Predict for every row of df: glm() silently drops rows containing NA,
  # so the fitted values alone can be shorter than df$Hours, which would
  # make lines() fail on mismatched lengths.
  plh_pred <- predict(plh_mod, newdata = df, type = "response", se.fit = TRUE)

  # Draw along increasing Hours so the curves are not zig-zagged when the
  # caller passes unsorted data (NA Hours sort last and are not drawn).
  by_hours <- order(df$Hours)
  hours <- df$Hours[by_hours]
  fit <- plh_pred$fit[by_hours]
  half_width <- 1.96 * plh_pred$se.fit[by_hours]

  lines(hours, fit, col = brew_col[1])
  lines(hours, fit + half_width, col = brew_col[3])
  lines(hours, fit - half_width, col = brew_col[3])

  plh_mod
}
# Plot the complete data set and keep the model fitted to it.
all_mod <- Hours_KLoC(df = loc_hour)