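# NOTE: the lines originally here were GitHub web-page navigation text
# ("Skip to content", "Permalink", branch selector, "93 lines (71 sloc) 2.72 KB"),
# not part of the R script; they are replaced by this comment so the file parses.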
#
# Linux-DAYLOC.R, 25 Mar 20
#
# Data from:
# The {Linux} Kernel as a Case Study in Software Evolution
# Ayelet Israeli and Dror G. Feitelson
#
# Example from:
# Evidence-based Software Engineering: based on the publicly available data
# Derek M. Jones
#
# TAG Linux_evolution Linux_LOC LOC_day
# Project-wide configuration; ESEUR_dir and plot_layout() are used below
# and are expected to come from this file.
source("ESEUR_config.r")

# Two plots, stacked (2 rows, 1 column), drawn with a two-colour palette.
plot_layout(2, 1)
pal_col <- rainbow(2)

# Lines of code in each release
ll <- read.csv(file = paste0(ESEUR_dir, "regression/Linux-LOC.csv.xz"),
               as.is = TRUE)

# Per-release date information (the Release_date column is used below)
ld <- read.csv(file = paste0(ESEUR_dir, "regression/Linux-days.csv.xz"),
               as.is = TRUE)

# Join the two tables on the column(s) they have in common.
loc_date <- merge(x = ll, y = ld)
# Add a column giving the number of days since the first release.
# NOTE: "%b" matches abbreviated month names in the current locale, so the
# parse assumes the locale's month abbreviations match those in the data.
loc_date$Release_date <- as.Date(loc_date$Release_date, format = "%d-%b-%Y")

# merge() sorts its result on the join column(s), not by date, so row 1 is
# not guaranteed to be the earliest release.  Use the minimum date so that
# Number_days is never negative (the plots below start their x-axis at 0).
start.date <- min(loc_date$Release_date, na.rm = TRUE)
loc_date$Number_days <- as.integer(difftime(loc_date$Release_date,
                                            start.date,
                                            units = "days"))

# Order by days since first release
ld_ordered <- loc_date[order(loc_date$Number_days), ]
# Keep only the releases that were, at the time of their release, the
# highest version number seen so far (rows are already in date order).
n_Version <- numeric_version(ld_ordered$Version)

# cummax does not work for numeric_version, so the running maximum is
# tracked by hand.  A plain loop over a preallocated logical vector avoids
# both `<<-` and the 2:nrow() idiom, which counts backwards (2, 1) when
# there is only a single row.
n_rows <- length(n_Version)
is_latest <- logical(n_rows)          # all FALSE to start with
if (n_rows > 0) {
  # The first (earliest) release is always kept.
  greatest_version <- n_Version[1]
  is_latest[1] <- TRUE

  for (i in seq_len(n_rows)[-1]) {
    # Strictly greater: a release equal to, or older than, the current
    # maximum version is dropped.
    if (n_Version[i] > greatest_version) {
      greatest_version <- n_Version[i]
      is_latest[i] <- TRUE
    }
  }
}

# Flags for rows 2..n, the same shape as before.
keep_version <- is_latest[-1]
latest_version <- ld_ordered[is_latest, ]
# Work in millions of lines so the y-axis labels stay readable.
latest_version$MLOC <- latest_version$LOC / 1e6

# Both axes start at zero and end at the largest observed value; the
# same limits are reused by every plot of this data.
y_lim <- c(0, max(latest_version$MLOC))
x_lim <- c(0, max(latest_version$Number_days))

# Upper panel: raw data with a straight-line fit.
with(latest_version,
     plot(Number_days, MLOC,
          col = pal_col[2],
          xaxs = "i", yaxs = "i",
          xlim = x_lim, ylim = y_lim,
          xlab = "Days", ylab = "Total lines of code (million)\n"))

# glm() with its default family, i.e. an ordinary linear fit.
m1 <- glm(MLOC ~ Number_days, data = latest_version)

# se.fit is requested so the (disabled) confidence band below can be drawn.
pred <- predict(m1, type = "response", se.fit = TRUE)
with(latest_version, lines(Number_days, pred$fit, col = pal_col[1]))

# Approximate 95% band, currently disabled:
#lines(latest_version$Number_days, pred$fit+1.96*pred$se.fit)
#lines(latest_version$Number_days, pred$fit-1.96*pred$se.fit)
# Lower panel: the same data and axis limits, this time with a quadratic fit.
with(latest_version,
     plot(Number_days, MLOC,
          col = pal_col[2],
          xaxs = "i", yaxs = "i",
          xlim = x_lim, ylim = y_lim,
          xlab = "Days", ylab = "Total lines of code (million)\n"))

# I() protects the square so it is treated as arithmetic, not formula syntax.
m2 <- glm(MLOC ~ Number_days + I(Number_days^2), data = latest_version)

pred <- predict(m2, type = "response", se.fit = TRUE)
with(latest_version, lines(Number_days, pred$fit, col = pal_col[1]))
# m3=glm(LOC ~ Number_days+I(Number_days^2)+I(Number_days^3), data=latest_version)
#
# pred=predict(m3, type="response", se.fit=TRUE)
# lines(latest_version$Number_days, pred$fit, col="red")
#
# m4=glm(LOC ~ Number_days+I(Number_days^2)+I(Number_days^3)+I(Number_days^4), data=latest_version)
# m4=glm(LOC ~ poly(Number_days, degree=4), data=latest_version)