In [None]:
# Install the required packages only when they are missing (re-installing on
# every run is slow and needs network access), then attach them so that the
# unqualified forecast() and toJSON() calls further down can be resolved.
for (pkg in c("forecast", "rjson")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cran.rstudio.com/")
  }
  library(pkg, character.only = TRUE)
}

In [None]:
# Load the datasets and the pre-trained models.
data <- read.csv('Train+Week1&2.csv')  # usage history, one row per user/day
real <- read.csv('Week3.csv')          # actual usage for the forecast period

# Each *Model object is indexed by position with [[ ]] in Pred(), using the
# Index column of IDIndex built below (original notes: 94 models, one per
# user ID).
TModel <- readRDS('TModel.RData')  # pre-trained TBATS models
AModel <- readRDS('AModel.RData')  # pre-trained ARIMA models
SModel <- readRDS('SModel.RData')  # pre-trained SES models

id <- unique(data$subscriber_ecid)  # unique user IDs, in order of appearance

# Lookup table mapping every user ID to its position 1..length(id).
# Built in one vectorised call instead of growing a data.frame row by row;
# this also avoids a counter variable named `c` that shadows base::c and
# works when `id` is empty. A factor is converted to character, which is what
# iterating over it with `for` produced before.
IDIndex <- data.frame(
  UserID = if (is.factor(id)) as.character(id) else id,
  Index = seq_along(id),
  stringsAsFactors = FALSE
)

In [None]:
#Single User Prediction API Based on User ID

# Pred: write the 10-day forecast report for one user to "User(<ID>).json".
#
# Arguments:
#   ID  User ID to report on. It is compared against IDIndex$UserID with `==`,
#       so a character value is matched after R's usual coercion.
#
# Reads the globals IDIndex, TModel, AModel, SModel, data and real.
#
# The JSON document is built from a list of 13 entries:
#   1     header c("Actual", "TBATS", "ARIMA", "SES")
#   2-11  one entry per forecast day: actual value and the three point forecasts
#   12    text line with the user's historical mean and standard deviation
#   13    "Abnormal Indexes" followed by the positions in the user's history
#         whose usage exceeds mean + 3 * SD
#
# Returns NULL invisibly; the output is the file written to the working
# directory. Stops with an error when ID does not match exactly one known
# user, which also keeps arbitrary input out of the output file name.
#
# NOTE: the plumber annotation must stay directly above the function.
#* @get /Pred
Pred <- function(ID) {
  horizon <- 10L  # number of forecast days; the 13-entry layout depends on it

  # Position of this user's models inside TModel / AModel / SModel.
  model_idx <- IDIndex$Index[IDIndex$UserID == ID]
  if (length(model_idx) != 1L || is.na(model_idx)) {
    stop("Unknown user ID: ", ID, call. = FALSE)
  }

  # Point forecasts; namespace-qualified so the function does not depend on
  # the forecast package being attached.
  tbats_mean <- forecast::forecast(TModel[[model_idx]], h = horizon)$mean
  arima_mean <- forecast::forecast(AModel[[model_idx]], h = horizon)$mean
  ses_mean <- forecast::forecast(SModel[[model_idx]], h = horizon)$mean

  # Actual values for the forecast period (NA where fewer rows are available).
  actual <- real$data_usage_volume[real$subscriber_ecid == ID]

  day_rows <- lapply(seq_len(horizon), function(day) {
    c(actual[day], tbats_mean[day], arima_mean[day], ses_mean[day])
  })

  # Historical statistics and simple outlier detection (upper side only).
  user_usage <- data$data_usage_volume[data$subscriber_ecid == ID]
  user_mean <- mean(user_usage)
  user_sd <- sd(user_usage)
  # which() drops NA comparisons, so an empty or single-observation history
  # (SD is NA) yields no outliers instead of failing inside `if`.
  abnormal_idx <- which(user_usage > user_mean + 3 * user_sd)

  result <- c(
    list(c("Actual", "TBATS", "ARIMA", "SES")),
    day_rows,
    list(paste("User Mean:", user_mean, "User SD:", user_sd)),
    list(c("Abnormal Indexes", abnormal_idx))
  )

  out_file <- paste0("User(", ID, ").json")
  write(rjson::toJSON(result), out_file)
  invisible(NULL)
}

In [None]:
#Single User Plotting API Based on User ID

# UserPlot: plot one user's daily internet usage.
#
# Arguments:
#   ID  User ID whose rows are selected from the global `data`
#       (data$subscriber_ecid == ID).
#
# Draws, on the current graphics device:
#   - the user's usage series,
#   - a dashed blue line at the mean usage over all rows of `data`,
#   - a dashed green line at this user's mean usage,
#   - a legend placed below the plot region,
#   - red markers on points above user mean + 3 * user SD.
#
# Returns NULL invisibly. Stops with an error when no rows match ID.
#
# NOTE: the plumber annotations must stay directly above the function.
#* @serializer png
#* @get /plot
UserPlot <- function(ID) {
  user_usage <- data$data_usage_volume[data$subscriber_ecid == ID]
  if (length(user_usage) == 0L) {
    stop("No usage data for user ID: ", ID, call. = FALSE)
  }

  # Main time series.
  plot(user_usage,
       main = "Daily Internet Usage",
       xlab = "Days",
       ylab = "Volume")

  user_mean <- mean(user_usage)
  user_sd <- sd(user_usage)
  total_mean <- mean(data$data_usage_volume)

  # Clip the reference lines to the plot region; the previous xpd setting is
  # restored on exit so the device is not left in a modified state.
  old_par <- par(xpd = FALSE)
  on.exit(par(old_par), add = TRUE)
  abline(h = total_mean, col = "blue", lty = 2)
  abline(h = user_mean, col = "green", lty = 2)

  # Allow drawing outside the plot region so the legend can sit below it.
  par(xpd = TRUE)
  legend("bottomleft", inset = c(0, -0.5),
         legend = c("Total Average", "User Average"),
         col = c("blue", "green"), lty = 2, cex = 0.8)

  # Simple anomaly detection: values more than three standard deviations
  # above the user's mean (upper side only). which() ignores NA comparisons,
  # so a single-observation history (SD is NA) marks nothing.
  abnormal_idx <- which(user_usage > user_mean + 3 * user_sd)
  points(abnormal_idx, user_usage[abnormal_idx], col = "red")
  invisible(NULL)
}