In [1]:
library(magrittr)
library(lme4)
library(optimx)
library(dplyr)
library(readr)
library(jtools)
library(reticulate)
library(PerformanceAnalytics)
library(tidyverse)
library(ggcorrplot)
library(ggplot2)

Loading required package: Matrix


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: xts

Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric



#                                                                             #
# The dplyr lag() function breaks how base R's lag() function is supposed to  #
# work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
# source() into this session won't work correctly.                            #
#                                                                             #
# Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
# conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
# dplyr from breaking base R

In [2]:
# Read the tab-delimited export (a student-step file, going by its name) into
# a data frame, then list its column names so the fields used in the following
# cells can be checked.
my_data <- read.delim('/kaggle/input/predicting-learning-outcomes/ds5165_student_step_All_Data_7337_2022_0107_094330.txt')
colnames(my_data)

In [3]:
# Keep only rows that have both an opportunity value and a first-attempt
# outcome, then renumber opportunities: within each (KC, student) pair the rows
# are numbered 1, 2, ... following First.Transaction.Time order.
# The sort is global (arrange() ignores grouping unless .by_group = TRUE),
# which is all row_number() needs once the grouping is applied.
# NOTE(review): the time column is sorted in whatever type read.delim() gave
# it -- presumably ISO-like text that sorts chronologically; confirm.
my_data <- my_data %>%
  filter(Opportunity..Default. != "", First.Attempt != "") %>%
  arrange(First.Transaction.Time) %>%
  group_by(KC..Default., Anon.Student.Id) %>%
  mutate(Opportunity..Default.Processed = row_number()) %>%
  ungroup()

In [4]:
# Prepare the modelling columns:
#   individual / KC / opportunity / response -- short names for the raw fields
#   opportunity_reverse -- opportunities remaining in the (individual, KC)
#                          sequence; 0 on the last one
#   response            -- 1 when the first attempt is "correct", otherwise 0
#   opportunity0        -- zero-based opportunity count
# Rows with no usable KC label (NA, empty, or zero) are dropped last.
my_data <- my_data %>%
  rename(
    individual = Anon.Student.Id,
    KC = KC..Default.,
    opportunity = Opportunity..Default.Processed,
    response = First.Attempt
  ) %>%
  arrange(individual, KC) %>%
  group_by(individual, KC) %>%
  mutate(opportunity_reverse = max(opportunity) - opportunity) %>%
  ungroup() %>%
  mutate(
    response = as.numeric(response == "correct"),
    opportunity0 = opportunity - 1
  ) %>%
  # Each condition must hold for a row to be kept (filter() ANDs them).
  filter(!is.na(KC), KC != "", KC != "0", KC != 0)

In [5]:
# Distribution (min, quartiles, mean, max) of the forward and the reverse
# opportunity counts.
summary(my_data[["opportunity0"]])
summary(my_data[["opportunity_reverse"]])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    3.00    8.00   11.89   16.00   96.00 

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    3.00    8.00   11.89   16.00   96.00 

In [6]:
# Every processed row is used for fitting; no hold-out split is made here.
train_data <- my_data

In [7]:
# Fit the glmer model (called "iafm" in this notebook): a logistic mixed model
# of first-attempt correctness on the zero-based opportunity count, with a
# random intercept and a random opportunity slope for each KC and for each
# individual.
#
# The data frame is passed by name instead of being piped in as `data = .`.
# With the pipe, the call stored inside the fitted object refers to `.`, which
# no longer exists once the pipe has finished, so the model cannot be
# re-evaluated later (e.g. by update()), including after it is reloaded from
# the saved .rds file.
#
# nAGQ = 0 selects lme4's faster, less exact estimation, in which the fixed
# effects are optimised inside the penalised least-squares step.
start.time <- Sys.time()
model_iafm <- glmer(
  response ~ opportunity0 + (opportunity0 | KC) + (opportunity0 | individual),
  data = train_data,
  family = binomial(),
  nAGQ = 0
)
end.time <- Sys.time()

# Print fitting time
round(end.time - start.time, 2)

# Save the model
saveRDS(model_iafm, file = "/kaggle/working/model_iafm_5165.rds")

Time difference of 3.95 mins

In [8]:
# Print the model summary: fit statistics, fixed effects and random-effect
# standard deviations.
jtools::summ(model_iafm)

[4mMODEL INFO:[24m
[3mObservations:[23m 32458
[3mDependent Variable:[23m response
[3mType:[23m Mixed effects generalized linear regression
[3mError Distribution: [23mbinomial
[3mLink function: [23mlogit 

[4mMODEL FIT:[24m
[3mAIC[23m = 37774.13, [3mBIC[23m = 37841.23
[3mPseudo-R² (fixed effects)[23m = 0.04
[3mPseudo-R² (total)[23m = 0.56 

[4mFIXED EFFECTS:
[24m-------------------------------------------------
                      Est.   S.E.   z val.      p
------------------ ------- ------ -------- ------
(Intercept)          -0.27   0.15    -1.79   0.07
opportunity0          0.04   0.01     3.10   0.00
-------------------------------------------------
[4m
RANDOM EFFECTS:
[24m---------------------------------------
   Group       Parameter     Std. Dev. 
------------ -------------- -----------
 individual   (Intercept)      1.04    
 individual   opportunity0     0.02    
     KC       (Intercept)      0.89    
     KC       opportunity0     0.08    
------

In [9]:
# Fit the reverse glmer model: same structure as the forward model, but the
# predictor is opportunity_reverse (opportunities remaining in the sequence)
# instead of opportunity0, for the fixed slope and for the random slopes of
# both KC and individual.
#
# The data frame is passed by name instead of being piped in as `data = .`.
# With the pipe, the call stored inside the fitted object refers to `.`, which
# no longer exists once the pipe has finished, so the model cannot be
# re-evaluated later (e.g. by update()), including after it is reloaded from
# the saved .rds file.
#
# nAGQ = 0 selects lme4's faster, less exact estimation, in which the fixed
# effects are optimised inside the penalised least-squares step.
start.time <- Sys.time()
model_iafm_reverse <- glmer(
  response ~ opportunity_reverse + (opportunity_reverse | KC) +
    (opportunity_reverse | individual),
  data = train_data,
  family = binomial(),
  nAGQ = 0
)
end.time <- Sys.time()

# Print fitting time
round(end.time - start.time, 2)

# Save the model
saveRDS(model_iafm_reverse, file = "/kaggle/working/model_iafm_reverse_5165.rds")

Time difference of 4.27 mins

In [10]:
# Print the reverse model's summary: fit statistics, fixed effects and
# random-effect standard deviations.
jtools::summ(model_iafm_reverse)

[4mMODEL INFO:[24m
[3mObservations:[23m 32458
[3mDependent Variable:[23m response
[3mType:[23m Mixed effects generalized linear regression
[3mError Distribution: [23mbinomial
[3mLink function: [23mlogit 

[4mMODEL FIT:[24m
[3mAIC[23m = 37811.43, [3mBIC[23m = 37878.53
[3mPseudo-R² (fixed effects)[23m = 0.09
[3mPseudo-R² (total)[23m = 0.52 

[4mFIXED EFFECTS:
[24m--------------------------------------------------------
                             Est.   S.E.   z val.      p
------------------------- ------- ------ -------- ------
(Intercept)                  0.10   0.16     0.62   0.53
opportunity_reverse         -0.06   0.01    -5.57   0.00
--------------------------------------------------------
[4m
RANDOM EFFECTS:
[24m----------------------------------------------
   Group           Parameter        Std. Dev. 
------------ --------------------- -----------
 individual       (Intercept)         0.94    
 individual   opportunity_reverse     0.02    
     KC  