In [1]:
library(magrittr)
library(lme4)
library(optimx)
library(dplyr)
library(readr)
library(jtools)
library(reticulate)
library(PerformanceAnalytics)
library(tidyverse)
library(ggcorrplot)
library(ggplot2)

Loading required package: Matrix


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: xts

Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric



#                                                                             #
# The dplyr lag() function breaks how base R's lag() function is supposed to  #
# work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
# source() into this session won't work correctly.                            #
#                                                                             #
# Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
# conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
# dplyr from breaking base R

In [2]:
# Read the tab-delimited export into a data frame and list its column names
# so the renames in the next step can be checked against them.
my_data <- read.delim(
  file = '/kaggle/input/predicting-learning-outcomes/multiskill_converted_All_Data_1884_2013_0215_193821.txt'
)
colnames(my_data)

In [3]:
# Reshape the raw export into the layout used by the rest of the analysis:
#   1. give the columns short names,
#   2. order each (individual, KC) sequence by `time`,
#   3. add the previous problem / previous response within each sequence and
#      the number of opportunities remaining (`opportunity_reverse`),
#   4. recode responses to 1 ("correct") / 0 (anything else; NA stays NA),
#   5. drop rows without a usable KC label.
#
# `lag()` and `filter()` are namespace-qualified because this session also
# attaches packages that interact with the base versions (see the startup
# message about dplyr::lag vs stats::lag); the explicit prefix makes the
# result independent of the library() order.
my_data <- my_data %>%
  rename(
    individual = `Anon.Student.Id`,
    pid = `Problem.Name`,
    time = `First.Transaction.Time`,
    corrects = Corrects,
    incorrects = Incorrects,
    KC = `KC.KTracedSkills.`,
    opportunity = `Opportunity..KTracedSkills.`,
    response = `First.Attempt`
  ) %>%
  arrange(individual, KC, time) %>%
  group_by(individual, KC) %>%
  mutate(
    prev_pid = dplyr::lag(pid),
    prev_response = dplyr::lag(response),
    opportunity_reverse = max(opportunity) - opportunity
  ) %>%
  ungroup() %>%
  mutate(
    # Comparison yields TRUE/FALSE/NA; as.numeric() maps that to 1/0/NA.
    response = as.numeric(response == "correct"),
    prev_response = as.numeric(prev_response == "correct"),
    # Zero-based opportunity counter (first opportunity becomes 0).
    opportunity0 = opportunity - 1
  ) %>%
  # NOTE(review): set_tidy_names() is kept only to preserve the original
  # pipeline; with syntactic = FALSE it should leave these names untouched.
  set_tidy_names(syntactic = FALSE) %>%
  # Remove rows whose KC is missing, empty, or "0". `%in%` compares after
  # coercion to character, so a numeric 0 is matched as well.
  dplyr::filter(!is.na(KC), !KC %in% c("", "0"))

In [4]:
# Keep one row per (individual, KC): the row with the largest `opportunity0`
# (which.max() returns the first such row when there are ties).
# The grouping is dropped afterwards so that later steps operating on
# `ds_predict` do not silently inherit the (individual, KC) groups.
ds_predict <- my_data %>%
  group_by(individual, KC) %>%
  slice(which.max(opportunity0)) %>%
  ungroup()

In [5]:
# Load the previously fitted model object from disk.
model_iafm <- readRDS("/kaggle/input/predicting-learning-outcomes/model_iafm.rds")

# Random effects for the `individual` grouping factor: one row per student,
# row names are the student ids.
student_ranef <- ranef(model_iafm)$individual

# One value per student: first random-effect column plus the first fixed
# effect. NOTE(review): both are presumably the "(Intercept)" terms -- confirm
# against the model formula.
#
# The data frame is built column-by-column so the numeric values never pass
# through a character matrix (cbind() of ids and numbers coerces everything
# to character, which loses precision and breaks if strings become factors).
StudentInterceptsiAFM <- data.frame(
  individual = row.names(student_ranef),
  int_iAFM = student_ranef[, 1] + fixef(model_iafm)[[1]],
  stringsAsFactors = FALSE
)
StudentInterceptsiAFM

individual,int_iAFM
<chr>,<dbl>
stu_0113m7,1.7070556
stu_01n2uy,1.4049450
stu_03nig7,1.5640231
stu_05gygj,1.6731734
stu_09idnd,1.6137653
stu_0bztgj,1.1058427
stu_0h52n5,1.5952662
stu_0ktmj0,0.9756250
stu_0kwlpu,1.1704697
stu_0o7dmu,2.1273446


In [6]:
# Model prediction on the response scale for every row of `ds_predict`;
# levels not seen when the model was fitted are allowed.
ds_predict[["pred_iafm"]] <- predict(
  model_iafm,
  ds_predict,
  type = "response",
  allow.new.levels = TRUE
)

# Average the row-level predictions within each student.
PredictedScores <- summarise(
  group_by(ds_predict, individual),
  PredAvgiAFM = mean(pred_iafm)
)
PredictedScores

individual,PredAvgiAFM
<chr>,<dbl>
stu_0113m7,0.8224277
stu_01n2uy,0.8695334
stu_03nig7,0.8147798
stu_05gygj,0.8096688
stu_09idnd,0.8338457
stu_0bztgj,0.8742001
stu_0h52n5,0.8188979
stu_0ktmj0,0.8770987
stu_0kwlpu,0.8721539
stu_0o7dmu,0.8900974


In [7]:
# Read the grade export (first row holds the column names).
testScores <- read_csv(
  file = "/kaggle/input/predicting-learning-outcomes/CognitiveTutorData2011-12_CarvalhoExport.csv",
  col_names = TRUE
)

# Grade columns used in the analysis.
cols <- c("PriorFinalGrade", "Q1Math", "Q2Math", "Q3Math", "Q4Math", "FinalMath")

# Keep the student id (renamed to `individual`, the join key used elsewhere)
# followed by the grade columns.
testScores <- testScores %>%
  select(individual = ID, all_of(cols))

# Drop every row in which at least one grade column exceeds 100.
# rowSums(..., na.rm = TRUE) counts only definite violations, so a missing
# grade neither removes the row nor turns the row index into NA.
exceeds_limit <- rowSums(testScores[cols] > 100, na.rm = TRUE) > 0
testScores <- testScores[!exceeds_limit, ]
testScores

[1mRows: [22m[34m389[39m [1mColumns: [22m[34m188[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m   (4): ID, Teacher, ClassTitle, PSSAGrade
[32mdbl[39m (184): School, Curriculum, Sex, NonWhite, FreeOrReducedLunch, PaSpeciale...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


individual,PriorFinalGrade,Q1Math,Q2Math,Q3Math,Q4Math,FinalMath
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
stu_09idnd,81,87,92,86,77,82
stu_0a8mv3,62,61,72,70,39,61
stu_0bztgj,79,87,64,84,87,80
stu_0pj6aw,79,74,71,66,64,68
stu_0swky2,88,84,85,76,68,76
stu_0uf00w,84,78,76,73,64,72
stu_1c5uky,80,77,71,75,73,74
stu_2h41ko,64,73,85,76,90,77
stu_2u1hbu,63,65,51,60,67,60
stu_2wzhpw,87,77,76,84,92,82


In [8]:
# Attach the per-student model quantities to the grade table (matching on
# `individual`), make sure `int_iAFM` is numeric, and keep only rows with
# no missing value in any column.
testScoresAll <- testScores %>%
  left_join(StudentInterceptsiAFM, by = "individual") %>%
  left_join(PredictedScores, by = "individual") %>%
  mutate(int_iAFM = as.numeric(int_iAFM)) %>%
  na.omit()
testScoresAll

individual,PriorFinalGrade,Q1Math,Q2Math,Q3Math,Q4Math,FinalMath,int_iAFM,PredAvgiAFM
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
stu_09idnd,81,87,92,86,77,82,1.6137653,0.8338457
stu_0bztgj,79,87,64,84,87,80,1.1058427,0.8742001
stu_0pj6aw,79,74,71,66,64,68,0.9684165,0.8234351
stu_0swky2,88,84,85,76,68,76,2.3084323,0.8940262
stu_0uf00w,84,78,76,73,64,72,0.6999406,0.7902201
stu_1c5uky,80,77,71,75,73,74,1.7480176,0.8308995
stu_2h41ko,64,73,85,76,90,77,0.9955341,0.7477845
stu_2u1hbu,63,65,51,60,67,60,0.7924347,0.8635738
stu_2wzhpw,87,77,76,84,92,82,2.0794892,0.8866626
stu_3lzg2v,84,97,89,87,84,89,1.2822518,0.8654703


In [9]:
# OLS regressions of FinalMath on the raw (unscaled) predictors.
# Each summ() call is a separate top-level expression so its table is shown.

# Baseline: prior final grade only.
summ(lm(FinalMath ~ PriorFinalGrade, data = testScoresAll))

# Baseline plus the student's average predicted score.
summ(lm(FinalMath ~ PredAvgiAFM + PriorFinalGrade, data = testScoresAll))

# Baseline plus the student's model-derived intercept value.
summ(lm(FinalMath ~ int_iAFM + PriorFinalGrade, data = testScoresAll))

# Disabled: `int_iAFM_reverse` is not created anywhere in the visible cells.
# testScoresAll %>%
#     lm(FinalMath ~ int_iAFM_reverse + PriorFinalGrade, data = .) %>%
#     summ()

[4mMODEL INFO:[24m
[3mObservations:[23m 228
[3mDependent Variable:[23m FinalMath
[3mType:[23m OLS linear regression 

[4mMODEL FIT:[24m
[3mF[23m(1,226) = 172.37, [3mp[23m = 0.00
[3mR² = [23m0.43
[3mAdj. R² = [23m0.43 

[3mStandard errors: OLS[23m
----------------------------------------------------
                         Est.   S.E.   t val.      p
--------------------- ------- ------ -------- ------
(Intercept)             18.50   5.01     3.69   0.00
PriorFinalGrade          0.78   0.06    13.13   0.00
----------------------------------------------------

[4mMODEL INFO:[24m
[3mObservations:[23m 228
[3mDependent Variable:[23m FinalMath
[3mType:[23m OLS linear regression 

[4mMODEL FIT:[24m
[3mF[23m(2,225) = 97.55, [3mp[23m = 0.00
[3mR² = [23m0.46
[3mAdj. R² = [23m0.46 

[3mStandard errors: OLS[23m
------------------------------------------------------
                          Est.    S.E.   t val.      p
--------------------- -------- ------- -------- ------
(Intercept)             -11.41    9.53    -1.20   0.23
PredAvgiAFM              39.94   10.94     3.65   0.00
PriorFinalGrade           0.73    0.06    12.14   0.00
------------------------------------------------------

[4mMODEL INFO:[24m
[3mObservations:[23m 228
[3mDependent Variable:[23m FinalMath
[3mType:[23m OLS linear regression 

[4mMODEL FIT:[24m
[3mF[23m(2,225) = 113.83, [3mp[23m = 0.00
[3mR² = [23m0.50
[3mAdj. R² = [23m0.50 

[3mStandard errors: OLS[23m
----------------------------------------------------
                         Est.   S.E.   t val.      p
--------------------- ------- ------ -------- ------
(Intercept)             22.27   4.74     4.69   0.00
int_iAFM                 5.38   0.95     5.64   0.00
PriorFinalGrade          0.63   0.06    10.23   0.00
----------------------------------------------------

In [10]:
# Standardise every column except the student id with scale()
# (default arguments: centre each column and divide by its standard deviation).
exclude_columns <- c("individual")
columns_to_scale <- setdiff(colnames(testScoresAll), exclude_columns)

# Work on a copy so the unscaled table stays available.
testScoresAllScaled <- testScoresAll
scaled_values <- scale(testScoresAll[columns_to_scale])
testScoresAllScaled[columns_to_scale] <- scaled_values
testScoresAllScaled

individual,PriorFinalGrade,Q1Math,Q2Math,Q3Math,Q4Math,FinalMath,int_iAFM,PredAvgiAFM
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
stu_09idnd,-0.369947329,0.14302990,0.64790556,0.238605977,-0.5562471,-0.2062058,-0.01801467,-0.679907459
stu_0bztgj,-0.622108736,0.14302990,-1.86346607,0.078599616,0.1751505,-0.4189430,-1.00948363,0.252263750
stu_0pj6aw,-0.622108736,-1.14947199,-1.23562317,-1.361457636,-1.5070640,-1.6953663,-1.27774058,-0.920386574
stu_0swky2,0.512617599,-0.15523977,0.02006265,-0.561425829,-1.2145049,-0.8444175,1.33798077,0.710239408
stu_0uf00w,0.008294783,-0.75177910,-0.78716395,-0.801435371,-1.5070640,-1.2698919,-1.80180777,-1.687639982
stu_1c5uky,-0.496028032,-0.85120232,-1.23562317,-0.641429010,-0.8488061,-1.0571547,0.24404691,-0.747962400
stu_2h41ko,-2.513319295,-1.24889521,0.02006265,-0.561425829,0.3945697,-0.7380488,-1.22480689,-2.667887120
stu_2u1hbu,-2.639399999,-2.04428098,-3.02946005,-1.841476720,-1.2876447,-2.5463152,-1.62125855,0.006800765
stu_2wzhpw,0.386536895,-0.85120232,-0.78716395,0.078599616,0.5408493,-0.2062058,0.89108214,0.540143316
stu_3lzg2v,0.008294783,1.13726212,0.37883003,0.318609158,-0.0442688,0.5383744,-0.66513163,0.050610316


In [11]:
# Same three regressions as on the raw table, now fitted to the
# standardised columns of `testScoresAllScaled`.

# Baseline: prior final grade only.
summ(lm(FinalMath ~ PriorFinalGrade, data = testScoresAllScaled))

# Baseline plus the student's average predicted score.
summ(lm(FinalMath ~ PredAvgiAFM + PriorFinalGrade, data = testScoresAllScaled))

# Baseline plus the student's model-derived intercept value.
summ(lm(FinalMath ~ int_iAFM + PriorFinalGrade, data = testScoresAllScaled))

[4mMODEL INFO:[24m
[3mObservations:[23m 228
[3mDependent Variable:[23m FinalMath
[3mType:[23m OLS linear regression 

[4mMODEL FIT:[24m
[3mF[23m(1,226) = 172.37, [3mp[23m = 0.00
[3mR² = [23m0.43
[3mAdj. R² = [23m0.43 

[3mStandard errors: OLS[23m
---------------------------------------------------
                        Est.   S.E.   t val.      p
--------------------- ------ ------ -------- ------
(Intercept)             0.00   0.05     0.00   1.00
PriorFinalGrade         0.66   0.05    13.13   0.00
---------------------------------------------------

[4mMODEL INFO:[24m
[3mObservations:[23m 228
[3mDependent Variable:[23m FinalMath
[3mType:[23m OLS linear regression 

[4mMODEL FIT:[24m
[3mF[23m(2,225) = 97.55, [3mp[23m = 0.00
[3mR² = [23m0.46
[3mAdj. R² = [23m0.46 

[3mStandard errors: OLS[23m
---------------------------------------------------
                        Est.   S.E.   t val.      p
--------------------- ------ ------ -------- ------
(Intercept)             0.00   0.05     0.00   1.00
PredAvgiAFM             0.18   0.05     3.65   0.00
PriorFinalGrade         0.61   0.05    12.14   0.00
---------------------------------------------------

[4mMODEL INFO:[24m
[3mObservations:[23m 228
[3mDependent Variable:[23m FinalMath
[3mType:[23m OLS linear regression 

[4mMODEL FIT:[24m
[3mF[23m(2,225) = 113.83, [3mp[23m = 0.00
[3mR² = [23m0.50
[3mAdj. R² = [23m0.50 

[3mStandard errors: OLS[23m
---------------------------------------------------
                        Est.   S.E.   t val.      p
--------------------- ------ ------ -------- ------
(Intercept)             0.00   0.05     0.00   1.00
int_iAFM                0.29   0.05     5.64   0.00
PriorFinalGrade         0.53   0.05    10.23   0.00
---------------------------------------------------