In [1]:
from config import *
from main_package.utils import csv_to_fixed_params
from typing import Dict, Iterable, List, Set, Tuple
from math import exp, inf
from numpy.random import normal
from scipy.optimize import minimize

In [2]:
# Load the train and test splits. factor_analysis later unpacks each of these
# as three parallel columns: (user_id, skill_id, correct).
train_fixed_params, test_fixed_params = (
    csv_to_fixed_params("assistment/train_v3"),
    csv_to_fixed_params("assistment/test_v3"),
)

In [3]:
def logistic_funciton(m: float) -> float:
    """Return the logistic sigmoid ``1 / (1 + e**(-m))`` of ``m``.

    The (misspelled) name is kept because other cells call it by this name.

    Args:
        m: The logit value.

    Returns:
        A float in ``[0.0, 1.0]``. When ``exp(-m)`` overflows the result is
        exactly ``0.0``.
    """
    try:
        return 1.0 / (1.0 + exp(-m))
    except OverflowError:
        # math.exp only overflows for a large *positive* argument, so reaching
        # this handler means m is very negative and the sigmoid is 0. For a
        # large positive m, exp(-m) underflows to 0.0 without raising and the
        # expression above already yields 1.0.
        return 0.0


In [9]:
from time import perf_counter

In [11]:
# Module-level state retained for backward compatibility: another cell resets
# `current_min` before optimising. factor_analysis itself does not read or
# write either name.
current_min = inf
min_delta = 0.001
def factor_analysis(
        params: List[Dict[str, float]],
        fixed_params: List[Iterable]
    ) -> Tuple[float, List[float]]:
    """Score one parameter set against a sequence of observed answers.

    For every row, the logit is the sum over the row's skills of
    ``skill_difficulty[skill]
      + scale_correct[skill] * prior_successes
      + scale_incorrect[skill] * prior_failures``
    where the prior counts are the number of earlier rows in which the *same
    user* answered an item with that skill correctly / incorrectly. The logit
    is passed through ``logistic_funciton`` to obtain the prediction.

    Args:
        params: Three dicts keyed by skill name, in this order:
            skill difficulty, scale for prior successes, scale for prior
            failures.
        fixed_params: Three equally long sequences, in this order:
            user ids, skill ids (several skills joined with ``"_"``), and
            correctness values (truthy means correct; also used numerically
            in the residual).

    Returns:
        ``(sum_squared_residuals, predictions)`` where ``predictions`` has one
        entry per row, in row order.

    Raises:
        AssertionError: If the three sequences differ in length.
        KeyError: If a row references a skill missing from ``params``.

    Side effects:
        Prints the elapsed wall-clock time of the call.
    """
    start = perf_counter()
    skill_difficulty, scale_correct, scale_incorrect = params

    user_id, skill_id, correct = fixed_params
    assert len(user_id) == len(skill_id) == len(correct)

    # Counts are keyed by (user, skill): the practice history that drives the
    # prediction belongs to one student, not to the whole population.
    successes = {}
    failures = {}
    predictions = []
    sum_squared_residuals = 0

    for user, merged_skills, outcome in zip(user_id, skill_id, correct):
        m_value = 0
        for skill in merged_skills.split("_"):
            key = (user, skill)
            prior_successes = successes.get(key, 0)
            prior_failures = failures.get(key, 0)
            m_value += (
                skill_difficulty[skill] +
                scale_correct[skill] * prior_successes +
                scale_incorrect[skill] * prior_failures
            )

            # Update after reading, so the current row never sees its own
            # outcome.
            if outcome:
                successes[key] = prior_successes + 1
            else:
                failures[key] = prior_failures + 1
        prediction = logistic_funciton(m_value)
        predictions.append(prediction)
        sum_squared_residuals += (outcome - prediction)**2

    end = perf_counter()
    print(f"time for factor analysis: {round(end-start, 1)}")

    return (sum_squared_residuals, predictions)


In [12]:
def find_all_skills(fixed_params: List[Iterable]) -> Set[str]:
    """Collect every distinct skill name that occurs in the skill column.

    Args:
        fixed_params: Exactly three sequences; only the middle one (skill ids)
            is read. Each entry may hold several skills joined with ``"_"``.

    Returns:
        The set of individual skill names after splitting on ``"_"``.
    """
    _users, skill_column, _outcomes = fixed_params
    unique_skills: Set[str] = set()
    for merged in skill_column:
        unique_skills.update(merged.split("_"))
    return unique_skills
#find_all_skills(train_fixed_params)

In [13]:
def construct_initial_params(fixed_params: List[Iterable]):
    """Draw a random starting point for the optimiser.

    Args:
        fixed_params: Data columns; passed to ``find_all_skills`` to obtain
            the skill names.

    Returns:
        ``(initial_params, skills)``. ``skills`` is the sorted list of skill
        names. ``initial_params`` is a flat list of ``3 * len(skills)`` floats
        laid out as three consecutive groups, each in ``skills`` order:
        skill difficulty, scale for successes, scale for failures.
    """
    # Sort instead of relying on set iteration order: string hashing is
    # randomised per interpreter process, so an unsorted list would map
    # parameter slots to different skills from one kernel session to the next.
    skills = sorted(find_all_skills(fixed_params))
    initial_params = (
        [normal(loc=0, scale=0.5) for _ in skills] + #skill difficulty
        [normal(loc=0.2, scale=0.5) for _ in skills] + #scale correct
        [normal(loc=0.1, scale=0.5) for _ in skills] #scale incorrect
    )
    return initial_params, skills

#initial_params, skills = construct_initial_params(train_fixed_params)
    

In [14]:
def pfa_minimize(params: List[float], fixed_params: List[Iterable], skills: List[str]) -> float:
    """Objective function: sum of squared residuals for a flat parameter vector.

    Args:
        params: Flat sequence of ``3 * len(skills)`` numbers laid out as three
            consecutive groups, each in ``skills`` order: skill difficulty,
            scale for successes, scale for failures.
        fixed_params: Data columns, forwarded unchanged to ``factor_analysis``.
        skills: Skill names giving the meaning of each slot in ``params``.

    Returns:
        The first element of ``factor_analysis``'s result (the sum of squared
        residuals).

    Raises:
        AssertionError: If ``len(params) != 3 * len(skills)``.
    """
    n_skills = len(skills)
    assert len(params) == 3*n_skills
    # Cut the flat vector into its three per-skill groups.
    skill_difficulty = dict(zip(skills, params[:n_skills]))
    scale_correct = dict(zip(skills, params[n_skills:2*n_skills]))
    scale_incorrect = dict(zip(skills, params[2*n_skills:]))
    return factor_analysis([skill_difficulty, scale_correct, scale_incorrect], fixed_params)[0]

In [16]:
def find_good_initial_params(fixed_params: List[Iterable], num_iteration=100):
    """Random search for a starting point with a low sum of squared residuals.

    Draws candidate parameter vectors with ``construct_initial_params``,
    scores each with ``pfa_minimize`` and keeps the one with the smallest
    score (the earliest draw wins ties).

    Args:
        fixed_params: Data columns, forwarded to both helpers.
        num_iteration: Number of candidates to try; at least one candidate is
            always evaluated.

    Returns:
        ``(current_best_ssr, best_params)`` -- the score first, then the flat
        parameter list that achieved it.

    Side effects:
        Prints the scoring time of every candidate and appends a summary of
        the winner to ``best_initial_params.txt``.
    """
    current_best_ssr = None
    best_params = None
    for attempt in range(max(num_iteration, 1)):
        candidate, skills = construct_initial_params(fixed_params)
        # Only the scoring call is timed, not the random draw.
        start = perf_counter()
        candidate_ssr = pfa_minimize(candidate, fixed_params, skills)
        end = perf_counter()
        print(f"pfa minimize: {round(end-start, 1)}")
        # The first draw is accepted unconditionally; later ones must be
        # strictly better.
        if attempt == 0 or candidate_ssr < current_best_ssr:
            current_best_ssr, best_params = candidate_ssr, candidate
    with open("best_initial_params.txt", "a") as f:
        f.write(f"num_iteration: {num_iteration}\nssr: {current_best_ssr}\nbest_params: {best_params}\n")
    return current_best_ssr, best_params

# find_good_initial_params returns (best_ssr, best_params), in that order.
best_initial_ssr, initial_params = find_good_initial_params(train_fixed_params, 5)
# The search does not return the skill list that defines the layout of
# initial_params, so take it from construct_initial_params (which the search
# itself uses); the random draw that comes with it is discarded.
skills = construct_initial_params(train_fixed_params)[1]

time for factor analysis: 2.9
pfa minimize: 2.9
time for factor analysis: 2.1
pfa minimize: 2.1
time for factor analysis: 1.8
pfa minimize: 1.8
time for factor analysis: 1.9
pfa minimize: 1.9
time for factor analysis: 1.8
pfa minimize: 1.8


In [33]:
#pfa_minimize(initial_params, train_fixed_params, skills)
# Reset the module-level best-so-far value, then run the optimiser from the
# chosen starting point. The data columns and skill list are forwarded to
# pfa_minimize as extra arguments on every evaluation.
# NOTE(review): `skills` must be the list of skill names whose order matches
# the layout of `initial_params` -- confirm the cell that binds it.
current_min = inf
result = minimize(
    fun=pfa_minimize,
    x0=initial_params,
    args=(train_fixed_params, skills),
)