In [133]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

In [0]:
def preprocess_subtopics():
    """Prefix every sub-topic with its grade, education level and math topic.

    Reads '../data/GPT_tutor_topics(sub_topics_included).csv' and rewrites the
    columns ``Sub_topic_1`` .. ``Sub_topic_5`` in the form
    ``"<Grade>:<Education Level>:<Math Topic>: <sub-topic>"``
    (example: ``"2:Elementary:Addition: 1 digit addition"``). The prefix keeps
    identical sub-topic text apart when it appears at different
    levels/topics and encodes the difficulty of the sub-topic.

    After the rewrite the function prompts on stdin; answering ``1`` writes
    the rewritten (wide) frame to 'subtopics.csv' in the current working
    directory, without the index.

    Side effects
    ------------
    Seeds NumPy's global random generator with 42 (kept from the original
    implementation so that code running afterwards sees the same RNG state).

    Returns
    -------
    pandas.Series
        Named ``'Sub_topic'``: every distinct prefixed sub-topic from the five
        columns, in a shuffled order that is reproducible thanks to the fixed
        seed, with a fresh 0..n-1 index.
    """
    subtopic_cols = [f'Sub_topic_{i}' for i in range(1, 6)]

    # preprocessing GPT tutor topics
    df = pd.read_csv('../data/GPT_tutor_topics(sub_topics_included).csv')

    # One "<grade>:<level>:<topic>: " prefix per row, built once and reused
    # for all five sub-topic columns.
    prefixes = [
        f"{grade}:{education_lvl}:{topic}: "
        for grade, education_lvl, topic in zip(
            df['Grade'], df['Education Level'], df['Math Topic'])
    ]
    for col in subtopic_cols:
        df[col] = [
            f"{prefix}{sub_topic}"
            for prefix, sub_topic in zip(prefixes, df[col])
        ]

    # Stack the five columns into a single column named 'Sub_topic'.
    subtopics_str = pd.concat(
        [df[col] for col in subtopic_cols], axis=0).rename('Sub_topic')

    # Some topics repeat a sub-topic to fill all five slots; keep one copy.
    subtopics_str = subtopics_str.drop_duplicates()

    # Shuffle the rows reproducibly: sample() draws from NumPy's global
    # generator when no random_state is passed.
    np.random.seed(42)
    subtopics_str = subtopics_str.sample(frac=1).reset_index(drop=True)

    # Optional export. input() returns a string, so the answer has to be
    # compared with '1' (comparing with the integer 1 is never true).
    export_csv = input("Type '1' if you want to export the subtopics into a csv?")
    if export_csv.strip() == '1':
        df.to_csv('subtopics.csv', index=False)

    return subtopics_str
# Run the preprocessing when this cell executes; the call blocks on the
# stdin prompt issued inside the function before it finishes.
preprocess_subtopics()

In [140]:

# 32 x 40 tensor filled with random values.
stud_data = torch.rand((32, 40))  # TODO

# (num_mistakes, 2) tensor filled with random values.
num_mistakes = 34
stud_mistakes = torch.rand((num_mistakes, 2))

# Load the preprocessed sub-topic table from disk.
subtopics = pd.read_csv(filepath_or_buffer='../data/preprocessed/subtopics.csv')

In [142]:
# Inspect the loaded frame; the rendered output elides the middle rows.
subtopics

Unnamed: 0,Grade,Education Level,Math Topic,Sub_topic_1,Sub_topic_2,Sub_topic_3,Sub_topic_4,Sub_topic_5
0,1,Elementary,Counting numbers,1:Elementary:Counting numbers: Counting objects,1:Elementary:Counting numbers: Counting forwar...,1:Elementary:Counting numbers: Skip counting b...,1:Elementary:Counting numbers: Counting in dif...,1:Elementary:Counting numbers: Counting on a n...
1,1,Elementary,Comparing numbers,1:Elementary:Comparing numbers: Comparing numb...,1:Elementary:Comparing numbers: Comparing numb...,1:Elementary:Comparing numbers: Comparing numbers,1:Elementary:Comparing numbers: Comparing quan...,1:Elementary:Comparing numbers: Comparing values
2,1,Elementary,Addition within 20,1:Elementary:Addition within 20: Addition with...,1:Elementary:Addition within 20: Addition with...,1:Elementary:Addition within 20: Word problems...,1:Elementary:Addition within 20: Adding three ...,1:Elementary:Addition within 20: Solving addit...
3,1,Elementary,Subtraction within 20,1:Elementary:Subtraction within 20: Subtractio...,1:Elementary:Subtraction within 20: Subtractio...,1:Elementary:Subtraction within 20: Word probl...,1:Elementary:Subtraction within 20: Subtractin...,1:Elementary:Subtraction within 20: Solving su...
4,1,Elementary,Place value within 100,1:Elementary:Place value within 100: Place val...,1:Elementary:Place value within 100: Expanded ...,1:Elementary:Place value within 100: Comparing...,1:Elementary:Place value within 100: Skip coun...,1:Elementary:Place value within 100: Represent...
...,...,...,...,...,...,...,...,...
121,12,High School,Trigonometric identities and equations,12:High School:Trigonometric identities and eq...,12:High School:Trigonometric identities and eq...,12:High School:Trigonometric identities and eq...,12:High School:Trigonometric identities and eq...,12:High School:Trigonometric identities and eq...
122,12,High School,Analytic geometry and vectors,12:High School:Analytic geometry and vectors: ...,12:High School:Analytic geometry and vectors: ...,12:High School:Analytic geometry and vectors: ...,12:High School:Analytic geometry and vectors: ...,12:High School:Analytic geometry and vectors: ...
123,12,High School,Probability and statistics,12:High School:Probability and statistics: Pro...,12:High School:Probability and statistics: Dis...,12:High School:Probability and statistics: Sta...,12:High School:Probability and statistics: Hyp...,12:High School:Probability and statistics: Reg...
124,12,High School,Calculus,12:High School:Calculus: Limits and continuity,12:High School:Calculus: Derivatives and their...,12:High School:Calculus: Integrals and their a...,12:High School:Calculus: Differential equation...,12:High School:Calculus: Multivariable calculu...


In [143]:
# One-hot encode the sub-topic frame; the cast to int yields 0/1 indicator
# values instead of True/False.
subtopics_enc = (
    pd.get_dummies(subtopics)
    .astype(int)
)
# Pull the encoded values out as a NumPy array and wrap them in a tensor.
subtopics_tensor = torch.tensor(subtopics_enc.to_numpy())