In [6]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

In [8]:
def preprocess_subtopics(
    csv_path='../data/GPT_tutor_topics(subtopics_included).csv',
    num_subtopics=5,
    seed=42,
):
    """Build a shuffled, de-duplicated Series of fully qualified subtopic labels.

    Every ``Sub_topic_<i>`` cell is prefixed with its row's grade, education
    level and math topic, so that the same subtopic text can appear under
    different levels/topics and still be distinguished:

        (grade:School Level:Topic: Subtopic)
        example: "2:Elementary:Addition: 1 digit addition"

    Parameters
    ----------
    csv_path : str or path-like
        CSV file with the columns 'Grade', 'Education Level', 'Math Topic' and
        'Sub_topic_1' .. 'Sub_topic_<num_subtopics>'.
    num_subtopics : int
        Number of 'Sub_topic_<i>' columns to read (numbered from 1).
    seed : int
        Seed used for shuffling. It is passed to ``Series.sample`` directly,
        so NumPy's global random state is not modified by this function.

    Returns
    -------
    pandas.Series
        Unique labels in shuffled order, named
        'Grade: Education Level: Topic: Sub Topic', with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from the CSV.
    """
    df = pd.read_csv(csv_path)
    subtopic_cols = [f'Sub_topic_{i}' for i in range(1, num_subtopics + 1)]

    # Walk the subtopic columns one after another (all rows of Sub_topic_1,
    # then Sub_topic_2, ...) so the labels end up stacked in a single column.
    # Missing subtopic cells are skipped instead of becoming a "...: nan" label.
    labels = [
        f"{grade}:{education_lvl}:{topic}: {subtopic}"
        for col in subtopic_cols
        for grade, education_lvl, topic, subtopic in zip(
            df['Grade'], df['Education Level'], df['Math Topic'], df[col]
        )
        if pd.notna(subtopic)
    ]

    subtopics_str = (
        pd.Series(labels, name='Grade: Education Level: Topic: Sub Topic', dtype=object)
        # Duplicates exist because some topics repeat a subtopic to fill all
        # of their subtopic slots.
        .drop_duplicates()
        # Shuffle reproducibly without touching the global NumPy RNG.
        .sample(frac=1, random_state=seed)
        .reset_index(drop=True)
    )

    return subtopics_str
# Build the shuffled, de-duplicated subtopic labels used by the cells below.
subtopics = preprocess_subtopics()

In [9]:
# Export the subtopics to the same location the loading cell reads from
# ('../data/preprocessed/subtopics.csv'), so a fresh run never picks up a stale file.
# index=False keeps the row index out of the file; a written index would come
# back as an extra "Unnamed: 0" column when the CSV is read again.
subtopics.to_csv('../data/preprocessed/subtopics.csv', index=False)

In [10]:
# Placeholder student data: 32 x 40 tensor with random values
stud_data = torch.rand(32, 40) #TODO
num_mistakes = 34
# Placeholder mistakes: one row of 2 random values per mistake
stud_mistakes = torch.rand(num_mistakes, 2)

# Load the exported subtopic labels.
subtopics = pd.read_csv('../data/preprocessed/subtopics.csv')
# A CSV that was exported together with its index comes back with an
# "Unnamed: 0" column; drop it so only the subtopic labels are encoded later
# (get_dummies would otherwise pass the row numbers through as a feature).
subtopics = subtopics.loc[:, ~subtopics.columns.str.startswith('Unnamed')]

In [11]:
# Display the loaded subtopics table to sanity-check the import.
subtopics

Unnamed: 0,Grade: Education Level: Topic: Sub Topic
0,8:Middle School:Linear equations: Analyzing an...
1,9:High School:Linear equations: Analyzing and ...
2,5:Elementary:Operations with decimals of divis...
3,9:High School:Linear equations: Writing and so...
4,10:High School:Logarithms: Applying logarithmi...
...,...
623,7:Middle School:Rational numbers and rational ...
624,10:High School:Quadratic Equations: Solving qu...
625,2:Elementary:Measurement of weight: Comparing ...
626,6:Middle School:Division of fractions: Solving...


In [5]:
# One-hot encode the subtopics; the cast to int turns the indicator
# columns into 0s and 1s rather than True/False.
subtopics_enc = (
    pd.get_dummies(subtopics)
    .astype(int)
)
# Pull out the underlying NumPy array and copy it into a torch tensor.
encoded_array = subtopics_enc.to_numpy()
subtopics_tensor = torch.tensor(encoded_array)