In [3]:
import pandas as pd
import numpy as np

In [4]:
class TMUADataLoader:
    def __init__(self, data_path):
        self.data_path = data_path
        self.df = pd.read_csv(data_path)
        self.years = self.df['Year'].unique()
        self.papers = self.df['P'].unique()
        self.categories = np.unique(np.concatenate([self.df.Category.unique(), self.df["Sub-Category"].dropna().unique()]))
        self.approaches = self.df['Approach'].dropna().unique()
        self.types = self.df['Type'].dropna().unique()

    def get_filter_options(self):
        return {
            'Years': self.years,
            'Papers': self.papers,
            'Categories': self.categories,
            'Approaches': self.approaches,
            'Types': self.types
        }

    def get_all_questions(self):
        return self.df


In [5]:
data_loader = TMUADataLoader('questions.csv')
filter_options = data_loader.get_filter_options()
filter_options

{'Years': array(['specimen', '2016'], dtype=object),
 'Papers': array(['P1', 'P2'], dtype=object),
 'Categories': array(['Algebra', 'Calculus', 'Counterexample', 'Exp & Log', 'Geometry',
        'Graph', 'N True', 'Necessary & Sufficient', 'Probability',
        'Sequence & Series', 'Steps of Proofs', 'Trigonometry', 'Weird'],
       dtype=object),
 'Approaches': array(['Substitution', 'Graph'], dtype=object),
 'Types': array(['Number of Roots', 'Stationary Point', 'Binomial Expansion',
        'Implication'], dtype=object)}

In [6]:
df = pd.read_csv('questions.csv')

df['key'] = df.Year.astype(str) + df.P.astype(str) + "Q" + df.Q.astype(str)

In [28]:
import re

def parse_question(string):
    pattern = r'(\w+)P(\d+)Q(\d+)'
    match = re.search(pattern, string)

    if match:
        year = match.group(1)
        p = match.group(2)
        q = match.group(3)
        
        return year, p, q
    else:
        # error
        print(f"Error parsing question {string}")
        return None, None, None

In [56]:
import os
LIST_PATH = './Lists/'
list_files = [f for f in os.listdir(LIST_PATH) if os.path.isfile(os.path.join(LIST_PATH, f))]

lists = {}
# parse the list files
for list_file in list_files:
    with open(LIST_PATH + list_file, 'r') as f:
        list_file = list_file.replace(".txt", "")
        lists[list_file] = []
        for line in f:
            if not line.strip():
                continue
            string = line.strip().replace(" ", "").replace("\n", "")
            year, paper, question = parse_question(string)
            year = year.replace("spec", "specimen")
            print(year, paper, question)
            lists[list_file].append(year + "P" + paper + "Q" + question)

# process the df
print(lists)
df['in_list'] = [[] for _ in range(len(df))]

for list_name, list in lists.items():
    mask = df['key'].isin(list)
    df.loc[mask, 'in_list'] = df.loc[mask, 'in_list'].apply(lambda x: x + [list_name])
    
    filtered_df = df[mask]
    filtered_df['key'] = pd.Categorical(filtered_df['key'], categories=lists['2024_Lesson1'], ordered=True)
    sorted_df = filtered_df.sort_values('key')
    print(sorted_df[['key', 'P', 'Q', 'in_list']])

df

specimen 1 1
specimen 1 2
specimen 1 3
specimen 1 4
2016 1 2
specimen 1 4
specimen 1 17
specimen 1 11
specimen 1 19
2016 1 14
specimen 1 5
specimen 1 8
specimen 1 18
{'2024_Homework1': ['specimenP1Q1', 'specimenP1Q2', 'specimenP1Q3', 'specimenP1Q4'], '2024_Lesson1': ['2016P1Q2', 'specimenP1Q4', 'specimenP1Q17', 'specimenP1Q11', 'specimenP1Q19', '2016P1Q14', 'specimenP1Q5', 'specimenP1Q8', 'specimenP1Q18']}


KeyError: 'a'

In [9]:
sorted(list(df.Category.unique()))

['Algebra',
 'Calculus',
 'Counterexample',
 'Exp & Log',
 'Geometry',
 'Graph',
 'N True',
 'Necessary & Sufficient',
 'Probability',
 'Sequence & Series',
 'Steps of Proofs',
 'Trigonometry',
 'Weird']

In [10]:
df["Sub-Category"].dropna().unique()

array(['Trigonometry', 'Algebra', 'Exp & Log', 'Graph', 'Calculus'],
      dtype=object)

In [11]:
list(np.unique(np.concatenate([df.Category.unique(), df["Sub-Category"].dropna().unique()])))

['Algebra',
 'Calculus',
 'Counterexample',
 'Exp & Log',
 'Geometry',
 'Graph',
 'N True',
 'Necessary & Sufficient',
 'Probability',
 'Sequence & Series',
 'Steps of Proofs',
 'Trigonometry',
 'Weird']