# Library Import


In [1]:
import huggingface_hub as hf # For downloading data from Hugging Face
import pandas as pd # For reading and importing data files into DataFrames used to represent the data


In [2]:
import sys

sys.set_int_max_str_digits(100000)

# Data Import


In [3]:
# TACO Dataset
# Test Files
test_filenames = [
  'ALL/test-00000-of-00001.parquet',
] 

# Train Files
train_filenames = [
  'ALL/train-00001-of-00009.parquet',
  'ALL/train-00002-of-00009.parquet',
  'ALL/train-00003-of-00009.parquet',
  'ALL/train-00004-of-00009.parquet',
  'ALL/train-00005-of-00009.parquet',
  'ALL/train-00006-of-00009.parquet',
  'ALL/train-00007-of-00009.parquet',
  'ALL/train-00008-of-00009.parquet',
]

In [4]:
train_df = pd.DataFrame([])
test_df = pd.DataFrame([])

for filename in train_filenames: # iterate over train_filenames
  path = hf.hf_hub_download( # download dataset
    repo_id='BAAI/TACO', # repo id that contains the files
    repo_type='dataset', # the type should be dataset as the download involves data files
    filename=filename, # the specific filename to be downloaded
  )

  if train_df.empty: # if train_df is still empty
    train_df = pd.read_parquet(path=path) # create a new DataFrame with data instead
  else: # if train_df is not empty
    train_df = pd.concat([train_df, pd.read_parquet(path)], axis=0) # concat the new data along axis=0 so not to create new columns,
                                                                    # instead concat the data as rows

for filename in test_filenames: # iterate over test_filenames
  path = hf.hf_hub_download( # download dataset
    repo_id='BAAI/TACO', # repo id that contains the files
    repo_type='dataset', # the type should be dataset as the download involves data files
    filename=filename, # the specific filename to be downloaded
  )

  if test_df.empty: # if test_df is still empty
    test_df = pd.read_parquet(path=path) # create a new DataFrame with data instead
  else: # if test_df is not empty
    test_df = pd.concat([test_df, pd.read_parquet(path)], axis=0) # concat the new data along axis=0 so not to create new columns,
                                                                    # instead concat the data as rows

# Initial Inspection


train_df and test_df are going to be combined as the project does not involve model testing. This means that there is no need to split the data into training set and testing set. It is more favorable to use all problem sets at once to cover more possibilities and variations.

In [5]:
full_df = pd.concat([train_df, test_df], axis=0) # create a concatenated train_df and test_df

In [6]:
full_df.head()

Unnamed: 0,question,solutions,starter_code,input_output,difficulty,raw_tags,name,source,tags,skill_types,url,Expected Auxiliary Space,time_limit,date,picture_num,memory_limit,Expected Time Complexity
0,"For every string given as input, you need to t...",[],,"{""inputs"": [""1\naab"", ""3\naab\ndddd\nthisisapa...",UNKNOWN_DIFFICULTY,[],subpalindrome-4,hackerearth,[],[],,,,,,,
1,A scientist discovered a strange variation of ...,[],,"{""inputs"": [""3\n0\n7\n15\n655\n2711\n6395\n719...",UNKNOWN_DIFFICULTY,[],,aizu,[],[],,,8.0 seconds,,,134.217728 megabytes,
2,Two integers A and B are the inputs. Write a p...,"[""def GCD(x, y):\n\twhile y:\n\t\t(x, y) = (y,...",,"{""inputs"": [[""3"", ""120 140"", ""10213 312"", ""10 ...",EASY,"['LCM', 'Mathematics', 'Algorithms', 'Number T...",,codechef,"['Mathematics', 'Number theory', 'Implementati...",[],https://www.codechef.com/problems/FLOW016,,1 seconds,2015-04-27,0.0,50000 bytes,
3,Limak is a grizzly bear who desires power and ...,"[""n = int(input())\nv = list(map(int, input()....",,"{""inputs"": [""5\n5 1 11 2 8\n"", ""4\n1 8 8 8\n"",...",EASY,"['greedy', 'implementation']",,codeforces,"['Implementation', 'Greedy algorithms']",['Greedy algorithms'],https://codeforces.com/problemset/problem/574/A,,,2019-12-31,,,
4,"problem\n\nJOI took six subjects: physics, che...","[""lst = []\nfor i in range(6):\n\tn = int(inpu...",,"{""inputs"": [""100\n34\n76\n67\n10\n0"", ""100\n34...",UNKNOWN_DIFFICULTY,[],,aizu,[],[],,,8.0 seconds,,,268.435456 megabytes,


In [7]:
full_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23616 entries, 0 to 999
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   question                  23616 non-null  object
 1   solutions                 23616 non-null  object
 2   starter_code              23616 non-null  object
 3   input_output              23616 non-null  object
 4   difficulty                23616 non-null  object
 5   raw_tags                  23616 non-null  object
 6   name                      3352 non-null   object
 7   source                    23616 non-null  object
 8   tags                      23616 non-null  object
 9   skill_types               23616 non-null  object
 10  url                       18915 non-null  object
 11  Expected Auxiliary Space  2386 non-null   object
 12  time_limit                10519 non-null  object
 13  date                      6204 non-null   object
 14  picture_num               671

All column type is object, can be string or other data structures like list, tuple, or dictionary


In [8]:
eda_df = full_df.copy()

# Utility Functions


### Check condition for any empty row


In [9]:
from typing import Tuple

In [10]:
def empty_rows(data: pd.Series) -> Tuple[pd.Series, int]:
  rows = []
  for row in data:
    if not row or not len(row):
      rows.append(row)

  return (pd.Series(rows), len(rows))

### Check null or empty rows


In [11]:
def null_and_empty_rows(data: pd.Series) -> None:

  empty = empty_rows(data)
  print(f'Null count: {data.isnull().sum()} rows.')
  print(f'Row containing empty data: {empty[1]} rows.')
  print(f'Empty portion {round((empty[1] / data.shape[0]) * 100, 2)}%')

### Check detailed data type


In [12]:
def check_type(data: pd.Series) -> None:
  for row in data:
    if row:
      return type(row)

### First n Non Null Values


In [13]:
def n_non_null_head(data: pd.Series, n: int = 5) -> pd.Series:
  head = []
  for row in data:
    if len(head) >= n:
      break
    if row:
      head.append(row)

  if len(head) < n:
    head.extend([None] * (n - len(head)))
  
  return pd.Series(head)

### Initial Check


In [14]:
def initial_check(data: pd.Series) -> None:
  column_type = check_type(data)
  print(f'Type: {column_type}\n')
  print('First 5 non-null rows:')
  print(n_non_null_head(data))

  print('\nNull and empty values:')
  null_and_empty_rows(data)

### Decode Data Structures From a String


In [15]:
from json import JSONDecodeError
import json

def safe_decoding(row: str) -> any:
  try:
    if len(row) and row:
      return json.loads(row.replace('\'', '\"'))
    else:
      return row
  except JSONDecodeError:
    return row

def decode_json(data: pd.Series) -> pd.Series:
  return data.apply(lambda row: safe_decoding(row))

# Column Analysis


## 1. Question


In [16]:
initial_check(eda_df['question'])

Type: <class 'str'>

First 5 non-null rows:
0    For every string given as input, you need to t...
1    A scientist discovered a strange variation of ...
2    Two integers A and B are the inputs. Write a p...
3    Limak is a grizzly bear who desires power and ...
4    problem\n\nJOI took six subjects: physics, che...
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


Questions are in string, so no need to decode the data


The question column is essential as it holds the actual problem set questions. It seems that the there is no null row too, which means that all rows can be used for prompting.


## 2. Solutions


In [17]:
initial_check(eda_df['solutions'])

Type: <class 'str'>

First 5 non-null rows:
0                                                   []
1                                                   []
2    ["def GCD(x, y):\n\twhile y:\n\t\t(x, y) = (y,...
3    ["n = int(input())\nv = list(map(int, input()....
4    ["lst = []\nfor i in range(6):\n\tn = int(inpu...
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


The column type is string, but the content is a Python list, which means it needs to be decoded.


Some solutions are empty and the solution column itself cannot be used as a benchmark to the chatbot response as it is difficult to quantify how close the chabot codes are to these solutions and how good the codes based on the solutions data.


## 3. Starter Code


In [18]:
initial_check(eda_df['starter_code'])

Type: <class 'str'>

First 5 non-null rows:
0    #User function Template for python3\n\n\n\n'''...
1    class Solution:\n    def minSumOfLengths(self,...
2    from typing import List\n\n\n\n\n\n\n\nclass S...
3    #User function Template for python3\n\nclass S...
4    #User function Template for python3\n\n# Retur...
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 18303 rows.
Empty portion 77.5%


Starter code does not contain any valuable information, so it can be dropped.


## 4. Input-Output


In [19]:
initial_check(eda_df['input_output'])

Type: <class 'str'>

First 5 non-null rows:
0    {"inputs": ["1\naab", "3\naab\ndddd\nthisisapa...
1    {"inputs": ["3\n0\n7\n15\n655\n2711\n6395\n719...
2    {"inputs": [["3", "120 140", "10213 312", "10 ...
3    {"inputs": ["5\n5 1 11 2 8\n", "4\n1 8 8 8\n",...
4    {"inputs": ["100\n34\n76\n67\n10\n0", "100\n34...
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


The data type is string, but the content is dictionary, so it needs to be decoded.


In [20]:
# To get better representation on how the data is structured, decoding must be done.
eda_df['input_output'] = decode_json(eda_df['input_output'])

In [21]:
initial_check(eda_df['input_output'])

Type: <class 'dict'>

First 5 non-null rows:
0    {'inputs': ['1
aab', '3
aab
dddd
thisisapalind...
1    {'inputs': ['3
0
7
15
655
2711
6395
7195
8465
...
2    {'inputs': [['3', '120 140', '10213 312', '10 ...
3    {'inputs': ['5
5 1 11 2 8
', '4
1 8 8 8
', '2
...
4    {'inputs': ['100
34
76
67
10
0', '100
34
76
96...
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


Looking at the data, each input output data contains a dictionary with:<br />

1. Two keys: inputs and outputs
2. The values are a list of inputs and outputs with each index of input attributes to the corresponding index of the output


## 5. Difficulty


In [22]:
initial_check(eda_df['difficulty'])

Type: <class 'str'>

First 5 non-null rows:
0    UNKNOWN_DIFFICULTY
1    UNKNOWN_DIFFICULTY
2                  EASY
3                  EASY
4    UNKNOWN_DIFFICULTY
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


In [23]:
eda_df['difficulty'].value_counts()

difficulty
EASY                  8099
UNKNOWN_DIFFICULTY    4467
MEDIUM                3082
HARD                  2995
MEDIUM_HARD           2656
VERY_HARD             2317
Name: count, dtype: int64

UNKNOWN_DIFFICULTY will not be used as it will be more difficult to filter out questions and analyze the questions and answers later on.


UNKNOWN_DIFFICULTY should be excluded as it will be difficult to discriminate between problem sets later on.


## 6. Raw Tags


In [24]:
initial_check(eda_df['raw_tags'])

Type: <class 'str'>

First 5 non-null rows:
0                                                   []
1                                                   []
2    ['LCM', 'Mathematics', 'Algorithms', 'Number T...
3                         ['greedy', 'implementation']
4                                                   []
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


Some tags are empty, but keeping empty tags are not going to impact the problem set. This is why initial check says there are no empty rows even though some tags are empty.


The data needs to be decoded as they are list inside of a string.


In [25]:
eda_df['raw_tags'] = decode_json(eda_df['raw_tags'])

In [26]:
initial_check(eda_df['raw_tags'])

Type: <class 'list'>

First 5 non-null rows:
0    [LCM, Mathematics, Algorithms, Number Theory, ...
1                             [greedy, implementation]
2                               [math, implementation]
3    [combinatorics, matrices, math, constructive a...
4                             [greedy, implementation]
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 6874 rows.
Empty portion 29.11%


6874 of the total rows do not empty raw tags list.


## 7. Name


In [27]:
initial_check(eda_df['name'])

Type: <class 'str'>

First 5 non-null rows:
0                             subpalindrome-4
1                             modified-number
2                                   leading-1
3          miss-dd-and-her-mysterious-numbers
4    AtCoder Beginner Contest 083 - Some Sums
dtype: object

Null and empty values:
Null count: 20264 rows.
Row containing empty data: 20264 rows.
Empty portion 85.81%


Most of name column values are null, around 85.8062%, so this can be dropped as most data will have null name while the others have name.


## 8. Source


In [28]:
initial_check(eda_df['source'])

Type: <class 'str'>

First 5 non-null rows:
0    hackerearth
1           aizu
2       codechef
3     codeforces
4           aizu
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


In [29]:
sources_set = set()

for source in eda_df['source']:
  sources_set.add(source)

print(sources_set)

{'geeksforgeeks', 'leetcode', 'codeforces', 'atcoder', 'codechef', 'kattis', 'hackerrank', 'codewars', 'hackerearth', 'aizu'}


These are the source of the problem sets. This output can be used to analyze the terms of services of each problem sets, and what can be used for this research.


TOS agreement:

- Code Forces https://codeforces.com/terms 👌
- LeetCode https://leetcode.com/terms/ ❌ (likely no)
- geeksforgeeks ❌ (likely no)
- kattis https://open.kattis.com/info/tos 👌Likely okay (non-commercial)
- aizu https://judge.u-aizu.ac.jp/onlinejudge/submission_note.jsp ❌ Risky (no clear terms)
- hackerearth https://www.hackerearth.com/terms-of-service/ ❌ High risk
- atcoder https://atcoder.jp/tos 👌Yes (Citation Required)
- Codewars https://www.codewars.com/about/terms-of-service 👌Yes (Citation Required)
- hackerrank https://www.hackerrank.com/terms-of-service/ 👌Yes (Citation Required, no problemset reproduction)
- codechef https://www.codechef.com/terms 👌Likely okay (non-commercial


This column is not used specifically in the prompting, so it can be dropped later.


## 9. Tags


In [30]:
initial_check(eda_df['tags'])

Type: <class 'str'>

First 5 non-null rows:
0                                                   []
1                                                   []
2    ['Mathematics', 'Number theory', 'Implementati...
3              ['Implementation', 'Greedy algorithms']
4                                                   []
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


Tags are supposed to be a list, so it needs to be decoded.


In [31]:
eda_df['tags'] = decode_json(eda_df['tags'])

In [32]:
initial_check(eda_df['tags'])

Type: <class 'list'>

First 5 non-null rows:
0         [Mathematics, Number theory, Implementation]
1                  [Implementation, Greedy algorithms]
2                        [Mathematics, Implementation]
3    [Matrices, Combinatorics, Mathematics, Constru...
4                  [Implementation, Greedy algorithms]
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 7652 rows.
Empty portion 32.4%


7652 tags are empty, but it will not be dropped as problem sets with no tags are still included.


## 10. Skill Types


In [33]:
initial_check(eda_df['skill_types'])

Type: <class 'str'>

First 5 non-null rows:
0                       []
1                       []
2                       []
3    ['Greedy algorithms']
4                       []
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 0 rows.
Empty portion 0.0%


Skill Types is structured as a list, but the type is string, so it needs to be decoded.


In [34]:
eda_df['skill_types'] = decode_json(eda_df['skill_types'])

In [35]:
initial_check(eda_df['skill_types'])

Type: <class 'list'>

First 5 non-null rows:
0                                  [Greedy algorithms]
1                                  [Greedy algorithms]
2                     [Data structures, Range queries]
3                                    [Complete search]
4    [Dynamic programming, Amortized analysis, Sort...
dtype: object

Null and empty values:
Null count: 0 rows.
Row containing empty data: 13596 rows.
Empty portion 57.57%


13596 list of the entire skill types lists are empty, these empty skill type lists will be explained on the prompt construction.


## 11. URL


In [36]:
initial_check(eda_df['url'])

Type: <class 'str'>

First 5 non-null rows:
0           https://www.codechef.com/problems/FLOW016
1     https://codeforces.com/problemset/problem/574/A
2      https://codeforces.com/problemset/problem/40/C
3    https://codeforces.com/problemset/problem/1332/E
4     https://codeforces.com/problemset/problem/374/A
dtype: object

Null and empty values:
Null count: 4701 rows.
Row containing empty data: 4701 rows.
Empty portion 19.91%


URL information itself will not affect the produced solutions because they do not correlate with the problem sets, so it will be dropped for the prompting.


## 12. Expected Auxiliary Space


In [37]:
initial_check(eda_df['Expected Auxiliary Space'])

Type: <class 'str'>

First 5 non-null rows:
0                            O(1).
1                         O(N^{2})
2                             O(1)
3    O(Height of the Binary Tree).
4                 O(|str1|*|str2|)
dtype: object

Null and empty values:
Null count: 21230 rows.
Row containing empty data: 21352 rows.
Empty portion 90.41%


In [38]:
eda_df['Expected Auxiliary Space'].value_counts()

Expected Auxiliary Space
O(1)                                                      961
O(N)                                                      307
O(1).                                                     214
                                                          122
O(n)                                                      103
                                                         ... 
O(L * X), L = length of the path, X = number of paths.      1
O(n1 + n2 + n3)                                             1
 O(n2)                                                      1
O(min(n,m)).                                                1
O(log(N))                                                   1
Name: count, Length: 305, dtype: int64

Around 90% of problem sets do not have expected auxiliary space, so they will be assumed to not have expectations, which will be mentioned in the prompts later.


## 13. Time Limit


In [39]:
initial_check(eda_df['time_limit'])

Type: <class 'str'>

First 5 non-null rows:
0    8.0 seconds
1      1 seconds
2    8.0 seconds
3    2.0 seconds
4      2 seconds
dtype: object

Null and empty values:
Null count: 13097 rows.
Row containing empty data: 13097 rows.
Empty portion 55.46%


Around 55.46% of problem sets do not have time limit, so they will be assumed to not have expectations, which will be mentioned in the prompts later.


## 14. Date


In [40]:
initial_check(eda_df['date'])

Type: <class 'str'>

First 5 non-null rows:
0    2015-04-27
1    2019-12-31
2    2020-03-31
3    2019-12-31
4    2019-12-31
dtype: object

Null and empty values:
Null count: 17412 rows.
Row containing empty data: 17412 rows.
Empty portion 73.73%


Date information itself will not affect the produced solutions because they do not correlate with the problem sets, so it will be dropped altogether.


## 15. Picture Num


In [41]:
initial_check(eda_df['picture_num'])

Type: <class 'str'>

First 5 non-null rows:
0    0
1    1
2    0
3    0
4    0
dtype: object

Null and empty values:
Null count: 16906 rows.
Row containing empty data: 16906 rows.
Empty portion 71.59%


In [42]:
eda_df[['url', 'picture_num']].loc[eda_df['url'] == None]

Unnamed: 0,url,picture_num


Picture Num is dependent on url. If URL exists, picture_num can then exist. This indicate that picture num points to the picture inside of the URL if picture_num exists. This can be useful for feature engineering.


## 16. Memory Limit


In [43]:
initial_check(eda_df['memory_limit'])

Type: <class 'str'>

First 5 non-null rows:
0    134.217728 megabytes
1             50000 bytes
2    268.435456 megabytes
3         256.0 megabytes
4           512 megabytes
dtype: object

Null and empty values:
Null count: 13096 rows.
Row containing empty data: 13096 rows.
Empty portion 55.45%


55.45% of memory limit is empty. These empty values will be assumed as no time limit is given for the problem sets.


## 17. Expected Time Complexity


In [44]:
initial_check(eda_df['Expected Time Complexity'])

Type: <class 'str'>

First 5 non-null rows:
0               O(N).
1            O(N^{2})
2              O(|S|)
3               O(N).
4    O(|str1|*|str2|)
dtype: object

Null and empty values:
Null count: 21013 rows.
Row containing empty data: 21135 rows.
Empty portion 89.49%


55.45% of expected time complexity is empty. These empty values will be assumed as no expected time complexity is given for the problem sets.


# Feature Engineering


In [45]:
cleaned_df = full_df.copy()

### Drop Columns


In [46]:
cleaned_df.drop(columns=['solutions', 'starter_code', 'name', 'source', 'date'], inplace=True, axis=1)

### Filter Out Rows


In [47]:
cleaned_df = cleaned_df.loc[cleaned_df['difficulty'] != 'UNKNOWN_DIFFICULTY']

### Decoding


In [48]:
decoded_cols = ['input_output', 'raw_tags', 'tags', 'skill_types']

for col in decoded_cols:
  cleaned_df[col] = decode_json(cleaned_df[col])

### Results


In [55]:
cleaned_df.reset_index(inplace=True, drop=True)

In [56]:
cleaned_df.head()

Unnamed: 0,question,input_output,difficulty,raw_tags,tags,skill_types,url,Expected Auxiliary Space,time_limit,picture_num,memory_limit,Expected Time Complexity
0,Two integers A and B are the inputs. Write a p...,"{'inputs': [['3', '120 140', '10213 312', '10 ...",EASY,"[LCM, Mathematics, Algorithms, Number Theory, ...","[Mathematics, Number theory, Implementation]",[],https://www.codechef.com/problems/FLOW016,,1 seconds,0.0,50000 bytes,
1,Limak is a grizzly bear who desires power and ...,"{'inputs': ['5 5 1 11 2 8 ', '4 1 8 8 8 ', '2 ...",EASY,"[greedy, implementation]","[Implementation, Greedy algorithms]",[Greedy algorithms],https://codeforces.com/problemset/problem/574/A,,,,,
2,Last year the world's largest square was built...,"{'inputs': ['99085 7738 98097 -6487 ', '1 2 2 ...",HARD,"[math, implementation]","[Mathematics, Implementation]",[],https://codeforces.com/problemset/problem/40/C,,2.0 seconds,,256.0 megabytes,
3,Alice has got addicted to a game called Sirtet...,"{'inputs': ['2 2 1 1 ', '1 2 1 2 ', '485 117 3...",HARD,"[combinatorics, matrices, math, constructive a...","[Matrices, Combinatorics, Mathematics, Constru...",[],https://codeforces.com/problemset/problem/1332/E,,2 seconds,1.0,512 megabytes,
4,Dima and Inna are doing so great! At the momen...,"{'inputs': ['5 7 1 3 2 2 ', '5 5 2 3 1 1 ', '1...",HARD,"[greedy, implementation]","[Implementation, Greedy algorithms]",[Greedy algorithms],https://codeforces.com/problemset/problem/374/A,,,,,


In [50]:
cleaned_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 19149 entries, 2 to 999
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   question                  19149 non-null  object
 1   input_output              19149 non-null  object
 2   difficulty                19149 non-null  object
 3   raw_tags                  19149 non-null  object
 4   tags                      19149 non-null  object
 5   skill_types               19149 non-null  object
 6   url                       17891 non-null  object
 7   Expected Auxiliary Space  2386 non-null   object
 8   time_limit                7915 non-null   object
 9   picture_num               6710 non-null   object
 10  memory_limit              7915 non-null   object
 11  Expected Time Complexity  2603 non-null   object
dtypes: object(12)
memory usage: 1.9+ MB


In [54]:
for col in cleaned_df:
  print(f'[{col}]\'s type: {cleaned_df[col].dtype}')

[question]'s type: object
[input_output]'s type: object
[difficulty]'s type: object
[raw_tags]'s type: object
[tags]'s type: object
[skill_types]'s type: object
[url]'s type: object
[Expected Auxiliary Space]'s type: object
[time_limit]'s type: object
[picture_num]'s type: object
[memory_limit]'s type: object
[Expected Time Complexity]'s type: object


# Problems Selection


# Prompts Construction
