# 1. Retrieving data from the Aalto Open Courses API

In [1]:
import os
from dotenv import load_dotenv
import json

load_dotenv()

api_key = os.getenv('COURSE_API_KEY')


In [2]:
import requests

# if data file exists, read it
if os.path.isfile('data/data.json'):
    with open('data/data.json', 'r') as f:
        raw_data = json.load(f)
else:
    # if not, get data from API
    url = 'https://course.api.aalto.fi:443/api/sisu/v1/courseunitrealisations?user_key=' + api_key
    r = requests.get(url)

    # get data
    raw_data = r.json()

    # remove the courses that have languageOfInstruction = 'en'
    print("There are", len(raw_data), "courses in total.")

    en_data = [course for course in raw_data if 'en' in course['languageOfInstructionCodes']]
    not_en_data = [course for course in raw_data if 'en' not in course['languageOfInstructionCodes']]

    print("There are", len(en_data), "courses in English.")
    print("There are", len(not_en_data), "courses not in English.")

    # save data into a json file
    with open('data/data.json', 'w') as f:
        json.dump(en_data, f)

    raw_data = en_data

In [3]:
import pandas as pd
import re

# add data into a dataframe without the summary column
courses = pd.DataFrame(raw_data)
del courses['summary']
del courses['languageOfInstructionCodes']

# extract content and learning outcomes and add them as new columns
courses['content'] = [course['summary']['content']['en']
                      for course in raw_data]
courses['learningOutcomes'] = [course['summary']
                               ['learningOutcomes']['en'] for course in raw_data]

# fix other fields
courses['name'] = [course['name']['en'] for course in raw_data]
courses['credits'] = [course['credits']['max'] for course in raw_data]
courses.rename(columns={'credits': 'maxcredits'}, inplace=True)
courses['organizationName'] = [course['organizationName']['en']
                               for course in raw_data]
courses['organizations'] = [course['organizations'][0]['organisationId']
                            for course in raw_data]
del courses['studySubGroups']

# extract some other useful information
courses['teachingPeriod'] = [course['summary']['teachingPeriod']['en']
                             for course in raw_data]
courses['linkToCourse'] = [course['summary']['additionalInformation']['en']
                           for course in raw_data]
# TODO extract the http link from the linkToCourse column
courses['linkToCourse'] = re.search(
    "(?P<url>https?://[^\s]+)", str(courses['linkToCourse'])).group("url")

# reorganize the columns
courses = courses[['id', 'code', 'courseUnitId', 'type', 'name', 'content', 'learningOutcomes', 'teachers', 'teachingPeriod', 'startDate', 'endDate',
                   'mincredits', 'maxcredits', 'enrolmentStartDate', 'enrolmentEndDate', 'organizationId', 'organizationName', 'organizations', 'linkToCourse']]

# fix type of all columns
courses['id'] = courses['id'].astype(str)
courses['code'] = courses['code'].astype(str)
courses['type'] = courses['type'].astype(str)
courses['name'] = courses['name'].astype(str)
courses['content'] = courses['content'].astype(str)
courses['learningOutcomes'] = courses['learningOutcomes'].astype(str)
courses['teachers'] = courses['teachers'].astype(str)
courses['teachingPeriod'] = courses['teachingPeriod'].astype(str)
courses['startDate'] = pd.to_datetime(courses['startDate'])
courses['endDate'] = pd.to_datetime(courses['endDate'])
courses['mincredits'] = courses['mincredits'].astype(int)
courses['maxcredits'] = courses['maxcredits'].astype(int)
courses['enrolmentStartDate'] = pd.to_datetime(courses['enrolmentStartDate'])
courses['enrolmentEndDate'] = pd.to_datetime(courses['enrolmentEndDate'])
courses['organizationId'] = courses['organizationId'].astype(str)
courses['organizationName'] = courses['organizationName'].astype(str)
courses['organizations'] = courses['organizations'].astype(str)
courses['linkToCourse'] = courses['linkToCourse'].astype(str)

courses.head()


Unnamed: 0,id,code,courseUnitId,type,name,content,learningOutcomes,teachers,teachingPeriod,startDate,endDate,mincredits,maxcredits,enrolmentStartDate,enrolmentEndDate,organizationId,organizationName,organizations,linkToCourse
0,aalto-CUR-162063-3082770,LC-1117,aalto-OPINKOHD-1117673055-20210801,teaching-participation-lectures,"Integrated Oral and Written Skills (o,w) (H06 ...",This course introduces written and oral commun...,"Upon completion of this course, the students w...",['Hanna Liisa Hakala'],"2020-2021 Autumn I-II,Spring III-IV,Spring IV-...",2023-01-11,2023-02-15,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
1,aalto-CUR-162064-3082771,LC-1117,aalto-OPINKOHD-1117673055-20210801,teaching-participation-lectures,"Integrated Oral and Written Skills (o,w) (H07 ...",This course introduces written and oral commun...,"Upon completion of this course, the students w...",['Jenni Maria Korvala'],"2020-2021 Autumn I-II,Spring III-IV,Spring IV-...",2023-01-11,2023-03-29,3,3,2022-12-12,2023-01-10,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
2,aalto-CUR-162065-3082772,LC-1117,aalto-OPINKOHD-1117673055-20210801,teaching-participation-lectures,"Integrated Oral and Written Skills (o,w), Lect...",This course introduces written and oral commun...,"Upon completion of this course, the students w...",['Maxi-Ann Marie A Campbell'],"2020-2021 Autumn I-II,Spring III-IV,Spring IV-...",2023-01-17,2023-04-04,3,3,2022-12-12,2023-01-12,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
3,aalto-CUR-162066-3082773,LC-1117,aalto-OPINKOHD-1117673055-20210801,teaching-participation-lectures,"Integrated Oral and Written Skills (o,w) (BSc ...",This course introduces written and oral commun...,"Upon completion of this course, the students w...",['Riina Marketta Seppälä'],"2020-2021 Autumn I-II,Spring III-IV,Spring IV-...",2023-03-03,2023-05-19,3,3,2023-01-30,2023-02-20,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
4,aalto-CUR-162068-3082775,LC-1117,aalto-OPINKOHD-1117673055-20210801,teaching-participation-lectures,"Integrated Oral and Written Skills (o,w) (Int...",This course introduces written and oral commun...,"Upon completion of this course, the students w...","['Susan Katariina Gamache', 'Malachy James Hal...","2020-2021 Autumn I-II,Spring III-IV,Spring IV-...",2023-04-25,2023-05-23,3,3,2023-03-27,2023-04-17,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...


In [4]:
# number of courses with unique code
# print(len(courses["code"].unique())) # 985

# number of courses with unique courseUnitId
# print(len(courses["courseUnitId"].unique())) # 993

# measure difference between two fields
# unique_courseUnitId = courses["courseUnitId"].unique()
# unique_code = courses["code"].unique()

unique_courseUnitId = courses.drop_duplicates(subset=["courseUnitId"], keep="first")
unique_code = courses.drop_duplicates(subset=["code"], keep="first")

# difference of ids
id_difference = list(set(unique_courseUnitId["id"]) - set(unique_code["id"]))

# different courses with different filtering (code, unitId)
courses[courses["id"].isin(id_difference)]

Unnamed: 0,id,code,courseUnitId,type,name,content,learningOutcomes,teachers,teachingPeriod,startDate,endDate,mincredits,maxcredits,enrolmentStartDate,enrolmentEndDate,organizationId,organizationName,organizations,linkToCourse
518,aalto-CUR-164698-3085405,MS-A0001,aalto-OPINKOHD-1112895518-20210801,exam-exam,"Matrix Algebra, Exam","Vector computations, matrices and systems of l...",After the course the student - can write syste...,['Harri Heimo Petteri Hakula'],"2020-2021 Autumn II, 2021-2022 Autumn II",2023-02-24,2023-02-24,5,5,2022-12-26,2023-02-17,T302,Department of Mathematics and Systems Analysis,aalto-a3b50b1e-ed7d-4136-8897-c7aa1a4f899b,https://mycourses.aalto.fi/co...
606,aalto-CUR-165469-3086176,MS-C1541,aalto-OPINKOHD-1142267517-20210801,teaching-participation-lectures,"Metric Spaces, Lecture","real numbers, metric, norm, inner product, ope...",After passing the course the student knows - m...,['Kalle Perttu Juhana Kytölä'],"2020-2021 Spring III, 2021-2022 Spring III",2023-01-09,2023-02-23,5,5,2022-12-12,2023-01-02,T302,Department of Mathematics and Systems Analysis,aalto-a3b50b1e-ed7d-4136-8897-c7aa1a4f899b,https://mycourses.aalto.fi/co...
622,aalto-CUR-165738-3086445,PHYS-E0525,otm-ebad0f6d-8023-47e2-bfe1-724ca4b10653,exam-exam,"Microscopy of Nanomaterials D, Exam",The course gives basic knowledge of the micros...,"After the course, students will understand the...","['Janne Tapio Ruokolainen', 'Hua Jiang']",<p> Teaching Language : English</p><p> Teachin...,2023-04-18,2023-04-18,5,5,2023-02-17,2023-04-11,T304,Department of Applied Physics,aalto-7b8d166f-e80e-4481-838e-72098c5ea180,https://mycourses.aalto.fi/co...
629,aalto-CUR-165886-3086593,PHYS-C0256,otm-4cb03484-428b-4232-8db0-16dfff5ed692,exam-exam,"Thermodynamics and Statistical Physics, Exam",The basic concepts and assumptions of statisti...,After the course the student\r\n<ol><li>Can ex...,['Jukka Pekka Pekola'],<p> Teaching Language : English</p><p> Teachin...,2023-02-22,2023-02-22,5,5,2022-12-24,2023-02-15,T304,Department of Applied Physics,aalto-7b8d166f-e80e-4481-838e-72098c5ea180,https://mycourses.aalto.fi/co...
633,aalto-CUR-165900-3086607,PHYS-E055103,otm-38943da8-7b1d-408a-855f-201f0fea96e8,exam-exam,"Low Temperature Physics D, Superconductivity, ...",The Bardeen-Cooper-Schrieffer theory of superc...,The students will get a basic understanding of...,['Vladimir Eltsov'],"<p>In 2022-2024, this course is available as a...",2023-04-18,2023-04-18,5,6,2023-02-17,2023-04-11,T304,Department of Applied Physics,aalto-7b8d166f-e80e-4481-838e-72098c5ea180,https://mycourses.aalto.fi/co...
736,aalto-CUR-166588-3087295,NBE-E4100,otm-9881eb6c-844e-4431-a1cf-8327dca88257,exam-exam,"Molecular Biophysics D, Exam",<ul><li>Fundamentals of biomolecules: Biopolym...,After passing the course the student is able t...,['Anton Kuzyk'],<p> Teaching Language : English</p><p> Teachin...,2023-05-31,2023-05-31,5,5,2023-04-01,2023-05-24,T314,Department of Neuroscience and Biomedical Engi...,aalto-1311f55a-509b-485d-b974-5ddaf28ffaa8,https://mycourses.aalto.fi/co...
1272,aalto-CUR-170151-2411704,PHYS-C0252,otm-7cef259b-e7cd-4495-a080-01c0d58cf8cc,exam-exam,"Quantum Mechanics, Exam","Hilbert space and Dirac notation; Operators, e...",After completing the course the student\r\n<ol...,"['Mikko Pentti Matias Möttönen', 'Tapio Ala-Ni...",<p> Teaching Language : English</p><p> Teachin...,2023-06-08,2023-06-08,5,5,2023-04-09,2023-06-01,T304,Department of Applied Physics,aalto-7b8d166f-e80e-4481-838e-72098c5ea180,https://mycourses.aalto.fi/co...
1275,aalto-CUR-170160-2412218,PHYS-C0254,otm-54fd870b-cc18-4a73-9bee-eeb5acf2b0d0,exam-exam,"Quantum Circuits, Exam",The physical foundations and implementation of...,"After completing this course, the student is a...","['Gheorghe-Sorin Paraoanu', 'Jan Goetz']",<p> Teaching Language : English</p><p> Teachin...,2023-04-20,2023-04-20,5,5,2023-02-19,2023-04-13,T304,Department of Applied Physics,aalto-7b8d166f-e80e-4481-838e-72098c5ea180,https://mycourses.aalto.fi/co...


In [5]:
MS_C1541_exams = courses[(courses["code"] == "MS-C1541") & (courses["type"] == "exam-exam")]["content"]
MS_C1541_teachings = courses[(courses["code"] == "MS-C1541") & (courses["type"] == "teaching-participation-lectures")]["content"]

# exam course contents
print(MS_C1541_exams[603])
# teaching course contents
print(MS_C1541_teachings[606])

assert MS_C1541_exams[603] == MS_C1541_teachings[606]

real numbers, metric, norm, inner product, open and closed sets, continuous mappings, sequences and limits, compactness, completeness, connectedness.
real numbers, metric, norm, inner product, open and closed sets, continuous mappings, sequences and limits, compactness, completeness, connectedness.


In [6]:
# average length of course contents
print("Average length of course contents:", courses["content"].apply(len).mean())

# average length of learning outcomes
print("Average length of learning outcomes:", courses["learningOutcomes"].apply(len).mean())

Average length of course contents: 461.59348441926346
Average length of learning outcomes: 472.3201133144476


After this findings, we decided to filter the data using the code field. We will also use the learning outcomes field combined with the content to create a new field that will be used for the recommender system.

In [7]:
courses = courses.drop_duplicates(subset=["code"], keep="first")
# re-index the dataframe
courses = courses.reset_index(drop=True)

courses

Unnamed: 0,id,code,courseUnitId,type,name,content,learningOutcomes,teachers,teachingPeriod,startDate,endDate,mincredits,maxcredits,enrolmentStartDate,enrolmentEndDate,organizationId,organizationName,organizations,linkToCourse
0,aalto-CUR-162063-3082770,LC-1117,aalto-OPINKOHD-1117673055-20210801,teaching-participation-lectures,"Integrated Oral and Written Skills (o,w) (H06 ...",This course introduces written and oral commun...,"Upon completion of this course, the students w...",['Hanna Liisa Hakala'],"2020-2021 Autumn I-II,Spring III-IV,Spring IV-...",2023-01-11,2023-02-15,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
1,aalto-CUR-162078-3082785,LC-1310,aalto-OPINKOHD-1117673220-20210801,teaching-participation-lectures,"Academic Communication for MSc Students (o,w),...",The course is intended for students in master&...,"Upon completion of this course, students will ...",['Jan-Mikael Rybicki'],"2020-2021 Autumn I-II,Spring III-IV , 2021-202...",2023-01-13,2023-04-21,3,3,2022-12-12,2023-01-12,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
2,aalto-CUR-162115-3082822,LCA-1015,aalto-OPINKOHD-1126097189-20210801,teaching-participation-small-group,"Autonomous Language Learning Path (o,w), Small...",The students will devise a personalized learni...,The main goal of this course is that students ...,['Hanna Liisa Hakala'],"2020-2021 Autumn I-II,Spring III-IV , 2021-202...",2023-01-09,2023-04-21,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
3,aalto-CUR-162118-3082825,LCA-1022,aalto-OPINKOHD-1126098837-20210801,teaching-participation-lectures,"Academic Writing (w), Lecture",Successful participation requires that you hav...,"Upon completion of this course, students will ...",['Matthew Peter Billington'],"2020-2021 Autumn I,Autumn II,Spring III,Spring...",2023-01-11,2023-02-13,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
4,aalto-CUR-162124-3082831,LC-1113,aalto-OPINKOHD-1126098945-20210801,teaching-participation-small-group,"Autonomous Language Learning Path (o,w), Sma...",The students will devise a personalised learni...,The main goal of this course is that students ...,['Hanna Liisa Hakala'],"2020-2021 Autumn I-II,Spring III-V , 2021-2022...",2023-01-09,2023-04-21,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
980,otm-e8183aa3-e6d7-4d44-952b-fe4c229426d8,CS-E4675,aalto-CU-1150933422-20220801,teaching-participation-project,"Full Stack Web Development D, Project",The contents of the course are openly availabl...,The course is offered by the University of Hel...,['Arto Hellas'],<p> Teaching Language : English</p><p> Teachin...,2023-01-09,2023-06-09,5,7,2022-12-12,2023-05-29,T313,Department of Computer Science,aalto-3b3aa303-843a-4a29-97ca-29c45d53f923,https://mycourses.aalto.fi/co...
981,otm-edfae58b-3d21-4fcb-a82b-3c4a789c0751,CS-E400604,otm-acc4b322-b771-43cc-b734-3121c68c0b93,teaching-participation-project,Research Experience Project in Computer Scienc...,,,['Olli Pekka Orponen'],,2023-01-01,2023-07-31,5,5,2023-01-01,2023-07-17,T313,Department of Computer Science,aalto-3b3aa303-843a-4a29-97ca-29c45d53f923,https://mycourses.aalto.fi/co...
982,otm-ee70c3a2-38e6-472b-9d38-45386b12fb98,TU-CV00011,otm-86a3f11a-6e29-4691-ab8a-19c9fce9fad6,teaching-participation-lectures,"Thinking Tools, Lectures",,,['Lauri Veikko Järvilehto'],,2023-02-28,2023-04-28,3,3,2023-01-30,2023-03-06,T307,Department of Industrial Engineering and Manag...,aalto-9a94b0a1-836d-4384-9bde-6709e8a053db,https://mycourses.aalto.fi/co...
983,otm-f08696a1-8207-45b3-9958-524bf5146c22,ENG-LV,aalto-OPINKOHD-1125772556-20210801,teaching-participation-lectures,"Course with Varying Content, V D, Lectures",,,"['Harri Juhani Koivusalo', 'Maija Kaarina Taka']",,2023-04-26,2023-05-31,1,10,2023-02-01,2023-04-16,T2,School of Engineering,aalto-f58f28c8-3503-48e3-a223-41840f239806,https://mycourses.aalto.fi/co...


In [8]:
example_course = courses.iloc[0]["content"]
example_course

'This course introduces written and oral communication principles and strategies that are applicable to professional and academic purposes and is at the same time integrated with a content course. Written tasks and oral tasks will support the content course and be largely determined by its requirements. Throughout this course, students work individually and/or in small groups to develop their presentation and writing skills. Moreover, students give and receive constructive feedback on their work and revise it accordingly. Working Life Skills: Varies according to each integration project.'

In [9]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
tokenized_text = tokenizer.tokenize(example_course)
encoded = tokenizer.encode(
                    example_course, # Sentence to encode.
                    add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                    # This function also supports truncation and conversion
                    # to pytorch tensors, but we need to do padding, so we
                    # can't use these features :( .
                    #max_length = 128,          # Truncate all sentences.
                    #return_tensors = 'pt',     # Return pytorch tensors.
               )

In [None]:
tokenized_text = ["[CLS]"] + tokenized_text + ["[SEP]"] 
tokenized_text

In [None]:
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
indexed_tokens

In [13]:
segments_ids = [1] * len(tokenized_text)

In [14]:
import torch

tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

In [15]:
from transformers import BertModel

In [None]:
model = BertModel.from_pretrained('bert-base-uncased',
                                  output_hidden_states = True, # Whether the model returns all hidden-states.
                                  )

# Put the model in "evaluation" mode, meaning feed-forward operation.
model.eval()

In [17]:
with torch.no_grad():

    outputs = model(tokens_tensor, segments_tensors)

    # Evaluating the model will return a different number of objects based on 
    # how it's  configured in the `from_pretrained` call earlier. In this case, 
    # becase we set `output_hidden_states = True`, the third item will be the 
    # hidden states from all layers. See the documentation for more details:
    # https://huggingface.co/transformers/model_doc/bert.html#bertmodel
    hidden_states = outputs[2]

In [18]:
len(hidden_states)

13

In [19]:
len(indexed_tokens)

99

In [20]:
# Concatenate the tensors for all layers. We use `stack` here to
# create a new dimension in the tensor.
token_embeddings = torch.stack(hidden_states, dim=0)

token_embeddings.size()

torch.Size([13, 1, 99, 768])

In [21]:
# Remove dimension 1, the "batches".
token_embeddings = torch.squeeze(token_embeddings, dim=1)

token_embeddings.size()

torch.Size([13, 99, 768])

In [22]:
token_embeddings[12].size()

torch.Size([99, 768])

In [23]:
# `hidden_states` has shape [13 x 1 x 22 x 768]

# `token_vecs` is a tensor with shape [22 x 768]
token_vecs = hidden_states[-2][0]

# Calculate the average of all 22 token vectors.
sentence_embedding = torch.mean(token_vecs, dim=0)
sentence_embedding.size()

torch.Size([768])

# 2. Data cleaning

In [24]:
courses

Unnamed: 0,id,code,courseUnitId,type,name,content,learningOutcomes,teachers,teachingPeriod,startDate,endDate,mincredits,maxcredits,enrolmentStartDate,enrolmentEndDate,organizationId,organizationName,organizations,linkToCourse
0,aalto-CUR-162063-3082770,LC-1117,aalto-OPINKOHD-1117673055-20210801,teaching-participation-lectures,"Integrated Oral and Written Skills (o,w) (H06 ...",This course introduces written and oral commun...,"Upon completion of this course, the students w...",['Hanna Liisa Hakala'],"2020-2021 Autumn I-II,Spring III-IV,Spring IV-...",2023-01-11,2023-02-15,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
1,aalto-CUR-162078-3082785,LC-1310,aalto-OPINKOHD-1117673220-20210801,teaching-participation-lectures,"Academic Communication for MSc Students (o,w),...",The course is intended for students in master&...,"Upon completion of this course, students will ...",['Jan-Mikael Rybicki'],"2020-2021 Autumn I-II,Spring III-IV , 2021-202...",2023-01-13,2023-04-21,3,3,2022-12-12,2023-01-12,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
2,aalto-CUR-162115-3082822,LCA-1015,aalto-OPINKOHD-1126097189-20210801,teaching-participation-small-group,"Autonomous Language Learning Path (o,w), Small...",The students will devise a personalized learni...,The main goal of this course is that students ...,['Hanna Liisa Hakala'],"2020-2021 Autumn I-II,Spring III-IV , 2021-202...",2023-01-09,2023-04-21,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
3,aalto-CUR-162118-3082825,LCA-1022,aalto-OPINKOHD-1126098837-20210801,teaching-participation-lectures,"Academic Writing (w), Lecture",Successful participation requires that you hav...,"Upon completion of this course, students will ...",['Matthew Peter Billington'],"2020-2021 Autumn I,Autumn II,Spring III,Spring...",2023-01-11,2023-02-13,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
4,aalto-CUR-162124-3082831,LC-1113,aalto-OPINKOHD-1126098945-20210801,teaching-participation-small-group,"Autonomous Language Learning Path (o,w), Sma...",The students will devise a personalised learni...,The main goal of this course is that students ...,['Hanna Liisa Hakala'],"2020-2021 Autumn I-II,Spring III-V , 2021-2022...",2023-01-09,2023-04-21,3,3,2022-12-12,2023-01-02,U926,"Aalto University, Language Centre",aalto-52ed67c2-4791-4ee6-9475-547b73c8d10a,https://mycourses.aalto.fi/co...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
980,otm-e8183aa3-e6d7-4d44-952b-fe4c229426d8,CS-E4675,aalto-CU-1150933422-20220801,teaching-participation-project,"Full Stack Web Development D, Project",The contents of the course are openly availabl...,The course is offered by the University of Hel...,['Arto Hellas'],<p> Teaching Language : English</p><p> Teachin...,2023-01-09,2023-06-09,5,7,2022-12-12,2023-05-29,T313,Department of Computer Science,aalto-3b3aa303-843a-4a29-97ca-29c45d53f923,https://mycourses.aalto.fi/co...
981,otm-edfae58b-3d21-4fcb-a82b-3c4a789c0751,CS-E400604,otm-acc4b322-b771-43cc-b734-3121c68c0b93,teaching-participation-project,Research Experience Project in Computer Scienc...,,,['Olli Pekka Orponen'],,2023-01-01,2023-07-31,5,5,2023-01-01,2023-07-17,T313,Department of Computer Science,aalto-3b3aa303-843a-4a29-97ca-29c45d53f923,https://mycourses.aalto.fi/co...
982,otm-ee70c3a2-38e6-472b-9d38-45386b12fb98,TU-CV00011,otm-86a3f11a-6e29-4691-ab8a-19c9fce9fad6,teaching-participation-lectures,"Thinking Tools, Lectures",,,['Lauri Veikko Järvilehto'],,2023-02-28,2023-04-28,3,3,2023-01-30,2023-03-06,T307,Department of Industrial Engineering and Manag...,aalto-9a94b0a1-836d-4384-9bde-6709e8a053db,https://mycourses.aalto.fi/co...
983,otm-f08696a1-8207-45b3-9958-524bf5146c22,ENG-LV,aalto-OPINKOHD-1125772556-20210801,teaching-participation-lectures,"Course with Varying Content, V D, Lectures",,,"['Harri Juhani Koivusalo', 'Maija Kaarina Taka']",,2023-04-26,2023-05-31,1,10,2023-02-01,2023-04-16,T2,School of Engineering,aalto-f58f28c8-3503-48e3-a223-41840f239806,https://mycourses.aalto.fi/co...


In [25]:
# courses with empty content and learning outcomes
len(courses[(courses["content"] == "") & (courses["learningOutcomes"] == "")])

45

In [158]:
# concatenate content and learning outcomes
data = courses[(courses["content"] != "") & (courses["learningOutcomes"] != "")]
data = data[["code", "content", "learningOutcomes"]]
data["content_learningOutcomes"] = data["content"] + " " + data["learningOutcomes"]

del data["content"]
del data["learningOutcomes"]

data

Unnamed: 0,code,content_learningOutcomes
0,LC-1117,This course introduces written and oral commun...
1,LC-1310,The course is intended for students in master&...
2,LCA-1015,The students will devise a personalized learni...
3,LCA-1022,Successful participation requires that you hav...
4,LC-1113,The students will devise a personalised learni...
...,...,...
974,MNGT-C1003,Chapter 1: How is work changingWhat do we talk...
976,CS-EJ5481,This module aims at demystifying digital trans...
977,JOIN-A0002,To act as a tutor for a small group of interna...
979,CS-E4770,Methodologies and evolution of designing and b...


In [159]:
# generate a dict with code as key
data_dict = data.set_index("code").T.to_dict("list")

len(data_dict['LC-1117'][0].split())

138

In [35]:
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/tianxing/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [164]:
def preprocessing(input_sentence):

    # remove stopwords
    # input_sentence = [word for word in input_sentence.split() if word.lower() not in stopwords.words('english')]
    # input_sentence = " ".join(input_sentence)

    #print(input_sentence)

    #encoded_input = tokenizer(input_sentence, padding=True, truncation=True)
    #print(encoded_input)

    # TODO get index of stopwords and return it
    # split the input_sentence into sentences
    sentences = input_sentence.split(".")
    #print(len(sentences))

    seperate_sentences = "[SEP]".join(sentences)
    seperate_sentences = "[CLS]" + seperate_sentences


    encoded_sent = tokenizer(
                        seperate_sentences,                      # Sentence to encode.
                        # add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        # This function also supports truncation and conversion
                        # to pytorch tensors, but we need to do padding, so we
                        # can't use these features :( .
                        padding = True,
                        truncation = True,
                        max_length = 99,          # Truncate all sentences.
                        #return_tensors = 'pt',     # Return pytorch tensors.
                )

    return encoded_sent["input_ids"], encoded_sent["token_type_ids"]

In [122]:
def get_embedding(encoded_sent, segments_ids):

    tokens_tensor = torch.tensor([encoded_sent])
    segments_tensors = torch.tensor([segments_ids])

    with torch.no_grad():
        outputs = model(tokens_tensor, segments_tensors)
        hidden_states = outputs[2]

    # TODO remove embedding of stop words with index
    # TODO improvement
    return torch.mean(hidden_states[-2][0], dim=0)

In [165]:
course_words_embeddings = {}

for (code, desc) in list(data_dict.items()):
    
    # preprocess the course description
    encoded_sent, segments_ids = preprocessing(desc[0])

    # get the embedding of the course description from the model
    embedding = get_embedding(encoded_sent, segments_ids)

    course_words_embeddings[code] = embedding

In [80]:
cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
# print(course_words_embeddings['LC-1117'])
# print(course_words_embeddings['LC-1310'])
cos(course_words_embeddings['LC-1117'], course_words_embeddings['LC-1310'])

tensor(1.0000)

In [40]:
# courses[courses["code"].isin(list(course_words_embeddings.keys()))]

In [81]:
encoded_sent, segments_ids = preprocessing(data_dict["ELEC-E5550"][0])
embedding = get_embedding(encoded_sent, segments_ids)

cos(embedding, course_words_embeddings['LC-1117'])

tensor(0.9001)

In [42]:
courses[(courses["code"] == "LC-1117") | (courses["code"] == "LC-1310")]["content"][0]

'This course introduces written and oral communication principles and strategies that are applicable to professional and academic purposes and is at the same time integrated with a content course. Written tasks and oral tasks will support the content course and be largely determined by its requirements. Throughout this course, students work individually and/or in small groups to develop their presentation and writing skills. Moreover, students give and receive constructive feedback on their work and revise it accordingly. Working Life Skills: Varies according to each integration project.'

In [43]:
courses[(courses["code"] == "LC-1117") | (courses["code"] == "LC-1310")]["content"][1]

'The course is intended for students in master&#39;s programs who are not yet in the process of writing their master&#39;s theses. It introduces written and oral communication principles and strategies that are applicable to academic and research purposes. Students begin by writing on a topic from their own field of study based on previous (BSc) or ongoing (MSc) research. To enhance readability of the texts, students apply organisational patterns, such as problem-solution, as well as other writing principles. Based on their written work, students deliver an oral presentation videoed for self- and teacher-evaluation. As part of the learning process, students analyse presentations to identify their strengths and areas for improvement, as well as practise organising and presenting information clearly to a non-expert, but academic audience. Throughout this course, students work individually, in pairs and in small groups to develop their presentation and writing skills. Moreover, students g

In [135]:
def get_relavent_courses(course_code, n):
    """
    arg: course_code
    course code
    arg: n
    number of relavent courses
    """
    query_embedding = course_words_embeddings[course_code]
    
    course_codes = list(course_words_embeddings.keys())
    course_scores = [cos(course_embedding, query_embedding) for course_embedding in course_words_embeddings.values()]
    
    # sorted_course_codes = [sorted(zip(course_scores, course_codes), reverse=True)][:n]
    sorted_course_codes = [course_code for _, course_code in sorted(zip(course_scores, course_codes), reverse=True)][:n]
    
    return sorted_course_codes
    
    
def get_unrelavent_courses(course_code, n):
    """
    arg: course_code
    course code
    arg: n
    number of relavent courses
    """
    query_embedding = course_words_embeddings[course_code]
    
    course_codes = list(course_words_embeddings.keys())
    course_scores = [cos(course_embedding, query_embedding) for course_embedding in course_words_embeddings.values()]
    
    # sorted_course_codes = [sorted(zip(course_scores, course_codes), reverse=True)][:n]
    sorted_course_codes = [course_code for _, course_code in sorted(zip(course_scores, course_codes), reverse=True)][-n:]
    
    return sorted_course_codes

In [166]:
snlp_relavent_courses = get_relavent_courses("ELEC-E5550", 10)
snlp_unrelavent_courses = get_unrelavent_courses("ELEC-E5550", 10)


In [167]:
snlp_relavent_courses

['ELEC-E5550',
 'ELEC-D7011',
 'CS-E4880',
 'CHEM-E2125',
 'ELO-E8005',
 'CS-E4650',
 '32E30001',
 'ELEC-E8102',
 'ABL-C1102',
 'USP-E0305']

In [168]:
relavent_courses = courses[courses["code"].isin(snlp_relavent_courses)]
relavent_courses.set_index("code").reindex(snlp_relavent_courses)

Unnamed: 0_level_0,id,courseUnitId,type,name,content,learningOutcomes,teachers,teachingPeriod,startDate,endDate,mincredits,maxcredits,enrolmentStartDate,enrolmentEndDate,organizationId,organizationName,organizations,linkToCourse
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
ELEC-E5550,aalto-CUR-163444-3084151,aalto-OPINKOHD-1125519948-20210801,teaching-participation-lectures,"Statistical Natural Language Processing D, Lec...",Many core applications in modern information s...,"After attending the course, the student knows ...",['Mikko Juhani Kurimo'],"2020-2021 Spring III-IV , 2021-2022 Spring III-IV",2023-01-10,2023-04-18,5,5,2022-12-12,2023-01-16,T412,Department of lnformation and Communications E...,aalto-org-t412-20230101,https://mycourses.aalto.fi/co...
ELEC-D7011,aalto-CUR-168626-3088782,aalto-CU-1150933196-20220801,teaching-participation-lectures,"Human Factors Engineering, Lecture",Course contents cover theories and models of h...,This course provides students in all areas of ...,['Antti Olavi Oulasvirta'],<p>See course homepage in MyCourses for more i...,2023-04-25,2023-06-07,5,5,2023-03-27,2023-05-01,T412,Department of lnformation and Communications E...,aalto-org-t412-20230101,https://mycourses.aalto.fi/co...
CS-E4880,aalto-CUR-166565-3087272,aalto-OPINKOHD-1129662378-20210801,teaching-participation-lectures,"Machine Learning in Bioinformatics D, Lecture",Machine learning is one of the cornerstone tec...,The students will learn how machine learning u...,"['Juho Heikki Rousu', 'Vikas Kumar Garg']","2020-2021 Spring IV-V, (2021, 2022) - No teaching",2023-03-03,2023-06-02,5,5,2023-02-10,2023-02-24,T313,Department of Computer Science,aalto-3b3aa303-843a-4a29-97ca-29c45d53f923,https://mycourses.aalto.fi/co...
CHEM-E2125,aalto-CUR-164498-3085205,aalto-OPINKOHD-1121163615-20210801,teaching-participation-lectures,"Web-Based Natural Fiber Products, Lecture",The course gives students an overview of the m...,After the course the student <ul><li>knows dif...,"['Eero Johannes Hiltunen', 'Thaddeus Christoph...","2020-2021 Spring III-IV , 2021-2022 Spring III-IV",2023-01-13,2023-04-18,5,5,2022-12-12,2023-01-12,T107,Department of Bioproducts and Biosystems,aalto-1e1af3eb-c61a-4e69-b49e-ff2c8f5d505c,https://mycourses.aalto.fi/co...
ELO-E8005,aalto-CUR-170101-3089139,aalto-CU-1150932809-20220801,teaching-participation-lectures,"Creative Ateliers, Lecture",Creative Animation Ateliers II - development; ...,Creative Animation Ateliers II - development; ...,['Tuula Maaria Leinonen'],<p> Teaching Language : English</p><p> Teachin...,2023-03-27,2023-05-19,9,9,2023-01-10,2023-02-20,A801,"Department of Film, Television and Scenography",aalto-441ac1c6-e05d-406f-8842-8122041dc75b,https://mycourses.aalto.fi/co...
CS-E4650,aalto-CUR-166397-3087104,aalto-OPINKOHD-1142276204-20210801,exam-exam,"Methods of Data Mining D, Exam",The course covers fundamental data mining prob...,"After the course, the students have an overvie...",['Jorma Tapio Laaksonen'],"2020-2021 Autumn I-II, 2021-2022 Autumn I-II",2023-02-22,2023-02-22,5,5,2022-12-24,2023-02-15,T313,Department of Computer Science,aalto-3b3aa303-843a-4a29-97ca-29c45d53f923,https://mycourses.aalto.fi/co...
32E30001,aalto-CUR-167064-3087771,otm-d18b3d14-423b-4524-b285-19e09a0fc7b1,teaching-participation-lectures,"Tax Challenges for Multinational Enterprises, ...",The course focuses on selected topical issues ...,The students learn how to analyse and solve ta...,['Mika Petteri Rapo'],<p> Teaching Language : English</p><p> Teachin...,2023-04-25,2023-05-11,6,6,2023-03-27,2023-04-24,E701,Department of Accounting,aalto-e43a65a8-9ca6-4192-b64c-7887ff053b7a,https://mycourses.aalto.fi/co...
ELEC-E8102,aalto-CUR-163639-3084346,otm-fe265578-3e7d-479c-928f-35bd400c01cf,exam-exam,Distributed and Intelligent Automation Systems...,This course will address the problem of how to...,Understanding of challenges of distributed sys...,"['Valeriy Vyatkin', 'Udayanto Dwi Atmojo', 'Pr...",<p> Teaching Language : English</p><p> Teachin...,2023-02-27,2023-02-27,5,5,2022-12-29,2023-02-20,T410,Department of Electrical Engineering and Autom...,aalto-e7fcc876-6768-4ed9-adc6-05d15ea7f806,https://mycourses.aalto.fi/co...
ABL-C1102,aalto-CUR-169054-2372693,aalto-OPINKOHD-1142279260-20210801,teaching-participation-lectures,Hands-On Analytics on Accounting Information S...,The course covers the foundations to understan...,After the course students will: (1) master the...,['Vikash Kumar Sinha'],"2020-2021 Spring IV , 2021-2022 Spring IV, <br />",2023-03-01,2023-04-19,6,6,2023-01-30,2023-03-06,E701,Department of Accounting,aalto-e43a65a8-9ca6-4192-b64c-7887ff053b7a,https://mycourses.aalto.fi/co...
USP-E0305,aalto-CUR-167214-3087921,aalto-OPINKOHD-1132846285-20210801,teaching-participation-lectures,"Urban Challenge Studio 2, Lecture",Common urban challenge studios integrate multi...,"Upon completion of the course, participants sh...","['Pia Christina Fricker', 'Christine Mady']","2020-2021 Spring III-IV, 2021-2022 Spring III-IV",2023-01-12,2023-04-05,10,10,2022-12-12,2023-01-02,T201,Department of Architecture,aalto-3910b7cc-43fa-444b-80c0-c664d50ecbb9,https://mycourses.aalto.fi/co...


In [157]:
courses[courses["code"] == "CS-E5600"]["content"][423]


'Course presents a selection of ideas and concepts in contemporary aesthetic thinking through a wide selection of real-life examples that combine technology, design, and art. The weekly contact teaching sessions consist of lectures, small group discussions, and planning and executing a group task.'

In [125]:
courses[courses["code"].isin(snlp_unrelavent_courses)]

Unnamed: 0,id,code,courseUnitId,type,name,content,learningOutcomes,teachers,teachingPeriod,startDate,endDate,mincredits,maxcredits,enrolmentStartDate,enrolmentEndDate,organizationId,organizationName,organizations,linkToCourse
71,aalto-CUR-162855-3083562,MEC-E7007,aalto-OPINKOHD-1125621531-20210801,exam-exam,"Factory Project, Exam",<ol><li>Introduction</li><li>Investment projec...,The objective of the course is to combine and ...,['Juha Matti Huuki'],"2020-2021 Autumn I-II, 2021-2022 Autumn I-II",2023-02-24,2023-02-24,5,5,2022-12-26,2023-02-17,T212,Department of Mechanical Engineering,aalto-ad156bf6-a982-4ae6-a7a3-fc83dff7c9dd,https://mycourses.aalto.fi/co...
97,aalto-CUR-162983-3083690,SPT-E4010,aalto-OPINKOHD-1125692110-20210801,teaching-participation-lectures,"Transport Modelling D, Lecture",<ul><li>Introduction to Transport Modelling</l...,A student who has passed the course will be ab...,['Claudio Roncoli'],"2020-2021 Spring III, 2021-2022 Spring III",2023-01-09,2023-02-17,5,5,2022-12-12,2023-01-16,T213,Department of Built Environment,aalto-1eccce2c-22df-42a2-9123-ca85f142b6d7,https://mycourses.aalto.fi/co...
121,aalto-CUR-163063-3083770,SPT-E4040,aalto-OPINKOHD-1142076648-20210801,teaching-participation-lectures,"Integrated Urban Transport D, Lecture",The course content will include: <ul><li>Trans...,A student who has passed the course will be ab...,['Milos Mladenovic'],"2020-2021 Spring III, 2021-2022 Spring III",2023-02-27,2023-04-14,5,5,2023-01-30,2023-02-27,T213,Department of Built Environment,aalto-1eccce2c-22df-42a2-9123-ca85f142b6d7,https://mycourses.aalto.fi/co...
407,aalto-CUR-166227-3086934,CS-E4110,aalto-OPINKOHD-1125600015-20210801,exam-exam,"Concurrent Programming D, Exam","Principles of concurrent programming, synchron...",You understand the benefits of systems with co...,['Vesa Lauri Ilmari Hirvisalo'],"2020-2021 Autumn II, 2021-2022 Autumn II",2023-02-20,2023-02-20,5,5,2022-12-22,2023-02-13,T313,Department of Computer Science,aalto-3b3aa303-843a-4a29-97ca-29c45d53f923,https://mycourses.aalto.fi/co...
441,aalto-CUR-166435-3087142,CS-E4710,aalto-OPINKOHD-1142279259-20210801,exam-exam,"Machine Learning: Supervised Methods D, Exam",Generalization error analysis and estimation; ...,"After the course, the student knows how to rec...",['Juho Heikki Rousu'],"2020-2021 Autumn I-II, 2021-2022 Autumn I-II",2023-02-21,2023-02-21,5,5,2022-12-23,2023-02-14,T313,Department of Computer Science,aalto-3b3aa303-843a-4a29-97ca-29c45d53f923,https://mycourses.aalto.fi/co...
454,aalto-CUR-166552-3087259,TU-E2230,aalto-CU-1150933402-20220801,teaching-participation-lectures,"Machine Learning in Financial Engineering, Lec...",<ul><li><i>Data analysis: </i>Financial data s...,This course complements the content of TU-E221...,"['Ruth-Johanna Cleopatra Kaila', 'Eljas Mikko ...",<p> Teaching Language : English</p><p> Teachin...,2023-01-11,2023-04-05,3,6,2022-12-15,2023-01-16,T307,Department of Industrial Engineering and Manag...,aalto-9a94b0a1-836d-4384-9bde-6709e8a053db,https://mycourses.aalto.fi/co...
723,aalto-CUR-168488-2367507,ABL-C1111,aalto-CU-1150932861-20220801,exam-exam,"Introduction to Financial Accounting, Exam",<ul><li>Conceptual Framework and financial sta...,After having completed the course students sho...,['Nina Elina Sormunen'],<p>Points from the exercises are valid only in...,2023-04-03,2023-04-03,6,6,2023-02-02,2023-03-27,E701,Department of Accounting,aalto-e43a65a8-9ca6-4192-b64c-7887ff053b7a,https://mycourses.aalto.fi/co...
781,aalto-CUR-169285-2381825,NBE-E4130,aalto-OPINKOHD-1121787436-20210801,teaching-participation-lectures,"Information Processing in Neural Circuits D, L...",<ul><li>Basic building blocks of the neural ci...,"After completing the course, the student shoul...",['Petri Juhani Ala-Laurila'],"2020-2021 Spring III-V, (2021, 2022) - No teac...",2023-01-11,2023-06-08,5,5,2022-12-12,2023-01-16,T314,Department of Neuroscience and Biomedical Engi...,aalto-1311f55a-509b-485d-b974-5ddaf28ffaa8,https://mycourses.aalto.fi/co...
805,aalto-CUR-169528-2389769,MS-E1622,aalto-CU-1150933374-20220801,teaching-participation-lectures,"Algebraic Methods in Data Science, Lecture",The contents of this course include the follow...,"At the end of this course, the student can\n<u...",['Kaie Kubjas'],<p> Teaching Language : English</p><p> Teachin...,2023-01-13,2023-04-14,5,5,2022-12-12,2023-01-20,T302,Department of Mathematics and Systems Analysis,aalto-a3b50b1e-ed7d-4136-8897-c7aa1a4f899b,https://mycourses.aalto.fi/co...
893,aalto-CUR-170382-2483586,ECON-C1900,otm-ef57d0d5-084d-418e-abe0-1bbf6ea4a836,exam-exam,Mathematical Methods for Economics Research: O...,"<ul><li>Must know: KKT conditions, convex prog...",This course is intended to aid the student in ...,['Daniel Niels Hauser'],<p> Teaching Language : English</p><p> Teachin...,2023-05-15,2023-05-15,6,6,2023-03-16,2023-05-08,E703,Department of Economics,aalto-42c75b87-18b8-4469-9dc4-a25998c2b22c,https://mycourses.aalto.fi/co...


In [90]:
snlp_relavent_content = [data_dict[course] for course in snlp_relavent_courses]


In [104]:
tokens = [preprocessing(content[0]) for content in snlp_relavent_content]

In [105]:
tokens

[([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
 ([101, 101, 1049, 102], [0, 0, 0, 0]),
