Skip to content

Commit

Permalink
implemented a way to scrape the tumonline calendar
Browse files Browse the repository at this point in the history
  • Loading branch information
CommanderStorm committed Jan 2, 2023
1 parent bf2d656 commit d1588cc
Show file tree
Hide file tree
Showing 10 changed files with 704 additions and 35 deletions.
2 changes: 1 addition & 1 deletion data/processors/export.py
Expand Up @@ -116,7 +116,7 @@ def export_for_api(data, path):
if "roomfinder_data" in export_data[_id]:
del export_data[_id]["roomfinder_data"]
if "props" in export_data[_id]:
prop_keys_to_keep = {"computed", "links", "comment", "calendar_url"}
prop_keys_to_keep = {"computed", "links", "comment", "calendar_url", "tumonline_room_nr"}
to_delete = [e for e in export_data[_id]["props"].keys() if e not in prop_keys_to_keep]
for k in to_delete:
del export_data[_id]["props"][k]
Expand Down
10 changes: 8 additions & 2 deletions data/processors/sections.py
Expand Up @@ -7,6 +7,12 @@ def extract_calendar_urls(data):
if entry.get("tumonline_data", {}).get("calendar", None):
url = f"https://campus.tum.de/tumonline/{entry['tumonline_data']['calendar']}"
entry["props"]["calendar_url"] = url
if entry.get("tumonline_data", {}).get("room_link", None):
url: str = entry['tumonline_data']['room_link']
entry["props"]["tumonline_room_nr"] = int(url.removeprefix("wbRaum.editRaum?pRaumNr="))
elif entry.get("tumonline_data", {}).get("address_link", None):
url: str = entry['tumonline_data']['address_link']
entry["props"]["tumonline_room_nr"] = int(url.removeprefix("ris.einzelraum?raumkey="))


def compute_props(data):
Expand Down Expand Up @@ -160,8 +166,8 @@ def generate_rooms_overview(data):
for _id, entry in data.items():
# if entry["type"] not in {"building", "joined_building", "virtual_room"} or \
if (
entry["type"] not in {"area", "site", "campus", "building", "joined_building", "virtual_room"}
or "children_flat" not in entry
entry["type"] not in {"area", "site", "campus", "building", "joined_building", "virtual_room"}
or "children_flat" not in entry
):
continue

Expand Down
9 changes: 7 additions & 2 deletions server/Cargo.toml
Expand Up @@ -18,9 +18,11 @@ strip = true

[dependencies]
log = "0.4.17"
diesel = { version = "2.0.2", features = ["default","sqlite"] }
diesel = { version = "2.0.2", features = ["default","chrono","sqlite"] }
actix-web = "4.2.1"
awc = "3.0.1"
actix-rt = "2.7.0"
rustls = "0.20.7"
awc = { version= "3.0.1", features = ["rustls"] }
cached = "0.41.0"
serde = { version = "1.0.148", features = ["derive"] }
serde_json = "1.0.89"
Expand All @@ -31,3 +33,6 @@ env_logger = "0.10.0"
image = "0.24.5"
rusttype= "0.9.3"
imageproc= "0.23.0"
chrono = { version="0.4.23", features=["serde","rustc-serialize"] }
minidom = "0.15.0"
rand = "0.8.5"
76 changes: 48 additions & 28 deletions server/load_api_data_to_db.py
Expand Up @@ -6,32 +6,51 @@
def add_to_database(de_data, en_data):
"""add data consisting of 2x(key, data_json, data) to the sqlite database"""
con: sqlite3.Connection = sqlite3.connect("data/api_data.db")
con.execute(
"""
CREATE TABLE IF NOT EXISTS de (
key VARCHAR(30) UNIQUE PRIMARY KEY NOT NULL,
name VARCHAR(30) NOT NULL,
arch_name VARCHAR(30), -- NOT Unique, but only used for the old roomfinder
type VARCHAR(30) NOT NULL,
type_common_name VARCHAR(30) NOT NULL,
lat FLOAT NOT NULL,
lon FLOAT NOT NULL,
data TEXT NOT NULL
);""",
)
con.execute(
"""
CREATE TABLE IF NOT EXISTS en (
key VARCHAR(30) UNIQUE PRIMARY KEY NOT NULL,
name VARCHAR(30) NOT NULL,
arch_name VARCHAR(30), -- NOT Unique, but only used for the old roomfinder. This is only here temporarily
type VARCHAR(30) NOT NULL,
type_common_name VARCHAR(30) NOT NULL,
lat FLOAT NOT NULL,
lon FLOAT NOT NULL,
data TEXT NOT NULL
);""",
)
for lang in ["de", "en"]:
con.execute(f"DROP TABLE IF EXISTS {lang}")
con.execute(
f"""
CREATE TABLE {lang} (
key VARCHAR(30) UNIQUE PRIMARY KEY NOT NULL,
name VARCHAR(30) NOT NULL,
tumonline_room_nr INTEGER NULLABLE, -- used for calendars
arch_name VARCHAR(30), -- NOT Unique, but only used for the old roomfinder. This is only here temporarily
type VARCHAR(30) NOT NULL,
type_common_name VARCHAR(30) NOT NULL,
lat FLOAT NOT NULL,
lon FLOAT NOT NULL,
data TEXT NOT NULL
);""",
)
for tbl in ["calendar", "calendar_scrape"]:
con.execute(f"DROP TABLE IF EXISTS {tbl}")
con.execute(
f"""
CREATE TABLE {tbl} (
key VARCHAR(30) NOT NULL,
dtstart DATETIME NOT NULL,
dtend DATETIME NOT NULL,
dtstamp DATETIME NOT NULL,
event_id INTEGER NOT NULL,
event_title TEXT NOT NULL,
single_event_id INTEGER UNIQUE PRIMARY KEY NOT NULL,
single_event_type_id TEXT NOT NULL,
single_event_type_name TEXT NOT NULL,
event_type_id TEXT NOT NULL,
event_type_name TEXT NULLABLE,
course_type_name TEXT NULLABLE,
course_type TEXT NULLABLE,
course_code TEXT NULLABLE,
course_semester_hours INTEGER NULLABLE,
group_id TEXT NULLABLE,
xgroup TEXT NULLABLE,
status_id TEXT NOT NULL,
status TEXT NOT NULL,
comment TEXT NOT NULL
);""",
)
# purposely, this index is only created on the calendar table and not on calendar_scrape
con.execute("CREATE INDEX IF NOT EXISTS calendar_lut ON calendar(key, dtstart, dtend)")
# we are using this file in docker, so we deliberately turn off journaling and synchronous writes (no ACID compliance needed) ;)
con.execute("""PRAGMA journal_mode = OFF;""")
con.execute("""PRAGMA synchronous = OFF;""")
Expand All @@ -41,6 +60,7 @@ def map_data(key, data_json, data):
key,
data_json,
data["name"],
data["props"].get("tumonline_room_nr", None),
data["arch_name"],
data["type"],
data["type_common_name"],
Expand All @@ -53,11 +73,11 @@ def map_data(key, data_json, data):

with con:
con.executemany(
"INSERT INTO de(key,data,name,arch_name,type,type_common_name,lat,lon) VALUES (?,?,?,?,?,?,?,?)",
"INSERT INTO de(key,data,name,tumonline_room_nr,arch_name,type,type_common_name,lat,lon) VALUES (?,?,?,?,?,?,?,?,?)",
de_data,
)
con.executemany(
"INSERT INTO en(key,data,name,arch_name,type,type_common_name,lat,lon) VALUES (?,?,?,?,?,?,?,?)",
"INSERT INTO en(key,data,name,tumonline_room_nr,arch_name,type,type_common_name,lat,lon) VALUES (?,?,?,?,?,?,?,?,?)",
en_data,
)

Expand Down

0 comments on commit d1588cc

Please sign in to comment.