In [46]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from datetime import datetime, timedelta

In [108]:

def get_sundays_between(start_date_str, end_date_str):
    """Return every Sunday in the inclusive range [start, end] as strings.

    Args:
        start_date_str: first day of the range, formatted ``YYYY-MM-DD``.
        end_date_str: last day of the range, formatted ``YYYY-MM-DD``.

    Returns:
        A list of ``YYYY-MM-DD`` strings in ascending order; empty when the
        range contains no Sunday (including when start is after end).

    Raises:
        ValueError: if either argument does not match ``YYYY-MM-DD``.
    """
    date_format = "%Y-%m-%d"
    range_start = datetime.strptime(start_date_str, date_format)
    range_end = datetime.strptime(end_date_str, date_format)

    # weekday() is 0 for Monday ... 6 for Sunday, so this offset is 0 when
    # the range already starts on a Sunday.
    days_to_sunday = (6 - range_start.weekday()) % 7
    first_sunday = range_start + timedelta(days=days_to_sunday)

    # Floor division keeps the count at <= 0 when the first Sunday lies
    # beyond the end of the range, which yields an empty list.
    sunday_count = (range_end - first_sunday).days // 7 + 1

    return [
        (first_sunday + timedelta(weeks=week)).strftime(date_format)
        for week in range(sunday_count)
    ]

In [90]:
# Download the timetable markup for every direction and every week.
# The result is the concatenated innerHTML of each page's `entries` element.
element_html = ""
directions = ["91","92","89","93","90","94"]
# One request per week; each week is addressed by its Sunday.
days = get_sundays_between("2024-10-06", "2025-01-19")

# Set up the WebDriver (assuming you have ChromeDriver installed)
driver = webdriver.Chrome()
html_parts = []
try:
    # Open the website
    for direction in directions:
        for day in days:
            driver.get(f'https://urnik.fs.uni-lj.si/layer_one/{direction}/?day={day}')

            # Extract content, e.g., an element by its ID
            element = driver.find_element(By.ID, 'entries')
            html_parts.append(element.get_attribute('innerHTML'))
finally:
    # Close the browser even when a page fails to load or the element is
    # missing, so no browser process is left running after an error.
    driver.quit()

# Join once at the end instead of growing a string inside the loop.
element_html = "".join(html_parts)


In [109]:
def find_all_indexes(text, substring):
    """Return the start offsets of every occurrence of ``substring`` in ``text``.

    Occurrences may overlap: the search resumes one character after each
    hit rather than after the end of the match.

    Args:
        text: the string to search in.
        substring: the string to look for.

    Returns:
        A list of integer offsets in ascending order (empty if none found).
    """
    positions = []
    position = text.find(substring)

    while position != -1:
        positions.append(position)
        # Resume right after the start of this hit so overlaps are kept.
        position = text.find(substring, position + 1)

    return positions

In [110]:
# Slice single entry
# Drop newlines so every entry sits on one logical line, then cut the markup
# at each opening tag of an entry box.
raw_data = element_html.replace('\n', '').strip()
indexes = find_all_indexes(raw_data, '<div class="entry-absolute-box')
# Sentinel: the last entry runs to the end of the markup.
indexes.append(len(raw_data))
# Each entry spans from its own start offset up to the next one.
entries = [raw_data[begin:end] for begin, end in zip(indexes, indexes[1:])]

In [111]:
def get_string_until(text, index, substring):
    """Return the part of ``text`` from ``index`` up to the next ``substring``.

    The returned string never includes ``substring`` itself. When
    ``substring`` does not occur at or after ``index``, everything from
    ``index`` to the end of ``text`` is returned.

    Args:
        text: the string to read from.
        index: offset at which reading starts; expected to be non-negative
            (a negative value follows normal Python slice semantics).
        substring: the terminator that stops the read.

    Returns:
        The extracted string; empty when ``index`` is past the end of
        ``text`` or when ``substring`` starts exactly at ``index``.
    """
    # One search plus one slice replaces the per-character concatenation,
    # which was quadratic in the length of the extracted text.
    end = text.find(substring, index)
    if end == -1:
        return text[index:]
    return text[index:end]

def get_week_dates(last_day_str):
    """Return the seven consecutive dates that end on ``last_day_str``.

    The argument is treated as the last day of a week; the function does not
    check which weekday it actually is.

    Args:
        last_day_str: a date formatted ``DD.MM.YYYY``.

    Returns:
        A list of seven ``DD.MM.YYYY`` strings in ascending order, the last
        of which is the given date.

    Raises:
        ValueError: if ``last_day_str`` does not match ``DD.MM.YYYY``.
    """
    date_format = "%d.%m.%Y"
    week_end = datetime.strptime(last_day_str, date_format)

    # Count back from six days before the given date down to the date itself.
    return [
        (week_end - timedelta(days=days_back)).strftime(date_format)
        for days_back in reversed(range(7))
    ]

In [112]:
class Entry:
    """One timetable entry parsed from a single entry-box HTML fragment.

    Constructing an ``Entry`` parses ``text`` immediately (see ``get_data``).
    Fields whose value cannot be decoded keep their empty default and a
    diagnostic line is printed.
    """

    # Weekday names indexed by column; get_data maps each 20% band of the
    # CSS `left` offset to one of them.
    _DAY_NAMES = ("monday", "tuesday", "wednesday", "thursday", "friday")
    _DAY_COLUMN_WIDTH = 20.0

    # Known CSS `top` offsets (percent, as written in the markup) and the
    # start time each one stands for.
    _START_TIMES = {
        "7.69": "8:00",
        "23.08": "10:00",
        "38.46": "12:00",
        "50.00": "13:30",
        "53.85": "14:00",
        "69.23": "16:00",
        "84.62": "18:00",
    }

    # Known CSS `height` values (percent) and the duration each stands for.
    _DURATIONS = {"15.38": "2:00"}

    def __init__(self, text):
        """Store the raw fragment and parse it.

        Args:
            text: HTML of one entry box, without newlines.

        Raises:
            IndexError: if an expected marker is missing from ``text``.
            ValueError: if the `left` offset or the date cannot be parsed.
        """
        self.text = text
        self.title = ""
        self.abbreviation = ""
        self.direction = ""
        self.teacher = []
        self.classroom = ""
        # Initialised here as well so the attribute exists from the start,
        # like every other field.
        self.type = ""
        self.group = ""
        self.date = ""
        self.day = ""
        self.time_start = ""
        self.duration = ""
        self.get_data()

    def _fragment_after(self, marker, terminator):
        """Return the text between the first ``marker`` and the next ``terminator``."""
        start = self.text.find(marker)
        if start == -1:
            raise IndexError(f'marker {marker!r} not found in entry')
        return get_string_until(self.text, start + len(marker), terminator)

    @staticmethod
    def _titles_in(fragment):
        """Return the value of every ``title="..."`` attribute inside ``fragment``."""
        attribute = "title="
        # +1 also skips the opening quote of the attribute value.
        value_offset = len(attribute) + 1
        return [
            get_string_until(fragment, position + value_offset, '"')
            for position in find_all_indexes(fragment, attribute)
        ]

    def _first_title(self, marker, terminator):
        """Return the first ``title`` attribute found between ``marker`` and ``terminator``."""
        titles = self._titles_in(self._fragment_after(marker, terminator))
        if not titles:
            raise IndexError(f'no title attribute found after {marker!r}')
        return titles[0]

    def get_data(self):
        """Parse ``self.text`` and fill in every field of the entry.

        Calling this more than once yields the same field values each time.

        Raises:
            IndexError: if an expected marker or ``title`` attribute is absent,
                or the date does not consist of three ``-`` separated parts.
            ValueError: if the `left` offset is not a number or the date is
                rejected by ``get_week_dates``.
        """
        # TITLE ABBREVIATION
        # The slicing drops the last 7 characters for the title and keeps
        # characters -5..-2 as the abbreviation.
        # NOTE(review): presumably titles look like "Name (ABCD)" with a
        # 4-character abbreviation - confirm against the site.
        title = self._first_title('<span class="subject">', '</a>')
        self.title = title[:-7]
        self.abbreviation = title[-5:-1]

        # DIRECTION
        # Keep only the part before the first parenthesis.
        direction = self._first_title('<span class="layer_one">', '</span>')
        self.direction = direction.split('(')[0].strip()

        # CLASSROOM
        # Keep only the last space-separated word of the title.
        classroom = self._first_title('<div class="classroom">', '</div>')
        self.classroom = classroom.split(' ')[-1]

        # TEACHER
        # Assigned as a fresh list (not appended) so that re-running
        # get_data does not duplicate the teachers.
        self.teacher = self._titles_in(
            self._fragment_after('<div class="teacher">', '</div>')
        )

        # TYPE GROUP
        type_group = self._fragment_after('<span class="entry-type">', '<').strip()
        if '(' in type_group:
            self.type = get_string_until(type_group, 0, '(')
            # Text between the first '(' and the next one, minus its last
            # character (the closing parenthesis in "TYPE(GROUP)").
            self.group = type_group.split('(')[1][:-1]
        else:
            self.type = type_group
            # Any type other than "P" without an explicit group gets "S1".
            if self.type != "P":
                self.group = "S1"

        # DAY
        left = self._fragment_after('left: ', '%')
        left_value = float(left)
        if 0.00 <= left_value < 100.00:
            self.day = self._DAY_NAMES[int(left_value // self._DAY_COLUMN_WIDTH)]
        else:
            print(f'Invalid left: {left}')

        # DATE
        # The `day=` value is year-month-day; get_week_dates wants
        # day.month.year and treats it as the last day of the week.
        date = self._fragment_after('day=', '"').split('-')
        last_day_of_week = f'{date[2]}.{date[1]}.{date[0]}'
        week = get_week_dates(last_day_of_week)
        if self.day in self._DAY_NAMES:
            self.date = week[self._DAY_NAMES.index(self.day)]
        else:
            print(f'Invalid day of week: {self.day}')

        # TIME START
        top = self._fragment_after('top: ', '%')
        if top in self._START_TIMES:
            self.time_start = self._START_TIMES[top]
        else:
            print(f'Invalid top: {top}')

        # DURATION
        height = self._fragment_after('height: ', '%')
        if height in self._DURATIONS:
            self.duration = self._DURATIONS[height]
        else:
            print(f'Invalid height: {height}')

    def print(self):
        """Print every parsed field on its own line, followed by a blank gap."""
        print(f'Title: {self.title}')
        print(f'Abbreviation: {self.abbreviation}')
        print(f'Direction: {self.direction}')
        print(f'Teacher: {self.teacher}')
        print(f'Classroom: {self.classroom}')
        print(f'Type: {self.type}')
        print(f'Group: {self.group}')
        print(f'Date: {self.date}')
        print(f'Day: {self.day}')
        print(f'Time start: {self.time_start}')
        print(f'Duration: {self.duration}')
        print('\n')

In [113]:
# Create list of objects
# Every sliced HTML fragment is parsed into one Entry, in the original order.
entry_objects = [Entry(entry_html) for entry_html in entries]

In [114]:
# Make structured data
# Layout: data[abbreviation] = {name, abbreviation, direction,
#                               lecture:   {date: slot},
#                               exercises: {type+group: {date: slot}}}
data = {}
for entry in entry_objects:
    # The first entry seen for a subject defines its name and direction.
    if entry.abbreviation not in data:
        data[entry.abbreviation] = {
            "name": entry.title,
            "abbreviation": entry.abbreviation,
            "direction": entry.direction,
            "lecture": {},
            "exercises": {},
        }
    subject_record = data[entry.abbreviation]

    is_lecture = entry.type == "P"
    if is_lecture:
        # NOTE: lectures are keyed by date only, so a second "P" entry of
        # the same subject on the same date replaces the first one.
        slots_by_date = subject_record["lecture"]
    else:
        slots_by_date = subject_record["exercises"].setdefault(
            f'{entry.type}{entry.group}', {}
        )

    slots_by_date[f'{entry.date}'] = {
        "type": entry.type,
        # Lectures are always stored with an empty group.
        "group": "" if is_lecture else entry.group,
        "day": entry.day,
        "date": entry.date,
        "timeStart": entry.time_start,
        "duration": entry.duration,
        "classroom": entry.classroom,
    }

# Order the exercise groups of each subject by key. Sorting once per subject
# is enough; previously the same sort was repeated once per group.
for subject in data:
    data[subject]["exercises"] = dict(sorted(data[subject]["exercises"].items()))


In [115]:
# Define the name of the file
file_name = 'data.js'

# One `const <abbreviation> = <dict repr>` statement per subject.
# NOTE(review): this writes the Python repr of each dict; it presumably reads
# as a JavaScript object literal only while every value is a str or a dict -
# confirm before storing other types.
text = "".join(
    f'\nconst {subject} =\n    {data[subject]}\n'
    for subject in data
)

# Open the file in write mode ('w'). If it doesn't exist, it will be created.
with open(file_name, 'w', encoding='utf-8') as file:
    file.write(text)

print(f'Content has been written to {file_name}.')


Content has been written to data.js.
