# SCUT iCalendar Project

`pip install icalendar` 以安装依赖。

不要修改以下函数代码，只需在调用 `main()` 时将参数替换为你要的即可（最下面的某个单元格）。

In [None]:
from IPython.display import display
import pandas as pd
import re
from icalendar import Calendar, Event
from icalendar import vText, vDatetime
import datetime
from datetime import date, time, timedelta
import unicodedata
from dateutil import parser
import pprint

In [None]:
def divide(text):
    """Split a multi-entry cell into its individual entries.

    A string that contains at least one carriage-return/line-feed pair is
    returned as the list of its parts split on that pair. Every other value
    (strings without the pair, and all non-string values) is returned
    unchanged.
    """
    separator = '\r\n'
    is_multi_entry = isinstance(text, str) and separator in text
    return text.split(separator) if is_multi_entry else text

In [None]:
def parse_cell(cell):
    """Parse the raw content of one timetable cell into course dictionaries.

    A string is parsed into one dictionary, a list of strings into a list of
    dictionaries, and any other value is returned unchanged.

    Each entry is split on ``'/'`` into: name, timing text, location,
    professor and student number. The dictionary always has ``name``; the
    keys ``location``, ``professor``, ``student_number``,
    ``week_period_text``, ``class_period_text``, ``class_period``,
    ``week_period`` and ``exclude_week`` are added only when they can be
    extracted from the text.
    """

    def parse_class(text):
        """Parse a single ``'/'``-separated course entry into a dictionary."""
        fields = text.split('/')
        dct = {'name': fields[0]}
        if len(fields) > 2:
            dct['location'] = fields[2]
        if len(fields) > 3:
            dct['professor'] = fields[3]
        if len(fields) > 4:
            dct['student_number'] = fields[4]
        try:
            timing = fields[1]
            # Everything after the first ')' describes the weeks.
            dct['week_period_text'] = timing.split(')', 1)[-1]
            if timing.startswith('('):
                # A leading parenthesised part describes the class periods.
                dct['class_period_text'] = timing.split(')')[0][1:]
                numbers = re.findall(r'[0-9]+', dct['class_period_text'])
                dct['class_period'] = (int(numbers[0]), int(numbers[-1]))
            numbers = re.findall(r'[0-9]+', dct['week_period_text'])
            dct['week_period'] = (int(numbers[0]), int(numbers[-1]))
            # Keep only digits, '-' and ',' and cut at word boundaries, so
            # numbers and separators become separate tokens.
            tokens = re.split(
                r'\b', ''.join(re.split(r'[^0-9-,]+', dct['week_period_text']))
            )
            exclude_week = []
            for i, value in enumerate(tokens):
                if value == ',':
                    # A comma joins two week ranges; the weeks strictly
                    # between its neighbouring numbers are skipped.
                    exclude_week.extend(
                        range(int(tokens[i - 1]) + 1, int(tokens[i + 1]))
                    )
            if exclude_week:
                dct['exclude_week'] = exclude_week
        except (IndexError, ValueError):
            # Best effort: a missing or malformed timing field keeps whatever
            # was extracted before the failure. Only the errors raised by the
            # indexing and int() conversions above are swallowed.
            pass
        return dct

    if isinstance(cell, str):
        return parse_class(cell)
    if isinstance(cell, list):
        return [parse_class(i) for i in cell]
    return cell

In [None]:
def parse_column(column):
    """Tag every course dictionary in a column with its weekday number.

    ``column`` is a pandas Series whose ``name`` is the weekday header.
    Dictionaries (and dictionaries inside lists) get a ``week_day`` key and
    are modified in place; every other cell value is passed through as is.
    The result is the Series returned by ``column.map``.
    """
    weekday_numbers = {
        '星期一': 1,
        '星期二': 2,
        '星期三': 3,
        '星期四': 4,
        '星期五': 5,
        '星期六': 6,
        '星期日': 7
    }

    def tag(course):
        # The header is looked up only when a course is actually present, so
        # a column with another header raises KeyError only if it holds one.
        course['week_day'] = weekday_numbers[column.name]
        return course

    def tag_cell(cell):
        if isinstance(cell, list):
            return [tag(course) for course in cell]
        if isinstance(cell, dict):
            return tag(cell)
        return cell

    return column.map(tag_cell)

In [None]:
def parse_row(row):
    """Fill in the class period of every course dictionary in a row.

    ``row`` is a pandas Series whose ``name`` is the period label of the
    timetable row. A dictionary (or a dictionary inside a list) that has no
    truthy ``class_period`` gets the tuple belonging to that label; the
    dictionaries are modified in place. Every other cell value is passed
    through as is. The result is the Series returned by ``row.map``.
    """
    periods_by_label = {
        '第一二节': (1, 2),
        '第三四节': (3, 4),
        '第五六节': (5, 6),
        '第七八节': (7, 8),
        '第九十节': (9, 10),
        '第十一十二节': (11, 12)
    }

    def fill(course):
        if course.get('class_period'):
            # Periods parsed from the cell text take precedence.
            return course
        # The label is looked up only when it is needed, so an unknown label
        # raises KeyError only for courses lacking their own period.
        course['class_period'] = periods_by_label[row.name]
        return course

    def fill_cell(cell):
        if isinstance(cell, list):
            return [fill(course) for course in cell]
        if isinstance(cell, dict):
            return fill(cell)
        return cell

    return row.map(fill_cell)

In [None]:
def parse(f):
    """Read a timetable workbook and parse every cell into course data.

    The sheet is read with its second row as header and its second column as
    index, and the ``'节次'`` column is dropped. Each cell is then split with
    ``divide`` and parsed with ``parse_cell``; ``parse_column`` adds the
    weekday of each column and ``parse_row`` the class period of each row.

    Returns the resulting DataFrame, whose cells are course dictionaries,
    lists of course dictionaries, or untouched non-course values.
    """
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the
    # old name is deprecated; use whichever the installed pandas provides.
    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap

    timetable = pd.read_excel(f, header=1, index_col=1).drop('节次', axis=1)
    timetable = elementwise(timetable, divide)
    timetable = elementwise(timetable, parse_cell)
    return timetable.apply(parse_column).apply(parse_row, axis=1)

In [None]:
def to_event(dct, schedule, first_monday):
    """Build the iCalendar event for one course dictionary.

    Parameters:
        dct: course dictionary. ``class_period``, ``name`` and ``week_day``
            are required (KeyError otherwise); ``exclude_week``,
            ``location``, ``professor``, ``student_number``, ``week_period``
            (default ``(1, 18)``) and ``week_period_text`` are optional.
        schedule: mapping from class period number to a pair whose first
            item is the start time and last item the end time.
        first_monday: date of the Monday of week 1.

    A ``week_period`` whose first value is negative marks a course that
    repeats every second week; the absolute values give the first and last
    week. Otherwise the course repeats weekly from the first to the last
    week. No recurrence rule is added when both weeks are equal.

    Returns the populated ``Event``.
    """
    class_period = dct['class_period']
    name = dct['name']
    week_day = dct['week_day']
    exclude_week = dct.get('exclude_week')
    location = dct.get('location')
    professor = dct.get('professor')
    student_number = dct.get('student_number')
    week_period = dct.get('week_period', (1, 18))
    week_period_text = dct.get('week_period_text')

    event = Event(summary=name)

    if week_period[0] < 0:
        week_period = [-i for i in week_period]
        if week_period[0] != week_period[-1]:
            # Floor division keeps COUNT an integer (true division produced
            # a float such as 8.5 for an even-length span).
            event.add(
                'rrule',
                {
                    'freq': 'weekly',
                    'interval': 2,
                    'count': (week_period[-1] - week_period[0]) // 2 + 1,
                },
            )
    elif week_period[0] != week_period[-1]:
        event.add('rrule', {'freq': 'weekly', 'count': week_period[-1] - week_period[0] + 1})

    # First occurrence: the course's weekday within its first teaching week.
    start_date = first_monday + timedelta(
        week_day - 1, weeks=week_period[0] - 1
    )
    start_datetime = datetime.datetime.combine(
        start_date, schedule[class_period[0]][0]
    )
    event.add('dtstart', start_datetime)

    # The event ends when the last of its class periods ends.
    end_datetime = datetime.datetime.combine(
        start_date, schedule[class_period[-1]][-1]
    )
    event.add('dtend', end_datetime)

    if exclude_week:
        # Skipped weeks are expressed as offsets from the first occurrence.
        exclude_datetime = [start_datetime + timedelta(weeks=i - week_period[0]) for i in exclude_week]
        event.add('exdate', exclude_datetime)

    if location:
        event['location'] = vText(location)

    class_period_text = '{}-{}节'.format(*class_period)
    if student_number:
        student_number_text = '选课人数：{}'.format(student_number)
    else:
        student_number_text = None
    description = [i for i in (class_period_text, week_period_text, professor, student_number_text) if i]
    event['description'] = vText('\n'.join(description))

    # NOTE(review): the UID is built from weekday, class periods and name
    # only, so two entries for the same slot that differ only in their weeks
    # share a UID.
    event['uid'] = vText('{}({}-{})'.format(week_day, *class_period) + name + '@franklinli.com')

    return event

In [None]:
def transform(text):
    """Extract clock times from one cell of the bell-schedule table.

    Non-string values are returned unchanged. A string is NFKC-normalized,
    stripped of all whitespace and scanned for runs of digits and colons;
    each run is converted with ``parser.parse(...).time()`` when possible and
    otherwise kept as the matched text.

    Returns:
        ``(first, last)`` when more than one run is found, the single item
        when exactly one is found, and ``None`` when there is none.
    """
    if not isinstance(text, str):
        return text
    normalized = unicodedata.normalize('NFKC', text)
    tokens = re.findall(r'[\d:]+', ''.join(normalized.split()))
    for i, value in enumerate(tokens):
        try:
            tokens[i] = parser.parse(value).time()
        except Exception:
            # Best effort: a run that is not a parseable time stays as text.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
    if len(tokens) > 1:
        return (tokens[0], tokens[-1])
    return next(iter(tokens), None)

In [None]:
def select(schedule):
    """Reduce the bell schedule to class periods 1-12, indexed by number.

    ``schedule`` is a DataFrame indexed by row label whose cells are
    ``(start, end)`` pairs of ``datetime.time``. The rows for the first to
    the eleventh class are selected (KeyError if one is missing) and a
    twelfth period is synthesized for every column from that column's own
    tenth and eleventh periods:

    * it starts after the eleventh period ends, following a break as long as
      the one between the tenth and eleventh periods;
    * it ends one "end of tenth to end of eleventh" interval after the
      eleventh period ends.

    Returns the 12-row DataFrame with the index replaced by ``1..12``.
    """
    labels = [
        '第一节课',
        '第二节课',
        '第三节课',
        '第四节课',
        '第五节课',
        '第六节课',
        '第七节课',
        '第八节课',
        '第九节课',
        '第十节课',
        '第十一节课',
    ]
    schedule = schedule.loc[labels].copy()

    # NOTE(review): after the selection above this label is never present,
    # so the twelfth period is always synthesized; the check is kept as is.
    if '第十二节课' not in schedule.index:

        def anchor(value):
            # Times cannot be added or subtracted; pin them to a fixed date.
            return datetime.datetime.combine(date.min, value)

        def extrapolate(tenth, eleventh):
            prev_end = anchor(tenth[-1])
            last_start = anchor(eleventh[0])
            last_end = anchor(eleventh[-1])
            start = last_end + (last_start - prev_end)
            end = last_end + (last_end - prev_end)
            return (start.time(), end.time())

        # Each column is extrapolated from its own values. Previously the
        # second column was filled with the first column's result.
        schedule.loc['第十二节课'] = [
            extrapolate(tenth, eleventh)
            for tenth, eleventh in zip(
                schedule.loc['第十节课'], schedule.loc['第十一节课']
            )
        ]

    schedule.index = pd.Index(range(1, 13))
    return schedule

In [None]:
def get_schedule(url, max_attempts=10, retry_delay=1.0):
    """Download the bell-schedule table and return class periods 1-12.

    Parameters:
        url: page passed to ``pandas.read_html``; its first table is used,
            with the first row as header.
        max_attempts: how many times the download is tried before giving up.
        retry_delay: seconds to wait between two attempts.

    Returns:
        The DataFrame produced by ``select`` after every cell has been
        converted with ``transform``.

    Raises:
        RuntimeError: if no table could be read within ``max_attempts``
            attempts. The last download error, if any, is chained as cause.
    """
    # Imported locally under an alias because the module-level name ``time``
    # refers to ``datetime.time`` in this file.
    import time as _time

    schedule = None
    last_error = None
    for attempt in range(max_attempts):
        try:
            schedule = next(iter(pd.read_html(url, header=0)), None)
        except Exception as error:
            # Remember the failure and retry. KeyboardInterrupt is not
            # caught, so the loop can always be interrupted.
            last_error = error
        if schedule is not None:
            break
        if attempt + 1 < max_attempts:
            _time.sleep(retry_delay)
    if schedule is None:
        raise RuntimeError(
            'could not read a schedule table from {!r} after {} attempt(s)'.format(
                url, max_attempts
            )
        ) from last_error

    try:
        # Row 22 is shifted one cell to the right and given the first-column
        # value of row 21 (presumably that row lacks its first cell in the
        # source table -- TODO confirm against the page).
        schedule.iloc[22] = schedule.iloc[22].shift()
        schedule.iloc[22, 0] = schedule.iloc[21, 0]
    except Exception:
        # Best effort: tables without such a row are used as they are.
        pass
    schedule = schedule.set_index('内容')

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the
    # old name is deprecated; use whichever the installed pandas provides.
    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap
    return select(elementwise(schedule, transform))

In [None]:
def get_first_monday(file):
    """Read the semester start date from a timetable workbook.

    Every cell of the sheet is converted to text, joined with newlines and
    NFKC-normalized; the first occurrence of
    ``本学期<year>-<month>-<day>正式上课至`` supplies the date.

    Returns:
        The matched ``datetime.date``, or ``None`` when the sheet contains
        no such text. The date is returned as found; callers treat it as the
        Monday of week 1.
    """
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the
    # old name is deprecated; use whichever the installed pandas provides.
    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap

    cells = elementwise(pd.read_excel(file), str).values.flatten()
    text = unicodedata.normalize('NFKC', '\n'.join(cells))
    # Raw string: '\d' in a plain string literal is an invalid escape.
    match = re.search(r'本学期(\d+)-(\d+)-(\d+)正式上课至', text)
    if match is None:
        return None
    return date(*map(int, match.groups()))

不要修改 `main()` 函数，只需将调用的参数替换为你要的即可（下下个单元格）。

In [None]:
def main(file='16金融学(汇丰精英班)课表.xls', advance=False, schedule_url='https://web.archive.org/web/20180102091646/http://jwc.scuteo.com/jiaowuchu/cms/category/index.do?id=f25701314e913361014e91450b510010'):
    """Convert a timetable into an ``.ics`` calendar file next to the input.

    Parameters:
        file: path of an ``.xls``/``.xlsx`` timetable, or of a ``.py`` file
            previously written in advanced mode (a list of course
            dictionaries whose last item is the date of the first Monday).
        advance: when true and ``file`` is a workbook, also write the parsed
            course list to ``<stem>.py``, overwriting any existing file.
        schedule_url: page holding the bell schedule, fetched through
            ``get_schedule``.

    The bell-schedule column is chosen by counting course locations: the
    ``五山`` column is used when more locations mention ``五山`` than
    ``大学城``, otherwise the ``大学城`` column.

    Raises:
        ValueError: if there are courses but the semester start date could
            not be determined.
    """
    stem = file.rsplit('.', 1)[0]

    if file.endswith(('.xls', '.xlsx')):
        # Collect each distinct course once, in reading order.
        lst = []
        for cell in parse(file).values.flatten().tolist():
            if isinstance(cell, dict):
                if cell not in lst:
                    lst.append(cell)
            elif isinstance(cell, list):
                lst += [i for i in cell if i not in lst]
        first_monday = get_first_monday(file)
        if advance:
            with open(stem + '.py', 'w') as f:
                f.write(pprint.pformat(lst + [first_monday]))
    else:
        # NOTE(security): eval executes whatever the file contains. It is
        # needed because the file holds ``datetime.date(...)`` expressions;
        # only load files you wrote or reviewed yourself.
        with open(file) as f:
            lst = eval(f.read())
        first_monday = lst.pop()

    if lst and first_monday is None:
        # Without this check the failure would surface later as an obscure
        # TypeError while computing event dates.
        raise ValueError(
            'could not determine the first Monday of the semester from '
            '{!r}'.format(file)
        )

    wushan_count = sum(1 for i in lst if '五山' in str(i.get('location')))
    uc_count = sum(1 for i in lst if '大学城' in str(i.get('location')))
    if uc_count < wushan_count:
        campus_column = '五山校区时间'
    else:
        campus_column = '大学城校区时间'
    schedule = get_schedule(schedule_url)[campus_column]

    cal = Calendar(prodid='-//My calendar product//franklinli.com//', version='2.0')
    for dct in lst:
        cal.add_component(to_event(dct, schedule, first_monday))

    with open(stem + '.ics', 'wb') as f:
        f.write(cal.to_ical())

**将下面的文件路径修改为你的 `.xls`、`.xlsx` 课表文件（从新版教务管理系统上下载：信息查询 > 推荐课表打印 > 查询 > 点击链接预览 > 输出 Excel），然后运行，大功告成！在 notebook 同一文件夹下找到你的日历文件。**

In [None]:
# Write the .ics calendar for the given timetable; replace the path with your own file.
main(file='16金融学(汇丰精英班)课表.xls')

## `main()` 参数

```
main(file, advance, schedule_url)
```

`file` 可以是一个 `.xls`、`.xlsx` 的课表文件路径，也可以是一个包含课程信息的 `.py` 文件路径（以高级模式运行时生成）。默认为 `'16金融学(汇丰精英班)课表.xls'`。

`advance` 是一个 `bool`，为 `True` 时以高级模式运行，生成一个包含课程信息的 `.py` 文件，注意会覆盖同名文件。默认为 `False`。

`schedule_url` 是教务处作息时间页面链接。是的，本 notebook 需要网络以获得作息时间信息。默认为该页面在 Internet Archive 上的存档：`'https://web.archive.org/web/20180102091646/http://jwc.scuteo.com/jiaowuchu/cms/category/index.do?id=f25701314e913361014e91450b510010'`。

### 高级模式

```
main(file='blabla.xls', advance=True)
```

在 notebook 同一文件夹下找到你的 `blabla.py` 还有 `blabla.ics`。然后用你喜欢的编辑器把 `blabla.py` 里面的内容修改掉，接着：

```
main(file='blabla.py')
```

大功告成！在 notebook 同一文件夹下找到你的新的 `blabla.ics`。

### `blabla.py` 文件的内容

文件内容是一个由 `dict` 组成的 `list`，但最后一项例外：它是第一个星期一的 `date`。其中，每个 `dict` 必须具有

* `name`（课程名称，`str`）
* `week_day`（星期几，`int`）
* `class_period`（第几到几节课，`int` 的二元组）

三对键值。

其余

* `exclude_week`（从第一次上课到最后一次上课，中间哪几周不上，`int` 的 `list`）
* `location`（`str`）
* `professor`（`str`）
* `student_number`（选课人数，`int` 或 `str`）
* `week_period`（第几到几周，`int` 的二元组）
* `week_period_text`（哪几周上课的描述，`str`）

是可选的。

注意最好给出 `week_period`，如果 `week_period` 键值对不存在，将取 `(1, 18)`。

<p style="text-align:center">Made with <span style="color: #e25555;">&hearts;</span> by Franklin</p>