# 교양 강의 리스트 (서울캠퍼스)
# 대학 강의 리스트 (서울캠퍼스)
# 대학원 강의 리스트 (일반대학원)
## by 고려대 개발자
---

In [1]:
import requests
from pprint import pprint
from bs4 import BeautifulSoup
import re
import pandas as pd

# 교양 강의 리스트

In [2]:
def get_cult_lectures(year="2018", term="1R"):
    """Scrape the liberal-arts (교양) lecture list from sugang.korea.ac.kr.

    Args:
        year: Academic year, sent to the server as the ``yy`` form field.
        term: Term code, sent to the server as the ``tm`` form field.

    Returns:
        A flat list with one dict per lecture row. Every dict has the keys
        ``courseUniversity``, ``courseYear``, ``courseTerm``, ``courseArea``,
        ``courseMajor``, ``courseDivide``, ``courseTitle``,
        ``courseProfessor``, ``courseCredit`` and ``courseTime``.
        A department whose result page cannot be parsed contributes no rows.

    Raises:
        AttributeError: if the department popup page has no ``<script>``
            element. Errors raised by ``requests`` are not caught either.
    """
    # \t, \n and non-breaking spaces are dropped; \r becomes a plain space.
    cleanup_table = str.maketrans({"\t": None, "\n": None, "\xa0": None, "\r": " "})

    def _quoted_values(script_lines, marker):
        # For every script line matching `marker`, take the text between the
        # first pair of double quotes (lines look like `el.value = "...";`).
        pattern = re.compile(marker)
        return [line.split("\"")[1] for line in script_lines if pattern.search(line)]

    def get_depts():
        # Top-level areas come straight from the <select name="col"> options;
        # the first option is skipped (presumably a placeholder entry).
        url = "http://sugang.korea.ac.kr/lecture/LecEtcSub.jsp?lang=KOR"
        bs = BeautifulSoup(requests.get(url).text, "html.parser")
        cols1 = [{"col": option["value"], "col_name": option.text,
                  "dept": option["value"], "dept_name": option.text}
                 for option in bs.select("[name='col'] option")[1:]]

        # Sub-areas of col "01" are only available as JavaScript assignments
        # inside the popup page, so they are pulled out of the script text.
        url = "http://sugang.korea.ac.kr/lecture/LecDeptPopup.jsp?frm=frm_ets&colcd=01&deptcd=&dept=dept&lang=KOR"
        bs = BeautifulSoup(requests.get(url).text, "html.parser")
        script_lines = bs.select_one("script").text.replace("\r", "").replace("\t", "").split("\n")
        values = _quoted_values(script_lines, 'el.value =')
        texts = _quoted_values(script_lines, 'el.text =')
        colors = _quoted_values(script_lines, 'el.style.color =')
        # Only entries coloured "black" are kept; presumably other colours
        # mark non-selectable group headers -- TODO confirm.
        cols2 = [{'col': '01', 'col_name': '교양', "dept": value, "dept_name": text}
                 for value, text, color in zip(values, texts, colors)
                 if color == "black"]
        return [*cols2, *cols1]

    def _preproc(text):
        return text.translate(cleanup_table)

    def _cell(row, index):
        return _preproc(row.select_one("td:nth-of-type(%d)" % index).text)

    def _get_lecture_info(row):
        return {"courseDivide": _cell(row, 2), "courseTitle": _cell(row, 4),
                "courseProfessor": _cell(row, 5), "courseCredit": _cell(row, 6),
                "courseTime": _cell(row, 7)}

    def get_lectures(dept):
        url = "http://sugang.korea.ac.kr/lecture/LecEtcSub.jsp?lang=KOR"
        params = {'yy': year, 'tm': term, 'campus': '1', 'col': dept["col"]}
        # Entries taken from the <select> use the same code for col and dept;
        # for those no separate "dept" field is sent.
        if dept["dept"] != dept["col"]:
            params["dept"] = dept["dept"]
        bs = BeautifulSoup(requests.post(url, data=params).text, "html.parser")

        tbody = bs.select_one("tbody")
        if tbody is None:
            # No result table on the page: nothing offered for this entry.
            return []
        try:
            lecture_infos = [_get_lecture_info(row) for row in tbody.select("tr")]
        except AttributeError:
            # A row without one of the expected cells makes select_one return
            # None. As before, the whole department is then skipped, but
            # unrelated errors (and Ctrl-C) are no longer swallowed.
            return []
        return [{"courseUniversity": "학부", "courseYear": year, "courseTerm": term,
                 "courseArea": "교양", "courseMajor": dept["dept_name"], **lecture_info}
                for lecture_info in lecture_infos]

    # Flatten in one pass instead of the quadratic sum(list_of_lists, []).
    return [lecture for dept in get_depts() for lecture in get_lectures(dept)]

# 대학 강의 리스트

In [3]:
def get_univ_lectures(year="2018", term="1R"):
    """Scrape the undergraduate major (전공) lecture list from sugang.korea.ac.kr.

    Args:
        year: Academic year, sent to the server as ``yy`` (lecture search)
            and ``year`` (department popup).
        term: Term code, sent to the server as ``tm`` / ``term``.

    Returns:
        A flat list with one dict per lecture row. Every dict has the keys
        ``courseUniversity``, ``courseYear``, ``courseTerm``, ``courseArea``,
        ``courseMajor``, ``courseDivide``, ``courseTitle``,
        ``courseProfessor``, ``courseCredit`` and ``courseTime``.
        A department whose result page cannot be parsed contributes no rows.

    Raises:
        AttributeError: if a department popup page has no ``<script>``
            element. Errors raised by ``requests`` are not caught either.
    """
    # \t, \n and non-breaking spaces are dropped; \r becomes a plain space.
    cleanup_table = str.maketrans({"\t": None, "\n": None, "\xa0": None, "\r": " "})

    def _quoted_values(script_lines, marker):
        # For every script line matching `marker`, take the text between the
        # first pair of double quotes (lines look like `el.value = "...";`).
        pattern = re.compile(marker)
        return [line.split("\"")[1] for line in script_lines if pattern.search(line)]

    def get_cols():
        url = "http://sugang.korea.ac.kr/lecture/LecMajorSub.jsp?lang=KOR"
        bs = BeautifulSoup(requests.get(url).text, "html.parser")
        # The first option is skipped (presumably a placeholder entry), as are
        # colleges whose name ends in "(관)".
        return [{"col_name": option.text, "col": option["value"]}
                for option in bs.select("#col option")[1:]
                if option.text[-3:] != "(관)"]

    def get_depts(col):
        # Departments are only available as JavaScript assignments inside the
        # popup page, so they are pulled out of the script text.
        url = "http://sugang.korea.ac.kr/lecture/LecDeptPopup.jsp?frm=frm_ms&deptcd=&dept=dept&lang=KOR"
        params = {"colcd": col["col"], "year": year, "term": term}
        bs = BeautifulSoup(requests.get(url, params=params).text, "html.parser")
        script_lines = bs.select_one("script").text.replace("\r", "").replace("\t", "").split("\n")
        values = _quoted_values(script_lines, 'el.value =')
        texts = _quoted_values(script_lines, 'el.text =')
        # Codes and names are matched by their position in the script.
        return [{"col_name": col["col_name"], "col": col["col"], "dept_name": text, "dept": value}
                for value, text in zip(values, texts)]

    def _preproc(text):
        return text.translate(cleanup_table)

    def _cell(row, index):
        return _preproc(row.select_one("td:nth-of-type(%d)" % index).text)

    def _get_lecture_info(row):
        return {"courseDivide": _cell(row, 3), "courseTitle": _cell(row, 5),
                "courseProfessor": _cell(row, 6), "courseCredit": _cell(row, 7),
                "courseTime": _cell(row, 8)}

    def get_lectures(dept):
        url = "http://sugang.korea.ac.kr/lecture/LecMajorSub.jsp?lang=KOR"
        params = {'yy': year, 'tm': term, 'sCampus': '1', 'col': dept["col"], 'dept': dept["dept"]}
        bs = BeautifulSoup(requests.post(url, data=params).text, "html.parser")

        tbody = bs.select_one("tbody")
        if tbody is None:
            # No result table on the page: nothing offered for this department.
            return []
        try:
            lecture_infos = [_get_lecture_info(row) for row in tbody.select("tr")]
        except AttributeError:
            # A row without one of the expected cells makes select_one return
            # None. As before, the whole department is then skipped, but
            # unrelated errors (and Ctrl-C) are no longer swallowed.
            return []
        return [{"courseUniversity": "학부", "courseYear": year, "courseTerm": term,
                 "courseArea": "전공", "courseMajor": dept["dept_name"], **lecture_info}
                for lecture_info in lecture_infos]

    # Flatten in one pass instead of the quadratic sum(list_of_lists, []).
    depts = [dept for col in get_cols() for dept in get_depts(col)]
    return [lecture for dept in depts for lecture in get_lectures(dept)]

# 대학원 강의 리스트

In [4]:
def get_grad_lectures(year="2018", term="1R"):
    """Scrape the graduate-school (일반대학원) lecture list from sugang.korea.ac.kr.

    Args:
        year: Academic year, sent to the server as the ``yy`` form field.
        term: Term code, sent to the server as the ``tm`` form field.

    Returns:
        A flat list with one dict per lecture row. Every dict has the keys
        ``courseUniversity``, ``courseYear``, ``courseTerm``, ``courseArea``,
        ``courseMajor``, ``courseDivide``, ``courseTitle``,
        ``courseProfessor``, ``courseCredit`` and ``courseTime``.
        A department whose result page cannot be parsed contributes no rows.

    Raises:
        AttributeError: if the department popup page has no ``<script>``
            element. Errors raised by ``requests`` are not caught either.
    """
    # \t, \n and non-breaking spaces are dropped; \r becomes a plain space.
    cleanup_table = str.maketrans({"\t": None, "\n": None, "\xa0": None, "\r": " "})

    def _quoted_values(script_lines, marker):
        # For every script line matching `marker`, take the text between the
        # first pair of double quotes (lines look like `el.value = "...";`).
        pattern = re.compile(marker)
        return [line.split("\"")[1] for line in script_lines if pattern.search(line)]

    def get_depts():
        # Departments of college code 0309 are only available as JavaScript
        # assignments inside the popup page, so they are pulled out of the
        # script text.
        url = "http://sugang.korea.ac.kr/lecture/LecDeptPopup.jsp?frm=frm_gms&colcd=0309&deptcd=&dept=dept&lang=KOR"
        bs = BeautifulSoup(requests.get(url).text, "html.parser")
        script_lines = bs.select_one("script").text.replace("\r", "").replace("\t", "").split("\n")
        values = _quoted_values(script_lines, 'el.value =')
        texts = _quoted_values(script_lines, 'el.text =')
        # Codes and names are matched by their position in the script.
        return [{"col_name": "일반대학원", "col": "0309", "dept_name": text, "dept": value}
                for value, text in zip(values, texts)]

    def _preproc(text):
        return text.translate(cleanup_table)

    def _cell(row, index):
        return _preproc(row.select_one("td:nth-of-type(%d)" % index).text)

    def _get_lecture_info(row):
        return {"courseDivide": _cell(row, 3), "courseTitle": _cell(row, 5),
                "courseProfessor": _cell(row, 6), "courseCredit": _cell(row, 7),
                "courseTime": _cell(row, 8)}

    def get_lectures(dept):
        url = "http://sugang.korea.ac.kr/lecture/LecGradMajorSub.jsp?lang=KOR"
        params = {'yy': year, 'tm': term, 'col': dept["col"], 'dept': dept["dept"]}
        bs = BeautifulSoup(requests.post(url, data=params).text, "html.parser")

        tbody = bs.select_one("tbody")
        if tbody is None:
            # No result table on the page: nothing offered for this department.
            return []
        try:
            lecture_infos = [_get_lecture_info(row) for row in tbody.select("tr")]
        except AttributeError:
            # A row without one of the expected cells makes select_one return
            # None. As before, the whole department is then skipped, but
            # unrelated errors (and Ctrl-C) are no longer swallowed.
            return []
        return [{"courseUniversity": "대학원", "courseYear": year, "courseTerm": term,
                 "courseArea": "일반대학원", "courseMajor": dept["dept_name"], **lecture_info}
                for lecture_info in lecture_infos]

    # Flatten in one pass instead of the quadratic sum(list_of_lists, []).
    return [lecture for dept in get_depts() for lecture in get_lectures(dept)]

---

In [5]:
# (year, term) pairs to crawl: every combination of the years and terms
# below, minus the two pairs that are explicitly left out.
years = ["2017", "2018"]
terms = ["1R", "2R"]
_excluded = {("2017", "1R"), ("2018", "2R")}
argss = [
    (year, term)
    for year in years
    for term in terms
    if (year, term) not in _excluded
]

In [6]:
# Collect the liberal-arts lectures of every selected (year, term) pair
# into a single flat list and report how many were found.
cults = [lecture for args in argss for lecture in get_cult_lectures(*args)]
print(len(cults))

2756


In [7]:
# Collect the undergraduate major lectures of every selected (year, term)
# pair into a single flat list and report how many were found.
univs = [lecture for args in argss for lecture in get_univ_lectures(*args)]
print(len(univs))

3346


In [8]:
# Collect the graduate-school lectures of every selected (year, term) pair
# into a single flat list and report how many were found.
grads = [lecture for args in argss for lecture in get_grad_lectures(*args)]
print(len(grads))

3980


In [12]:
# Merge the three crawls (liberal arts, majors, graduate school) in that
# order and report the combined size.
total_lectures = [*cults, *univs, *grads]
print(len(total_lectures))

10082


In [42]:
def convert_room2time(text, time_text=""):
    """Extract a normalised weekly timetable string from a raw time/room cell.

    The input is scanned for slots written as ``<weekday>(<period>)`` or
    ``<weekday>(<first>-<last>)``, where the weekday is one of 월화수목금.
    Everything else in the text (e.g. the room name) is ignored.

    A range covers every period from its first to its last one, so
    ``"수(6-8)"`` yields periods 6, 7 and 8. Periods may have more than one
    digit (``"금(10)"`` is period 10, not periods 1 and 0).

    Args:
        text: Raw cell text, e.g. ``"월(1-2) 교양관 101"``.
        time_text: Prefix that the generated timetable is appended to.

    Returns:
        ``time_text`` followed by ``"<weekday>:[p1][p2]..."`` for each weekday
        that has at least one period, in 월화수목금 order and with periods in
        ascending numeric order without duplicates. If no slot is found the
        prefix is returned unchanged.
    """
    # Raw string: "\(" and "\d" are invalid escapes in a normal literal.
    # The trailing "-" with no second number ("월(1-)") is still accepted and
    # treated as a single period.
    slot_pattern = re.compile(r"([월화수목금])\((\d+)(?:-(\d*))?\)")
    periods_by_day = {weekday: set() for weekday in "월화수목금"}

    for weekday, first, last in slot_pattern.findall(text):
        start = int(first)
        end = int(last) if last else start
        # Tolerate a reversed range such as "(3-1)" instead of dropping it.
        low, high = min(start, end), max(start, end)
        periods_by_day[weekday].update(range(low, high + 1))

    pieces = [time_text]
    for weekday, periods in periods_by_day.items():
        if periods:
            pieces.append("%s:" % weekday)
            pieces.extend("[%s]" % period for period in sorted(periods))
    return "".join(pieces)

In [43]:
# Column order of COURSE.csv. Columns that the scraped dicts do not provide
# start out as NaN and are blanked by the fillna("") call before export.
columns = ["courseID","courseUniversity","courseYear","courseTerm",
           "courseArea","courseMajor","courseGrade","courseTitle",
           "courseCredit","courseDivide","coursePersonnel","courseProfessor",
           "courseTime","courseRoom"]
df = pd.DataFrame(total_lectures, columns=columns)

# The scraped "courseTime" cell is kept verbatim as courseRoom, and the
# normalised timetable string is derived from it.
df["courseRoom"] = df["courseTime"]
df["courseTime"] = [convert_room2time(text) for text in df["courseRoom"]]

# Collapse every run of whitespace into a single space.
# Raw strings: "\S" and "\d" are invalid escapes in a normal string literal.
text_proc = re.compile(r"\S+")
df["courseTitle"] = [" ".join(text_proc.findall(text)) for text in df["courseTitle"]]
df["courseRoom"] = [" ".join(text_proc.findall(text)) for text in df["courseRoom"]]

# Keep only the first integer found in the credit cell.
credit_proc = re.compile(r"\d+")
df["courseCredit"] = [int(credit_proc.search(s).group()) for s in df["courseCredit"]]

# Capacity is not scraped; every course gets the fixed value 100.
df["coursePersonnel"] = [100] * len(df)

# Translate the term codes into their Korean display names.
termMap = {"1R": "1학기", "1S": "여름학기", "2R": "2학기", "2W": "겨울학기"}
df["courseTerm"] = [termMap[term] for term in df["courseTerm"]]

df = df.fillna("")
# Written without index and without a header row.
df.to_csv("COURSE.csv", index=False, header=False, encoding="utf-8")