# World Top CS Universities [2020]

## Import Modules

In [1]:
import csv
from typing import List

## Read Rankings

Our data sources are the following:

* [QS: World University Rankings 2020 — Computer Science & Information Systems](https://www.topuniversities.com/university-rankings/university-subject-rankings/2020/computer-science-information-systems)
* [Times Higher Education: World University Rankings 2020 — Computer Science](https://www.timeshighereducation.com/world-university-rankings/2020/subject-ranking/computer-science)
* [Shanghai Ranking Consultancy: Academic Ranking of World Universities 2019 — Computer Science & Engineering](http://www.shanghairanking.com/Shanghairanking-Subject-Rankings/computer-science-engineering.html)


In [2]:
def read_tsv(path: str, limit: int = 50) -> List:
    """Read the first ``limit`` rows of a tab-separated file.

    Args:
        path: Location of the TSV file.
        limit: Maximum number of rows to return, counted from the top of the
            file. Defaults to 50 (we stick to the top-50 institutions).

    Returns:
        A list of rows, each row being the list of string fields produced by
        ``csv.reader``. A file with fewer rows yields fewer than ``limit`` rows.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # which garbles accented names (e.g. "Fédérale") on non-UTF-8 locales.
    # newline="" is what the csv module asks for when given a file object.
    with open(path, encoding="utf-8", newline="") as tsv_file:
        reader = csv.reader(tsv_file, delimiter="\t")
        lines = []
        for row in reader:
            if len(lines) >= limit:
                break  # Stop early; no need to parse the rest of the file
            lines.append(row)

    return lines

# Load the three source rankings (THE, QS, Shanghai) in one pass; the order of
# the paths matches the order of the names on the left-hand side.
ranking_the, ranking_qs, ranking_shanghai = map(
    read_tsv,
    (
        "data/ranking-the-2020-world-cs.tsv",
        "data/ranking-qs-2020-world-cs.tsv",
        "data/ranking-shanghai-2019-world-cs.tsv",
    ),
)

## Preview Rankings

### QS Ranking

In [3]:
def preview(ranking: List, line_cnt: int = 10) -> None:
    """Print the length of a ranking followed by its first rows.

    Args:
        ranking: Sequence of two-item rows, unpacked as ``(rank, university)``.
        line_cnt: Maximum number of rows to print. Defaults to 10.
    """
    print(f"Length: {len(ranking)}")

    # Slice rather than index over range(line_cnt): a ranking shorter than
    # line_cnt then prints what it has instead of raising IndexError.
    # max() keeps a negative line_cnt printing no rows, as range() did.
    for rank, uni in ranking[:max(line_cnt, 0)]:
        print(f"{rank}\t{uni}")

# Show the length and first rows of the QS ranking (preview defaults to 10 lines).
preview(ranking_qs)

Length: 50
1	Massachusetts Institute of Technology (MIT)
2	Stanford University
3	Carnegie Mellon University
4	University of California, Berkeley
5	University of Oxford
6	University of Cambridge
7	Harvard University
8	Ecole Polytechnique Fédérale de Lausanne (EPFL)
9	ETH Zurich
10	University of Toronto


### THE Ranking

In [4]:
# Show the length and first rows of the THE ranking (preview defaults to 10 lines).
preview(ranking_the)

Length: 50
1	University of Oxford
2	Stanford University
3	ETH Zurich
4	Massachusetts Institute of Technology (MIT)
5	University of Cambridge
6	Carnegie Mellon University
7	Imperial College London
8	Harvard University
9	Princeton University
10	California Institute of Technology (Caltech)


### Shanghai Ranking

In [5]:
# Show the length and first rows of the Shanghai ranking (preview defaults to 10 lines).
preview(ranking_shanghai)

Length: 50
1	Massachusetts Institute of Technology (MIT)
2	Stanford University
3	University of California, Berkeley
4	Carnegie Mellon University
5	ETH Zurich
6	Harvard University
7	Tsinghua University
8	University of California, Los Angeles (UCLA)
9	Princeton University
10	University of Oxford


## Clean Rankings

In [6]:
def get_unis(ranking: List) -> set:
    """Collect the distinct university names of a ranking.

    Each entry of ``ranking`` is unpacked as a two-item row: the first item is
    ignored and the second one is kept.
    """
    return {name for _, name in ranking}

# One set of university names per source ranking, in the order THE, QS, Shanghai.
universities_the, universities_qs, universities_shanghai = map(
    get_unis,
    (ranking_the, ranking_qs, ranking_shanghai),
)

In [7]:
# Check sizes: one line per name set, in the order THE, QS, Shanghai.
print(
    len(universities_the),
    len(universities_qs),
    len(universities_shanghai),
    sep="\n",
)

50
50
50


In [8]:
# Union of the three name sets: a name shared by several rankings is kept once.
universities_all = universities_the | universities_qs | universities_shanghai

# The file saved below was used to make duplicates more apparent. These were then
# eliminated by harmonizing the university names across the three starting rankings.
# The encoding is stated explicitly so the accented names are written the same
# way on every platform instead of following the locale default.
with open("data/universities_sorted.tsv", "w", encoding="utf-8") as file:
    file.write(f"{len(universities_all)}\n")  # First line: number of distinct names
    for uni in sorted(universities_all):
        file.write(f"{uni}\n")

In [9]:
# Number of distinct university names across the three rankings.
len(universities_all)

71

## Merge Rankings

In [10]:
# Map every university name to the list of ranks it received. The ranks are
# appended in the order the rankings are listed here (THE, QS, Shanghai); a
# university absent from a ranking simply ends up with a shorter list.
rankings_cleaned = [ranking_the, ranking_qs, ranking_shanghai]
rankings_per_university = {}

for ranking in rankings_cleaned:
    for rank, uni in ranking:
        rank = int(rank)  # Convert before storing
        rankings_per_university.setdefault(uni, []).append(rank)

In [11]:
print(rankings_per_university)  # Preview

{'University of Oxford': [1, 5, 10], 'Stanford University': [2, 2, 2], 'ETH Zurich': [3, 9, 5], 'Massachusetts Institute of Technology (MIT)': [4, 1, 1], 'University of Cambridge': [5, 6, 27], 'Carnegie Mellon University': [6, 3, 4], 'Imperial College London': [7, 14, 40], 'Harvard University': [8, 7, 6], 'Princeton University': [9, 11, 9], 'California Institute of Technology (Caltech)': [10, 29], 'National University of Singapore': [11, 12, 16], 'University of California, Los Angeles (UCLA)': [12, 15, 8], 'Nanyang Technological University (NTU)': [13, 16, 13], 'Cornell University': [14, 19, 14], 'Tsinghua University': [15, 13, 7], 'Georgia Institute of Technology': [16, 27, 18], 'Hong Kong University of Science and Technology': [17, 26], 'Technical University of Munich': [18, 36], 'University College London (UCL)': [19, 17, 19], 'Ecole Polytechnique Fédérale de Lausanne (EPFL)': [20, 8, 33], 'Columbia University': [21, 19, 22], 'University of Michigan-Ann Arbor': [22, 48, 20], 'Univ

In [12]:
# Final score per university: the mean of its ranks across the three rankings.
ranking_2020_world_cs = {}

for uni, ranks in rankings_per_university.items():
    # If not present in a ranking, we default to a #100 ranking there.
    # The padding goes into a new list: appending to `ranks` would silently
    # alter rankings_per_university and make its earlier preview stale.
    missing = max(3 - len(ranks), 0)
    padded_ranks = ranks + [100] * missing

    ranking_2020_world_cs[uni] = sum(padded_ranks) / len(padded_ranks)

## Save Final Ranking

In [13]:
def save_ranking(
    ranking: dict,
    path: str = "data/ranking_2020_world_cs.tsv",
    top_n: int = 10,
) -> None:
    """Write the merged ranking to a file and print its first entries.

    Entries are ordered by ascending score; equal scores are ordered by
    university name. Each line holds the 1-based position, the name and the
    score with two decimals, joined by " \\t ".

    Args:
        ranking: Mapping of university name to its numeric score.
        path: Destination file. Defaults to the notebook's output file.
        top_n: Number of leading lines that are also printed. Defaults to 10.
    """
    # Sort before opening so a failure here does not leave a truncated file.
    ordered = sorted(ranking.items(), key=lambda item: (item[1], item[0]))

    # Explicit UTF-8 keeps accented names identical across platforms.
    with open(path, "w", encoding="utf-8") as file:
        for position, (uni, rank) in enumerate(ordered, start=1):
            line = f"{position} \t {uni} \t {rank:.2f}"
            file.write(f"{line}\n")
            if position <= top_n:
                print(line)  # Let's print the top universities

# Write the merged ranking to disk; the top 10 lines are echoed below.
save_ranking(ranking_2020_world_cs)

1 	 Massachusetts Institute of Technology (MIT) 	 2.00
2 	 Stanford University 	 2.00
3 	 Carnegie Mellon University 	 4.33
4 	 University of Oxford 	 5.33
5 	 ETH Zurich 	 5.67
6 	 Harvard University 	 7.00
7 	 Princeton University 	 9.67
8 	 Tsinghua University 	 11.67
9 	 University of California, Los Angeles (UCLA) 	 11.67
10 	 University of Cambridge 	 12.67


# MOOCs

## Review in the Class Central Database

 - https://www.classcentral.com/providers
 - https://www.classcentral.com/universities
 - https://www.classcentral.com/report/mooc-stats-2019/

# References
 - https://www.classcentral.com/report/cs-online-courses/