# World Top CS Universities [2021]

## Import Modules

In [1]:
import csv
from collections import defaultdict
from dataclasses import dataclass
from itertools import islice
from statistics import mean

## Define Model

In [2]:
@dataclass
class Uni:
    """Represent a university together with its position in a ranking.

    Attributes:
        name: University name as spelled in the ranking source.
        rank: Position in the ranking. Integer for a single ranking; may be
            fractional when it holds the mean of several rankings, hence
            the ``float`` annotation (which also accepts ``int``).
    """
    name: str
    rank: float

## Load Rankings

In [3]:
def load_ranking(path: str, delimiter: str = None, limit: int = None) -> list[Uni]:
    """Load a ranking from a delimited text file.

    Each row must hold exactly two fields: the university name followed by
    its integer rank.

    Args:
        path: Location of the ranking file.
        delimiter: One-character field separator. ``None`` falls back to the
            ``csv`` module default (a comma).
        limit: Maximum number of rows to read; ``None`` reads every row.

    Returns:
        One ``Uni`` per row, in file order.

    Raises:
        ValueError: If a row does not have exactly two fields or its rank
            cannot be parsed as an integer.
    """
    # csv.reader rejects delimiter=None, so only forward it when it was given.
    fmt = {} if delimiter is None else {"delimiter": delimiter}
    # newline="" lets the csv module do its own newline handling, as its
    # documentation recommends for files passed to csv.reader.
    with open(path, newline="") as file:
        reader = csv.reader(file, **fmt)
        return [Uni(name, int(rank)) for name, rank in islice(reader, limit)]

# Read the first 50 rows of each tab-separated ranking file.
ranking_qs, ranking_the, ranking_arwu = (
    load_ranking(tsv_path, "\t", 50)
    for tsv_path in (
        "data/ranking-qs-cs-2021.tsv",
        "data/ranking-the-cs-2021.tsv",
        "data/ranking-arwu-cs-2020.tsv",
    )
)

## Preview Rankings

In [4]:
def preview_ranking(ranking: list[Uni], limit: int = 10) -> None:
    """Print the first entries of a ranking, then its total length.

    Args:
        ranking: Universities to preview, in the order they should be shown.
        limit: Maximum number of entries to print. Values of zero or below
            print no entries.
    """
    # Slicing never runs past the end, so a ranking shorter than `limit`
    # is printed in full instead of raising IndexError.
    for uni in ranking[:max(limit, 0)]:
        print(f"{uni.name}\t{uni.rank}")
    print(
        f"-----------------------------\n"
        f"Length: {len(ranking)}"
    )

In [5]:
# Show the first 10 entries (the default limit) of the QS ranking and its length.
preview_ranking(ranking_qs)

Massachusetts Institute of Technology (MIT)	1
Stanford University	2
Carnegie Mellon University	3
National University of Singapore (NUS)	4
University of California, Berkeley (UCB)	5
University of Oxford	6
Harvard University	7
University of Cambridge	8
Ecole Polytechnique Federale de Lausanne (EPFL)	9
ETH Zurich - Swiss Federal Institute of Technology	10
-----------------------------
Length: 50


In [6]:
# Show the first 10 entries (the default limit) of the THE ranking and its length.
preview_ranking(ranking_the)

University of Oxford	1
Stanford University	2
Massachusetts Institute of Technology (MIT)	3
ETH Zurich - Swiss Federal Institute of Technology	4
Carnegie Mellon University	5
University of Cambridge	6
Harvard University	7
National University of Singapore (NUS)	8
University of California, Berkeley (UCB)	9
Imperial College London	10
-----------------------------
Length: 50


In [7]:
# Show the first 10 entries (the default limit) of the ARWU ranking and its length.
preview_ranking(ranking_arwu)

Massachusetts Institute of Technology (MIT)	1
Stanford University	2
University of California, Berkeley (UCB)	3
Carnegie Mellon University	4
ETH Zurich - Swiss Federal Institute of Technology	5
Harvard University	6
Tsinghua University	7
Nanyang Technological University, Singapore (NTU)	8
University of Toronto	9
University of Oxford	10
-----------------------------
Length: 50


## Clean Rankings

Sometimes universities have different names in different rankings. For example, one ranking might use universities' full names while another might prefer acronyms. This might cause duplicates down the line.

To avoid this issue, let's save all university names sorted alphabetically and try to spot duplicates. They typically appear next to each other, since they differ only slightly. Once we spot a duplicate, we pick the preferred name, update our TSV rankings, and repeat this process until no duplicates are left.

In [8]:
# Distinct university names used by each individual ranking.
unis_qs, unis_the, unis_arwu = (
    {entry.name for entry in source}
    for source in (ranking_qs, ranking_the, ranking_arwu)
)

# Every name that occurs in at least one ranking.
unis_all = set().union(unis_qs, unis_the, unis_arwu)

# Dump the names alphabetically, one per line, followed by a count line,
# so near-identical spellings can be spotted by eye.
with open("data/universities_sorted.tsv", "w") as file:
    for uni in sorted(unis_all):
        print(uni, file=file)
    file.write(f"Length: {len(unis_all)}")

## Combine Rankings

In [9]:
rankings = [ranking_qs, ranking_the, ranking_arwu]

# Map each university name to the ranks it received, in the order the
# rankings are listed above.
uni_ranks = {}
for ranking in rankings:
    for uni in ranking:
        uni_ranks.setdefault(uni.name, []).append(uni.rank)

In [10]:
# Fill missing ranks
# A university absent from a ranking's top 50 is given rank 51 there: the
# best possible rank when not in top-50. Pad every list up to one rank per
# ranking so the averages below are comparable.
for ranks in uni_ranks.values():
    ranks.extend([51] * (len(rankings) - len(ranks)))

In [11]:
# Collapse each university's per-ranking ranks into their arithmetic mean.
ranking_all = [
    Uni(name=uni_name, rank=mean(uni_ranks[uni_name]))
    for uni_name in uni_ranks
]

## Save Final Ranking

In [12]:
def save_ranking(
    ranking: list[Uni],
    limit: int = None,
    path: str = "data/ranking_global_cs_2021.tsv",
) -> None:
    """Save a ranking to a TSV file and preview it on standard output.

    Entries are ordered by ascending ``rank``; entries with equal rank keep
    their relative input order. Each row is written as
    ``<position>\\t<name>\\t<rank with two decimals>``.

    Args:
        ranking: Universities to save. This argument is what gets sorted and
            written, not any module-level ranking.
        limit: Maximum number of rows to write; ``None`` writes all rows,
            values of zero or below write none.
        path: Destination file; overwritten if it exists.
    """
    ranking_sorted = sorted(ranking, key=lambda uni: uni.rank)
    if limit is not None:
        ranking_sorted = ranking_sorted[:max(limit, 0)]
    with open(path, "w") as file:
        for i, uni in enumerate(ranking_sorted, start=1):
            row = f"{i}\t{uni.name}\t{uni.rank:.2f}"
            file.write(f"{row}\n")
            print(row)  # Preview

# Save and preview at most the first 60 universities of the combined ranking.
save_ranking(ranking_all, 60)

1	Massachusetts Institute of Technology (MIT)	1.67
2	Stanford University	2.00
3	Carnegie Mellon University	4.00
4	University of California, Berkeley (UCB)	5.67
5	University of Oxford	5.67
6	ETH Zurich - Swiss Federal Institute of Technology	6.33
7	Harvard University	6.67
8	National University of Singapore (NUS)	10.00
9	Tsinghua University	10.67
10	Princeton University	12.33
11	Nanyang Technological University, Singapore (NTU)	12.67
12	University of Toronto	14.33
13	University of California, Los Angeles (UCLA)	14.67
14	Cornell University	15.67
15	University of Cambridge	17.33
16	Imperial College London	20.00
17	University College London (UCL)	21.33
18	Ecole Polytechnique Federale de Lausanne (EPFL)	23.00
19	University of Washington	23.00
20	Peking University	24.00
21	University of Texas at Austin	24.67
22	Columbia University	26.00
23	University of Edinburgh	26.33
24	Georgia Institute of Technology	26.67
25	Shanghai Jiao Tong University	27.67
26	New York University (NYU)	28.00
27	Univers