# 企业洞察

- 近 10 年全球企业 OpenRank 演变图
  - 绘制 Bump Charts
- 近 10 年中国企业 OpenRank 演变图
  - 绘制 Bump Charts


In [1]:
import sys
from typing import List
import time

import requests
from pydantic import BaseModel


class OpenRankEntryItem(BaseModel):
    """The entity that a single leaderboard row refers to."""

    # Name of the ranked entity; the sample output in this notebook shows
    # company names such as 'Google'.
    name: str


class OpenRankEntry(BaseModel):
    """One row of an OpenRank leaderboard file."""

    # The ranked entity (wraps its name).
    item: OpenRankEntryItem
    # Position in the leaderboard; the sample row printed in this notebook
    # for the first entry shows 1.
    rank: int
    # OpenRank value of the entity; this is the number plotted later on.
    value: float
    # NOTE(review): presumably the change in rank / value relative to the
    # previous period -- confirm against the data source.
    rankDelta: int
    valueDelta: float


class OpenRank(BaseModel):
    """Top-level document parsed from one leaderboard JSON file."""

    # NOTE(review): the set of possible values is not visible in this
    # notebook -- confirm against the data source.
    type: str
    # For monthly OpenRank data this field is a str
    # (presumably an int for yearly data -- confirm).
    time: int | str
    # Leaderboard rows for the period.
    data: List[OpenRankEntry]


# In practice the xlab-open-source.oss-cn-beijing bucket only has data from
# 2015 onwards.
# Yearly snapshots alone are too coarse a step, so the December 2023 monthly
# snapshot is appended as one extra data point.
year_range = list(range(2015, 2024)) + ["202312"]

# NOTE: despite its name this list holds the leaderboard selected by `region`
# below (currently "global"); the name is kept because later cells use it.
chinese_list: List[OpenRank] = []

# Request settings that do not change between iterations.
region = "global"  # chinese / global
request_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
}
# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the notebook indefinitely.
request_timeout = 30

for year in year_range:
    url = f"http://xlab-open-source.oss-cn-beijing.aliyuncs.com/open_leaderboard/open_rank/company/{region}/{year}.json"

    try:
        response = requests.get(url, headers=request_headers, timeout=request_timeout)
        # Turn 4xx / 5xx answers into exceptions so they are handled below.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching data for year {year}: {e}")
        # Exit with a non-zero status so the failure is not reported as success.
        sys.exit(1)

    if response.status_code == 200:
        raw_dict = response.json()
        chinese_list.append(OpenRank.model_validate(raw_dict))
    else:
        # A non-200 success status carries no usable leaderboard. Skip it
        # instead of storing None, which would break the later cells that
        # read `.data` from every element of the list.
        print(f"Unexpected status {response.status_code} for year {year}; skipping")

    # Small pause between requests to go easy on the server.
    time.sleep(0.5)

# TODO: cache the downloaded data

In [2]:
# Show the first row of the first fetched period as a plain dict, as a quick
# check that the download and parsing worked.
chinese_list[0].data[0].model_dump()

{'item': {'name': 'Google'},
 'rank': 1,
 'value': 25370.18,
 'rankDelta': 0,
 'valueDelta': 25370.18}

In [3]:
# Take the names of the first 15 rows of the most recent period; these are
# the companies that will be tracked in the chart (final_list).
final_list = [row.item.name for row in chinese_list[-1].data[:15]]
# `it` stays bound to the same list so it can be displayed as the cell output.
it = final_list
it

['Microsoft',
 'Huawei',
 'Google',
 'Amazon',
 'Meta',
 'Alibaba',
 'Elastic',
 'HashiCorp',
 'Ant group',
 'VMWare',
 'Intel',
 'Baidu',
 'IBM',
 'NVIDIA',
 'Odoo']

In [4]:
import pandas as pd


class BumpChartItem(BaseModel):
    """One data point of the bump chart: a company's value for one period."""

    # Period label, filled from str(year_entry.time) below (e.g. "2015").
    date: str
    # Company name, filled from entry.item.name.
    symbol: str
    # OpenRank value, filled from entry.value; the chart ranks by this field.
    price: float


# Rows for the bump chart. Every row is the plain dict returned by
# BumpChartItem.model_dump(), hence a list of dicts rather than of models.
chinese_bump_chart_list: List[dict] = []

# Set lookup for the name filter instead of scanning the list for every row.
selected_names = set(final_list)

for year_entry in chinese_list:
    # The fetch cell may store None for a period it could not load; skip it
    # rather than failing on `.data`.
    if year_entry is None:
        continue

    # The December 2023 snapshot is treated as the 2024 data point. The label
    # is computed once per period and the fetched model is left untouched, so
    # re-running this cell always starts from the same data.
    date_label = "2024" if year_entry.time == "202312" else str(year_entry.time)

    for entry in year_entry.data:
        # Drop companies that are not in final_list.
        if entry.item.name not in selected_names:
            continue

        chinese_bump_chart_list.append(
            # Build the model, then dump it straight back to a dict: the
            # DataFrame below is constructed from plain dicts. (The original
            # author notes this looks odd but was needed.)
            BumpChartItem(
                date=date_label,
                symbol=entry.item.name,
                price=entry.value,
            ).model_dump()
        )


chinese_bump_chart_df = pd.DataFrame(chinese_bump_chart_list)

chinese_bump_chart_df[:5]

Unnamed: 0,date,symbol,price
0,2015,Google,25370.18
1,2015,Microsoft,17507.17
2,2015,Meta,11307.88
3,2015,Elastic,6784.51
4,2015,VMWare,4380.96


In [5]:
import altair as alt

# Fifteen hex colours; the scale below pairs them with the names in
# final_list, in order.
company_palette = [
    "#1f77b4", "#aec7e8", "#ff7f0e",
    "#ffbb78", "#2ca02c", "#98df8a",
    "#d62728", "#ff9896", "#9467bd",
    "#c5b0d5", "#8c564b", "#c49c94",
    "#e377c2", "#f7b6d2", "#7f7f7f",
]

# Explicit colour scale so that each tracked company keeps a fixed colour.
color_scheme = alt.Scale(domain=final_list, range=company_palette)

# Stage 1: one semi-transparent, smoothed line (with point markers) per company.
line_layer = alt.Chart(chinese_bump_chart_df).mark_line(
    point=True, radius=2, opacity=0.5, interpolate="monotone"
)

# Stage 2: x is the period, y is the rank computed by the window transform
# (within each date, rows are ordered by descending price and ranked).
ranked_layer = line_layer.encode(
    x=alt.X("date:O").title("时间"),
    y="rank:O",
    color=alt.Color("symbol:N", scale=color_scheme),
).transform_window(
    rank="rank()",
    sort=[alt.SortField("price", order="descending")],
    groupby=["date"],
)

# Stage 3: title and canvas size.
sized_layer = ranked_layer.properties(
    title="全球企业 2015~2024 OpenRank 排名变化",
    width=600,
    height=400,
)

# Axis cosmetics: light dashed grid lines, no tick marks, no axis domain line.
axis_style = {
    "grid": True,
    "gridCap": "round",
    "gridColor": "#e1e1e1",
    "gridDash": [4, 2],  # dashed grid lines
    "tickColor": "#e1e1e1",
    "tickWidth": 0,
    "domainColor": "#e1e1e1",
    "domain": False,
    "domainDash": [4, 2],
}

# Stage 4: chart-level configuration.
result = (
    sized_layer
    # White 1px border around the plotting area.
    .configure_view(strokeWidth=1, stroke="white")
    # Keep the x-axis labels horizontal.
    .configure_axisX(labelAngle=0)
    .configure_axis(**axis_style)
)

# Export a high-resolution PNG, then show the chart interactively.
result.save(fp="openrank_global_bump_chart.png", format="png", scale_factor=10)

jchart = alt.JupyterChart(result)
jchart

JupyterChart(spec={'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300, 'stroke': 'white', 'st…