In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import numpy as np
import matplotlib
from tqdm  import tqdm
from selenium import webdriver
from selenium.webdriver.common.by import By

# Request headers carrying a desktop Chrome (Windows 10, x64) User-Agent string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/128.0.0.0 Safari/537.36'
    ),
}

### 主幹航線準班率

In [2]:
# Download the gcspi index page and parse it with the lxml backend.
# The browser-like `headers` dict from the first cell is sent with the request,
# a timeout keeps Run All from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
res = requests.get(
    'https://www.sse.net.cn/index/singleIndex?indexType=gcspi',
    headers=headers,
    timeout=30,
)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')

In [20]:
# Collect every <table> element found in the parsed page.
tables = soup.find_all(name='table')

# Function to extract table data
def parse_table(table):
    """Convert an HTML table element into a DataFrame of stripped cell text.

    Parameters
    ----------
    table
        Any object whose ``find_all('tr')`` returns the table rows, each row
        in turn exposing ``find_all('td')`` (e.g. a BeautifulSoup ``<table>``
        tag).

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` and one column per ``<td>``, with default integer
        row/column labels. The first table row is kept as the first data row
        (it is not promoted to column names). A table without any ``<tr>``
        yields an empty DataFrame.
    """
    rows = table.find_all('tr')

    # Every row, including the first one, is read the same way: the text of
    # each <td> with surrounding whitespace removed.
    data = [
        [cell.get_text(strip=True) for cell in row.find_all('td')]
        for row in rows
    ]

    return pd.DataFrame(data)

# Display titles for the two tables, kept as fixed strings
# (assumed to correspond to headings shown on the page — not read from the HTML here).
comprehensive_title, trunk_routes_title = (
    "全球主干航线综合准班率指数",
    "全球主干航线到离港/收发货准班率指数",
)

# Turn the first two <table> elements into DataFrames:
# index 0 -> comprehensive punctuality index, index 1 -> trunk-route punctuality.
df_comprehensive = parse_table(table=tables[0])
df_trunk_routes = parse_table(table=tables[1])



In [33]:
# Write the comprehensive table to Excel without the integer column/row labels;
# the table's own first row serves as the header line in the sheet.
df_comprehensive.to_excel(
    '7_上海航運交易所_全球主幹航線綜合準班率指數.xlsx',
    header=False,
    index=False,
)

In [26]:
# Rebuild the two header rows so that each spans all five columns:
# row 0 repeats its 2nd and 3rd entries twice (a merged header stretched over
# the sub-columns beneath it), row 1 is shifted right by one blank cell.
# NOTE(review): this mutates df_trunk_routes in place, so running the cell a
# second time without re-parsing the table produces a different result.
df_trunk_routes.iloc[0] = [df_trunk_routes.iat[0, col] for col in (0, 1, 1, 2, 2)]
df_trunk_routes.iloc[1] = [''] + [df_trunk_routes.iat[1, col] for col in range(4)]

In [34]:
# Write the trunk-route table to Excel without the integer column/row labels;
# the rebuilt header rows inside the frame act as the sheet header.
df_trunk_routes.to_excel(
    '7_上海航運交易所_全球主幹航線到離港與收發獲準班率指數.xlsx',
    header=False,
    index=False,
)

### 港口班輪準班率

In [76]:
# Start a Chrome WebDriver session and open the gcspi_port index page.
driver = webdriver.Chrome()
driver.get('https://www.sse.net.cn/index/singleIndex?indexType=gcspi_port')
# Enlarge the browser window to full size.
driver.maximize_window()

In [78]:
# Read the rendered text of the first table under the element with id "right",
# one list entry per text line.
rows = driver.find_elements(By.XPATH, '//*[@id="right"]/table')[0].text.split('\n')
# The text is now a plain Python list, so the browser session can be released.
# Quit only after a successful read, so a failed lookup can be retried without
# relaunching Chrome.
driver.quit()

new_rows = []
# The first four text lines are skipped (presumably title/header lines — confirm
# against the live page).
for line in rows[4:]:
    fields = line.split(' ')
    # The mapping below reads up to index 8; lines with fewer fields (e.g. a
    # blank or footer line) cannot be mapped and are skipped instead of raising
    # IndexError.
    if len(fields) < 9:
        continue
    new_rows.append({
        '排名': fields[0],
        '港口': fields[1],
        '准班率(%)': fields[2],
        '挂靠数': fields[4],
        '班期综合服务水平': fields[6],
        '在港时间(天)': fields[7],
        '在泊時間(天)': fields[8],
    })

In [100]:
# Build the port-level table from the row dicts collected from the port page.
df = pd.DataFrame(new_rows)
# Export `df` (the port table), not `df_trunk_routes`, to the port workbook.
# The column names come from the dict keys and are the only header this data
# has, so they are written; the integer row index is omitted.
df.to_excel('7_上海航運交易所_港口班輪準確率.xlsx', index=False)

### 一帶一路航貿指數

In [106]:
# Download the brsti index page. The browser-like `headers` dict from the first
# cell is sent, a timeout bounds the wait, and raise_for_status() fails fast on
# an HTTP error instead of letting an error page be parsed downstream.
res = requests.get(
    'https://www.sse.net.cn/index/singleIndex?indexType=brsti',
    headers=headers,
    timeout=30,
)
res.raise_for_status()

In [107]:
# Parse the downloaded page with the lxml backend.
soup = BeautifulSoup(res.content, features='lxml')

# Keep all <table> elements, and the <tr> rows of the first one.
tables = soup.find_all(name='table')
rows = tables[0].find_all(name='tr')

In [117]:
# Convert the first table with the shared parse_table helper instead of a
# hand-copied loop; like the punctuality tables, every <td> text is now
# whitespace-stripped before export.
df = parse_table(tables[0])
# Integer column/row labels are not written; the table's own first row is the header.
df.to_excel('7_上海航運交易所_一帶一路航貿指數.xlsx', header=False, index=False)



### 一帶一路貿易額指數

In [151]:
# Download the brtvi index page. The browser-like `headers` dict from the first
# cell is sent, a timeout bounds the wait, and raise_for_status() fails fast on
# an HTTP error instead of letting an error page be parsed downstream.
res = requests.get(
    'https://www.sse.net.cn/index/singleIndex?indexType=brtvi',
    headers=headers,
    timeout=30,
)
res.raise_for_status()

In [152]:
# Parse the downloaded page with the lxml backend.
soup = BeautifulSoup(res.content, features='lxml')

# Keep all <table> elements, and the <tr> rows of the first one.
tables = soup.find_all(name='table')
rows = tables[0].find_all(name='tr')

In [153]:
# Convert the first table with the shared parse_table helper instead of a
# hand-copied loop; like the punctuality tables, every <td> text is now
# whitespace-stripped before export.
df = parse_table(tables[0])
# Integer column/row labels are not written; the table's own first row is the header.
df.to_excel('7_上海航運交易所_一帶一路貿易額指數.xlsx', header=False, index=False)

### 一帶一路集裝箱海運量指數

In [172]:
# Start a Chrome WebDriver session and open the brcvi index page.
driver = webdriver.Chrome()
driver.get('https://www.sse.net.cn/index/singleIndex?indexType=brcvi')
# Enlarge the browser window to full size.
driver.maximize_window()

In [173]:
# Read the rendered text of the first table under the element with id "right",
# one list entry per text line.
rows = driver.find_elements(By.XPATH, '//*[@id="right"]/table')[0].text.split('\n')
# The text is now a plain Python list, so the browser session can be released.
# Quit only after a successful read, so a failed lookup can be retried without
# relaunching Chrome.
driver.quit()

In [174]:
# Split every text line after the first two into space-separated fields.
# A line with fewer than four fields gets an empty string inserted at
# position 1, i.e. into the slot that is later labelled '權重'.
data = []
for line in rows[2:]:
    fields = line.split(' ')
    padded = fields if len(fields) >= 4 else fields[:1] + [''] + fields[1:]
    data.append(padded)

In [175]:
# Label the four fields and export with the column names but without the row index.
df = pd.DataFrame(
    data,
    columns=['指數', '權重', '本期', '與上期比漲跌'],
)
df.to_excel('7_上海航運交易所_集裝箱海運量指數.xlsx', index=False)

### 海上絲綢之路運價指數

In [160]:
# Start a Chrome WebDriver session and open the srfi index page.
driver = webdriver.Chrome()
driver.get('https://www.sse.net.cn/index/singleIndex?indexType=srfi')
# Enlarge the browser window to full size.
driver.maximize_window()

In [161]:
# Read the rendered text of the first table under the element with id "right",
# one list entry per text line.
rows = driver.find_elements(By.XPATH, '//*[@id="right"]/table')[0].text.split('\n')
# The text is now a plain Python list, so the browser session can be released.
# Quit only after a successful read, so a failed lookup can be retried without
# relaunching Chrome.
driver.quit()

In [166]:
# Split every text line after the first two into space-separated fields.
# A line with fewer than four fields gets an empty string inserted at
# position 1, i.e. into the slot that is later labelled '權重'.
data = []
for line in rows[2:]:
    fields = line.split(' ')
    padded = fields if len(fields) >= 4 else fields[:1] + [''] + fields[1:]
    data.append(padded)

In [171]:
# Label the four fields and export with the column names but without the row index.
df = pd.DataFrame(
    data,
    columns=['指數', '權重', '本期', '與上期比漲跌'],
)
df.to_excel('7_上海航運交易所_海上絲綢之路運價指數.xlsx', index=False)