# **getStationInfo.ipynb**

Author: Zhixian Yang

Email: [yangzhx28@mail2.sysu.edu.cn](mailto:yangzhx28@mail2.sysu.edu.cn) or [yimu01439@gmail.com](mailto:yimu01439@gmail.com)

GitHub: [https://github.com/koar-create](https://github.com/koar-create)

Date created: August 2nd, 2023

Last modified: August 10th, 2023

<br><br>

---

<br><br>

## **Description**
None.

In [1]:
import os, sys, time, glob
import pytz
import requests
import platform
import numpy as np, pandas as pd
from datetime import datetime


def getStationInfo(dirname=None):
    """Build the station table from 'China_SURF_Station.xlsx' and 'stations.csv'.

    Both files are looked up in ``dirname``. If 'stations.csv' is missing it is
    downloaded from the crazyapril/ChinaNationalWeatherStations GitHub
    repository first.

    Parameters
    ----------
    dirname : str or None
        Directory holding 'China_SURF_Station.xlsx' (sheet '开放站点') and
        where 'stations.csv' is read from / cached to. ``None`` falls back to
        the current working directory.

    Returns
    -------
    pandas.DataFrame
        One row per remaining station with the columns
        '区站号', '省份', '站名', '站类', '经度', '纬度', '测站海拔', in that order.
        '站类' is NaN for stations that are absent from the Excel sheet.
    """
    # Imported locally because the file's import cell never imports urllib;
    # without this the download branch below raises NameError.
    import urllib.request

    if dirname is None:
        dirname = os.getcwd()

    # read 'China_SURF_Station.xlsx'
    df1 = pd.read_excel(os.path.join(dirname, 'China_SURF_Station.xlsx'), sheet_name='开放站点')
    df1.set_index('区站号', inplace=True)

    # if stations.csv does not exist, download it first
    url = 'https://raw.githubusercontent.com/crazyapril/ChinaNationalWeatherStations/master/stations.csv'
    csv_path = os.path.join(dirname, 'stations.csv')
    if not os.path.exists(csv_path):
        # Download to a temporary name and rename afterwards, so an interrupted
        # download is never mistaken for a complete cached file on the next run.
        partial_path = csv_path + '.part'
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, csv_path)

    # process stations.csv:
    # station numbers the original author marked as unavailable/useless
    unavai_sites = [53716, 53801, 54175, 54456, 54458, 54501, 54578, 54661, 54733, 54761, 57036, 58444, 58469, 58474, 58599, 58696, 58726, 58755, 58958, 59668, 59755, 59759, 59852, 59936, 59997]
    df = pd.read_csv(csv_path)  # step1: read data
    df = df.rename(columns={'站号': '区站号'})  # step2: rename column '站号' to '区站号'
    df['区站号'] = df['区站号'].astype(np.int64)  # step3: cast station numbers to int64
    df = df.set_index('区站号')  # step4: index by '区站号' for label-based lookups
    # step5: remove useless sites; errors='ignore' so a station that is already
    # missing from the CSV does not abort the whole run with a KeyError
    df = df.drop(unavai_sites, errors='ignore')

    # step6: assign column '站类' by looking each station up in the Excel sheet.
    # Stations not present there get NaN. If the sheet lists a station more
    # than once, the first occurrence is used.
    station_class = df1['站类']
    station_class = station_class[~station_class.index.duplicated(keep='first')]
    df['站类'] = station_class.reindex(df.index).to_numpy()

    df = df.reset_index()  # step7: reset index
    new_order = ['区站号', '省份', '站名', '站类', '经度', '纬度', '测站海拔']
    df = df.loc[:, new_order]  # step8: reorder columns

    # step9: station 58424 has anomalous longitude/latitude in the CSV
    # (per the original author); overwrite with fixed values
    is_58424 = df['区站号'] == 58424
    df.loc[is_58424, '经度'] = 117.06
    df.loc[is_58424, '纬度'] = 30.54

    return df

def saveStationInfo(df=None, dirname=None):
    """Write ``df`` to 'stations.xlsx' inside ``dirname``.

    The file is written to sheet '开放站点' without the index column. If
    'stations.xlsx' already exists in ``dirname`` nothing is written and the
    existing file is left untouched.
    """
    target = os.path.join(dirname, 'stations.xlsx')
    if os.path.exists(target):
        # an earlier export is already on disk; keep it as-is
        return
    df.to_excel(target, sheet_name='开放站点', index=False)

# specify dirname and data's dirname
if platform.system() == 'Windows':
    # author's local working directory on Windows
    dirname = os.path.join("D:\\Documents", "A-threads", "less important ones", "thread2308-4_try_to_purchase_chinese_station_api")
else:
    # Linux, and any other platform (e.g. macOS reports 'Darwin'): use the
    # current working directory. Previously only 'Linux' was handled, so
    # dirname stayed undefined elsewhere and the next line raised NameError.
    dirname = os.getcwd()
data_dirname = os.path.join(dirname, 'data')

# build the station table and export it to stations.xlsx (skipped if it exists)
df = getStationInfo(dirname=dirname)
saveStationInfo(df=df, dirname=dirname)