In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns

# Register tqdm with pandas so that `progress_apply` (used on Series further
# down in this notebook) is available.
tqdm.pandas()
import platform

# Work around matplotlib's problem displaying Chinese text by choosing a
# per-OS font family. Platforms not listed keep matplotlib's default.
cjk_font_by_os = {'Darwin': 'Arial Unicode MS', 'Windows': 'SimHei'}
cjk_font = cjk_font_by_os.get(platform.system())
if cjk_font is not None:
    plt.rcParams["font.family"] = cjk_font

In [None]:
# Load the observations. DATE is parsed to datetime so the `.dt` accessor can
# derive the calendar columns; STATION is divided by 100000 and truncated to int.
rawdata = pd.read_csv("数据集/Ordos.csv", parse_dates=['DATE']).assign(
    DATE_YEAR=lambda frame: frame['DATE'].dt.year,
    DATE_MONTH=lambda frame: frame['DATE'].dt.month,
    DATE_DAY=lambda frame: frame['DATE'].dt.day,
    STATION=lambda frame: frame['STATION'].apply(lambda code: int(code / 100000)),
)


def split_WND(x) -> pd.Series:
    """Split one raw ``WND`` record into its five components.

    Parameters
    ----------
    x : str or missing
        Comma-separated record with exactly five fields, in the order
        ``DIR, DQ, State, Spd, SQ``. A missing value (``None``, ``NaN`` or
        ``pd.NA``) is accepted as well.

    Returns
    -------
    pd.Series
        Indexed by ``['DIR', 'DQ', 'State', 'Spd', 'SQ']``. ``DIR``, ``DQ``
        and ``SQ`` are the raw fields converted to ``float``; ``State`` is the
        raw string; ``Spd`` is the raw field divided by 10, or ``NaN`` when the
        raw field is the sentinel ``'9999'``. For a missing record every entry
        is ``NaN``.

    Raises
    ------
    ValueError
        If the record does not contain exactly five fields, or a numeric field
        cannot be parsed as a float.
    """
    field_names = ['DIR', 'DQ', 'State', 'Spd', 'SQ']

    # A missing record has nothing to split; propagate it as an all-NaN row
    # instead of failing on ``.split``.
    if x is None or x is pd.NA or (isinstance(x, float) and np.isnan(x)):
        return pd.Series([np.nan] * len(field_names), index=field_names, dtype=object)

    # Unpacking enforces the five-field layout (ValueError otherwise).
    direction, direction_q, state, speed, speed_q = x.split(',')

    # '9999' marks a missing speed; every other value is divided by 10.
    speed_value = np.nan if speed == '9999' else float(speed) / 10

    # Build the Series once from already-converted values (dtype=object because
    # it mixes floats with the State string) rather than writing floats into a
    # Series that was created from strings.
    return pd.Series(
        [float(direction), float(direction_q), state, speed_value, float(speed_q)],
        index=field_names,
        dtype=object,
    )


# Expand every WND string into its five components (with a tqdm progress bar)
# and store them as new columns of rawdata.
wnd_columns = ['DIR', 'DQ', 'State', 'Spd', 'SQ']
rawdata[wnd_columns] = rawdata['WND'].progress_apply(split_WND)

In [None]:
# Direction values above 360 are replaced with NaN.
dir_out_of_range = rawdata['DIR'] > 360
rawdata.loc[dir_out_of_range, ['DIR']] = np.nan

## 月份和风速

In [None]:
import plotly.graph_objects as go

# One box per calendar month (1-12), built from that month's Spd values.
monthly_speed_boxes = [
    go.Box(y=rawdata.loc[rawdata['DATE_MONTH'] == month, 'Spd'].values, name=month)
    for month in range(1, 13)
]

fig = go.Figure(data=monthly_speed_boxes)
fig.update_layout(
    template='simple_white',
    title="月份和风速的boxplot",
    xaxis_title='月份',
    yaxis_title='风速',
    showlegend=False,
)
fig.show()


## 月份和风向

In [None]:
# One box per calendar month (1-12), built from that month's DIR values.
monthly_direction_boxes = [
    go.Box(y=rawdata.loc[rawdata['DATE_MONTH'] == month, 'DIR'].values, name=month)
    for month in range(1, 13)
]

fig = go.Figure(data=monthly_direction_boxes)
fig.update_layout(
    template='simple_white',
    title="月份和风向的boxplot",
    xaxis_title='月份',
    yaxis_title='风向',
    showlegend=False,
)
fig.show()

## 风向和风速两个图放在一起

In [None]:
# Two horizontal box plots by month, side by side, sharing the month (y) axis:
# direction on the left panel, speed on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 6), dpi=150, sharey=True)

panels = (('DIR', "wind DIRECTION"), ('Spd', "wind SPEED"))
for axis, (column, panel_title) in zip(ax, panels):
    sns.boxplot(data=rawdata, x=column, y='DATE_MONTH', ax=axis, orient='h',
                fliersize=0.5, linewidth=0.7)
    axis.set_title(panel_title)

## 绘制玫瑰图


In [None]:
# Rows observed in July 2000; the dtypes are displayed as the cell output.
is_july = rawdata['DATE_MONTH'] == 7
is_year_2000 = rawdata['DATE_YEAR'] == 2000
data_200007 = rawdata[is_july & is_year_2000]
data_200007.dtypes

In [None]:
import matplotlib.pyplot as plt
# WindroseAxes is not referenced by name below.
# NOTE(review): this import appears to be what makes projection='windrose'
# available to plt.subplots - confirm before removing it.
from windrose import WindroseAxes
import matplotlib.cm as cm

# Bar-style wind rose for July 2000: direction vs. speed, normalised.
wind_direction = data_200007['DIR'].astype('float')
wind_speed = data_200007['Spd']

fig, ax = plt.subplots(subplot_kw={'projection': 'windrose'}, dpi=100, figsize=(6, 6))
ax.bar(wind_direction, wind_speed, normed=True, opening=0.8, edgecolor='black')
ax.set_legend()
plt.show()

In [None]:
# Filled-contour wind rose for July 2000, with speed bin edges 0, 1, ..., 8.
speed_bins = np.arange(0, 9, 1)
wind_direction = data_200007['DIR'].astype('float')
wind_speed = data_200007['Spd']

fig, ax = plt.subplots(subplot_kw={'projection': 'windrose'}, dpi=100, figsize=(6, 6))
ax.contourf(wind_direction, wind_speed, bins=speed_bins, cmap=cm.hot)
ax.set_legend()
plt.show()

## 绘制2020年的数据


In [None]:
# Independent copy of the rows from 2020, so columns added to it later do not
# write into rawdata.
data_2020 = rawdata[rawdata['DATE_YEAR'] == 2020].copy()

### 风速和风向分布数据

In [None]:
import os

# Density histograms with a KDE overlay for the 2020 data: direction on the
# left panel, speed on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), dpi=150)
sns.histplot(data_2020['DIR'], ax=ax[0], kde=True, stat='density')
ax[0].set_title("wind Direction")

sns.histplot(data_2020['Spd'], ax=ax[1], kde=True, stat='density')
ax[1].set_title("wind Speed")

# savefig does not create missing directories, so make sure the output folder
# exists before writing (otherwise a fresh checkout fails here).
os.makedirs("结果", exist_ok=True)
fig.savefig("结果/histplot.png")

### 按16方位风向绘制风向频次统计柱状图

In [None]:
def label_winddirect(winddirect):
    """Map a wind direction in degrees to its 16-point compass label.

    Each sector is 22.5 degrees wide and centred on its bearing, so ``'N'``
    covers ``[348.75, 360]`` together with ``[0, 11.25)``, ``'NNE'`` covers
    ``[11.25, 33.75)``, and so on clockwise.

    Parameters
    ----------
    winddirect : float
        Direction in degrees. Both ``0`` and ``360`` are accepted and map to
        ``'N'``.

    Returns
    -------
    str or None
        The sector label, or ``None`` when ``winddirect`` is missing
        (``None``/``NaN``) or lies outside ``[0, 360]``.
    """
    sector_labels = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', 'S', 'SSW', 'SW',
                     'WSW', 'W', 'WNW', 'NW', 'NNW']
    sector_width = 360 / len(sector_labels)  # 22.5 degrees

    # NaN fails both comparisons, so it is rejected here together with
    # out-of-range values.
    if winddirect is None or not (0 <= winddirect <= 360):
        return None

    # Shift by half a sector so that each label is centred on its bearing; the
    # modulo wraps the [348.75, 360] range (including 360 itself) back to 'N'.
    sector = int((winddirect + sector_width / 2) // sector_width) % len(sector_labels)
    return sector_labels[sector]


# Label every 2020 observation with its compass sector (with a tqdm progress bar).
data_2020['DIR_cut'] = data_2020['DIR'].progress_apply(label_winddirect)

In [None]:
# Count the 2020 observations that fall in each compass sector.
data_2020_wd = data_2020.groupby(['DIR_cut']).agg(
    count=('DIR_cut', 'count')
).reset_index()

# An ordered categorical makes sorting (and the bar order) follow the compass
# clockwise from N instead of alphabetical order.
compass_order = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', 'S', 'SSW', 'SW',
                 'WSW', 'W', 'WNW', 'NW', 'NNW']
data_2020_wd['DIR_cut'] = pd.Categorical(values=data_2020_wd['DIR_cut'],
                                         categories=compass_order, ordered=True)
data_2020_wd = data_2020_wd.sort_values(by=['DIR_cut'])

# `frequency` is the share of observations per sector (0-1, sums to 1).
data_2020_wd['frequency'] = data_2020_wd['count'] / data_2020_wd['count'].sum()
# The y axis is labelled in percent, so plot the share scaled to 0-100 rather
# than the raw fraction.
data_2020_wd['frequency_pct'] = data_2020_wd['frequency'] * 100

fig, ax = plt.subplots(figsize=(10, 5), dpi=150)

sns.barplot(data=data_2020_wd, x='DIR_cut', y='frequency_pct', ax=ax)
_ = ax.set_ylabel('Frequency (%)')
_ = ax.set_title("2020年 按16方位风向绘制风向频次统计柱状图")
_ = ax.set_xlabel("")

### 风速分布

In [None]:
# All July observations (every year), as an independent copy of those rows.
data_07 = rawdata[rawdata['DATE_MONTH'] == 7].copy()

# Density histogram of the non-missing July speeds, 20 bins.
july_speed = data_07['Spd'].dropna()
fig, ax = plt.subplots(figsize=(10, 5), dpi=150)
sns.histplot(july_speed, ax=ax, stat='density', bins=20)

### 基于 stats.weibull_min 拟合风速分布的参数

效果不太好，后期再具体介绍

In [None]:
from scipy import stats

# Only the non-missing July speeds are used, for both the fit and the histogram.
spd_07 = data_07['Spd'][~pd.isna(data_07['Spd'])]

# Fit a Weibull (weibull_min) distribution with the location fixed at 0 (floc=0).
shape, loc, scale = stats.weibull_min.fit(spd_07, floc=0)
x = np.arange(0, 20, 0.01)

fig, ax = plt.subplots(figsize=(10, 5), dpi=150)
sns.histplot(spd_07, stat='density', ax=ax, bins=20)
# x and y must be passed by keyword: positional x/y is deprecated in seaborn
# 0.11 and raises TypeError from 0.12 on.
sns.lineplot(x=x, y=stats.weibull_min.pdf(x, c=shape, scale=scale, loc=loc), ax=ax)