# 数据插值

## 数据准备

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Column layout of the daily temperature file.  The last three columns get
# generic names (col1..col3); they are not referenced anywhere below.
name_list = ['id', 'latitude', 'longitude', 'altitude', 'year', 'month', 'day', 'avg_temp', 'max_temp',
             'min_temp', 'col1', 'col2', 'col3']
# Parse the whitespace-delimited records straight into float columns.  This
# replaces loading every line as one string and then building a pd.Series per
# row through `apply`, which is very slow on files of any real size.
raw_data = pd.read_csv("./数据集/插值数据/SURF_CLI_CHN_MUL_DAY-TEM-12001-201905.TXT",
                       header=None, sep=r'\s+', names=name_list, dtype=float)
raw_data

In [None]:
# Mean of the daily average temperature per station location for May 2019.
is_may_2019 = (raw_data['year'] == 2019) & (raw_data['month'] == 5)
clean_data = (
    raw_data.loc[is_may_2019, :]
    .groupby(['latitude', 'longitude'])
    .agg(avg_temp=('avg_temp', 'mean'))
    .reset_index()
)

# Rescale the raw coordinate values by 1/100.
# NOTE(review): this is a plain scale factor.  If the source file stores
# coordinates as degrees+minutes (DDMM), decimal degrees would instead be
# deg + min / 60 -- confirm against the dataset's format description.
for index in ['latitude', 'longitude']:
    clean_data[index] = clean_data[index] * 0.01

# Drop records whose coordinates fall outside longitude > 20 / latitude > 10.
# The explicit .copy() makes the result an independent frame, so adding
# columns to it later (e.g. 'in_box') cannot trigger SettingWithCopyWarning.
clean_data = clean_data.loc[(clean_data['longitude'] > 20) & (clean_data['latitude'] > 10), :].copy()
clean_data

In [None]:
# Quick-look scatter of every station kept in clean_data (x = longitude,
# y = latitude).
station_lon = clean_data['longitude']
station_lat = clean_data['latitude']
fig, ax = plt.subplots(dpi=150, figsize=(6, 6))
ax.scatter(station_lon, station_lat)

## 筛选安徽省的数据

In [None]:
from getchinamap import getchinamap

# Download the province outline through getchinamap.
# province_name='安徽省' is Anhui Province; target='边界' means "boundary".
# NOTE(review): the result is presumably a GeoDataFrame -- it is later used
# with .buffer(), .bounds, .contains() and .boundary -- confirm against the
# getchinamap documentation.
china_engine = getchinamap.DownloadChmap()
prov_gpd = china_engine.download_province(province_name='安徽省', target='边界')
# Disabled alternative: fetch the national boundary instead of one province.
# prov_gpd = china_engine.download_country(target='边界')

# Show the downloaded object in the cell output.
prov_gpd

In [None]:
# Build a separate frame whose geometry column is the zero-distance buffer of
# the downloaded outline (buffer(0) is commonly used to clean up invalid
# polygons); prov_gpd itself is left unmodified.
prov_gpd_valid = prov_gpd.assign(geometry=prov_gpd.buffer(0))

In [None]:
from shapely.geometry import Point


def detect_pic(x):
    """Return whether the point described by *x* lies inside the province.

    Parameters
    ----------
    x : mapping-like (e.g. one DataFrame row)
        Must provide 'longitude' and 'latitude' entries.

    Returns
    -------
    bool-like
        Containment result for the first geometry of ``prov_gpd_valid``.
    """
    station = Point(x['longitude'], x['latitude'])
    # Positional access, matching detect_pic_inrect, so the lookup does not
    # depend on the GeoDataFrame index happening to contain the label 0.
    return prov_gpd_valid.contains(station).iloc[0]


def detect_pic_inrect(x):
    """Return whether the point in *x* falls inside the province bounding box.

    The box is taken from the bounds of the first geometry in
    ``prov_gpd_valid``; all four edges are inclusive.  *x* must provide
    'longitude' and 'latitude' entries.  Only comparisons combined with ``&``
    are used to build the result.
    """
    province_box = prov_gpd_valid.bounds.iloc[0]
    lon, lat = x['longitude'], x['latitude']
    lon_ok = (province_box['minx'] <= lon) & (lon <= province_box['maxx'])
    lat_ok = (lat >= province_box['miny']) & (lat <= province_box['maxy'])
    return lon_ok & lat_ok


# Disabled alternative: exact point-in-polygon test per station.
# clean_data['in_geo'] = clean_data.apply(lambda x: detect_pic(x), axis=1)
# Flag the stations that fall inside the province bounding box.
# detect_pic_inrect is built from elementwise comparisons joined with `&`, so
# it can take the whole frame at once; this avoids a Python-level call (and a
# fresh bounds computation on the GeoDataFrame) for every single row.
clean_data['in_box'] = detect_pic_inrect(clean_data)

In [None]:
# Stations flagged as inside the bounding box, re-indexed from 0.
inside_box = clean_data['in_box']
prov_pointer_df = clean_data[inside_box].reset_index(drop=True)
prov_pointer_df

In [None]:
# Selected stations drawn over the province outline, with the axes clipped to
# the bounding box of the geometry labelled 0.
province_bounds = prov_gpd_valid.bounds
fig, ax = plt.subplots(figsize=(4, 6))
ax.scatter(prov_pointer_df['longitude'], prov_pointer_df['latitude'])
# Disabled alternative: plot every station rather than the selected ones.
# ax.scatter(clean_data['longitude'], clean_data['latitude'])
prov_gpd_valid.boundary.plot(ax=ax)
ax.set_xlim(left=province_bounds['minx'][0], right=province_bounds['maxx'][0])
ax.set_ylim(bottom=province_bounds['miny'][0], top=province_bounds['maxy'][0])


In [None]:
# Inspect the bounding-box table (minx / miny / maxx / maxy) of the province.
prov_gpd_valid.bounds

In [None]:
# Inspect the minimum-x bound of the geometry labelled 0.
prov_gpd_valid.bounds.minx[0]

In [None]:
# Bounding box of the first province geometry, unpacked into four scalars
# that the gridding and plotting cells reuse.
bounds_ = prov_gpd_valid.bounds.iloc[0]
minx, miny, maxx, maxy = (bounds_[edge] for edge in ('minx', 'miny', 'maxx', 'maxy'))

In [None]:
# Evenly spaced evaluation axes across the bounding box:
# 100 samples along x (longitude) and 200 along y (latitude).
longitude_x = np.linspace(minx, maxx, 100)
latitude_y = np.linspace(miny, maxy, 200)

# Disabled: explicit mesh of grid nodes flattened into a two-column frame.
# grid_x, grid_y = np.meshgrid(longitude_x, latitude_y)
# grid_x_list = grid_x.reshape(-1,1)
# grid_y_list = grid_y.reshape(-1, 1)
# grid_df = pd.DataFrame({'grid_x_list':grid_x_list.flatten(), 'grid_y_list':grid_y_list.flatten()})
# grid_df

In [None]:
from scipy import interpolate
# scipy.interpolate.interp2d was deprecated in SciPy 1.10 and removed in
# SciPy 1.14.  For scattered input it was a wrapper around bisplrep/bisplev,
# so those are called directly here with the same settings the original
# kind='quintic' request implied: spline degree 5 in x and y, smoothing s=0.
# Variations: use kx=3, ky=3 for a cubic fit, or pass the prov_pointer_df
# columns to fit only the stations inside the province bounding box.
_spline_tck = interpolate.bisplrep(
    clean_data['longitude'].to_numpy(),
    clean_data['latitude'].to_numpy(),
    clean_data['avg_temp'].to_numpy(),
    kx=5, ky=5, s=0,
)


def f(x_new, y_new):
    """Evaluate the fitted spline on the grid spanned by *x_new* x *y_new*.

    Parameters
    ----------
    x_new, y_new : 1-D array-like
        Grid coordinates along x and y, in ascending order.

    Returns
    -------
    numpy.ndarray
        Values with shape ``(len(y_new), len(x_new))`` -- rows follow y,
        columns follow x, the orientation interp2d used to return.
    """
    # bisplev returns (len(x), len(y)); transpose to the row-major-in-y layout.
    return np.atleast_2d(interpolate.bisplev(x_new, y_new, _spline_tck)).T


# NOTE: despite its name, predict_cubic holds the quintic-spline result; the
# name is kept because the plotting cell refers to it.
predict_cubic = f(longitude_x, latitude_y)
predict_cubic.shape

In [None]:
import matplotlib as mpl
from matplotlib import cm
# Custom colour ramp used for the interpolated surface.
colors = ["#33A02C", "#B2DF8A", "#FDBF6F", "#1F78B4", "#999999", "#E31A1C"]
palette = mpl.colors.LinearSegmentedColormap.from_list("mypalette", colors, N=1000)

fig, ax = plt.subplots(dpi=150, figsize=(10, 6))

# Province outline in white.
prov_gpd_valid.boundary.plot(color='white', ax=ax)

# Interpolated field as an image stretched over the bounding box;
# origin='lower' puts the first row of predict_cubic at miny.
# Disabled alternatives: ax.contourf / ax.contour on a (grid_x, grid_y) mesh,
# e.g. with cmap=cm.coolwarm.
ax_im_bar = ax.imshow(
    predict_cubic,
    cmap=palette,
    extent=(minx, maxx, miny, maxy),
    origin='lower',
)

# Station markers, each annotated with its mean temperature rounded to 2 dp.
# Disabled alternative: mark every station in clean_data in red.
ax.scatter(prov_pointer_df['longitude'], prov_pointer_df['latitude'], s=6, c='black')
station_columns = zip(
    prov_pointer_df['longitude'],
    prov_pointer_df['latitude'],
    prov_pointer_df['avg_temp'],
)
for lon, lat, temp in station_columns:
    ax.text(lon, lat, np.around(temp, 2), c='black')

# Disabled: clip the axes to the bounds of the geometry labelled 0.
# ax.set_xlim(prov_gpd_valid.bounds.minx[0], prov_gpd_valid.bounds.maxx[0])
# ax.set_ylim(prov_gpd_valid.bounds.miny[0], prov_gpd_valid.bounds.maxy[0])
fig.colorbar(ax_im_bar, orientation='vertical')
plt.tight_layout()
plt.savefig("结果/result012901.png")