# 导入库

In [1]:
import pandas as pd 

# 导入并查看数据

In [2]:
# Load the per-country temperature records from the CSV next to the notebook
# and preview the first rows.
csv_path = 'GlobalLandTemperaturesByCountry.csv'
df1 = pd.read_csv(csv_path)
df1.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


# 以国家分组计算历史平均温度

In [3]:
# Preview the last rows. On inspection the records run from 1743 (the first
# rows shown earlier are dated 1743-11-01) through 2013.
df1.tail()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
577457,2013-05-01,19.059,1.022,Zimbabwe
577458,2013-06-01,17.613,0.473,Zimbabwe
577459,2013-07-01,17.0,0.453,Zimbabwe
577460,2013-08-01,19.759,0.717,Zimbabwe
577461,2013-09-01,,,Zimbabwe


In [4]:
# Historical mean temperature per country over the whole record (1743-2013).
# Show how many countries have no usable mean, drop them, then confirm that
# no missing values remain.
country_means = df1.groupby('Country')['AverageTemperature'].mean()
display(int(country_means.isna().sum()))
df3 = country_means.dropna()
display(int(df3.isna().sum()))

1

0

In [5]:
# Preview the first five per-country means.
df3.iloc[:5]

Country
Afghanistan       14.045007
Africa            24.074203
Albania           12.610646
Algeria           22.985112
American Samoa    26.611965
Name: AverageTemperature, dtype: float64

# 绘制地图

In [6]:
# Split the per-country means into parallel lists (names, values) for the map.
country = [*df3.index]
v = [*df3.values]

In [7]:
import math

from pyecharts import options as opts
from pyecharts.charts import Map

# World choropleth of each country's historical mean temperature.
# VisualMapOpts defaults to min_=0, so with only max_ set any country whose
# mean is below zero would fall outside the colour scale. The bounds keep the
# original 0-30 range as a baseline and widen it whenever the data exceeds it.
scale_min = min(0, math.floor(min(v)))
scale_max = max(30, math.ceil(max(v)))

c = Map()
c.add("", [list(z) for z in zip(country, v)], "world")
# Hide the per-region text labels; the colour scale carries the information.
c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
c.set_global_opts(
        title_opts=opts.TitleOpts(title="每个国家历史平均温度（摄氏度）"),
        visualmap_opts=opts.VisualMapOpts(min_=scale_min, max_=scale_max),
    )
# Write a standalone HTML file, then also render inline in the notebook.
c.render('世界历史平均气温.html')
c.render_notebook()

# 温度走势图

In [8]:
# Reduce the 'dt' column to the calendar year.
# The column is overwritten in place, so the conversion is guarded: once 'dt'
# already holds integer years, running pd.to_datetime on it again would read
# those integers as nanoseconds since the epoch and turn every row into 1970.
if not pd.api.types.is_integer_dtype(df1['dt']):
    df1['dt'] = pd.to_datetime(df1['dt']).dt.year
df1.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743,4.384,2.294,Åland
1,1743,,,Åland
2,1744,,,Åland
3,1744,,,Åland
4,1744,,,Åland


In [9]:
# Mean temperature per year across all countries; years with no data take the
# previous year's value (forward fill).
# Series.ffill() replaces fillna(method='ffill'), whose `method` argument is
# deprecated in recent pandas releases.
df4 = df1.groupby('dt')['AverageTemperature'].mean()
df4 = df4.ffill()
df4.head()

dt
1743    5.184140
1744    9.837898
1745    1.387125
1746    1.387125
1747    1.387125
Name: AverageTemperature, dtype: float64

In [10]:
# Skip the first four years of the yearly series, then build the chart inputs:
# x holds the years as strings (category axis labels), y the matching means.
x = [str(year) for year in df4.index[4:]]
y = list(df4.values[4:])

In [11]:
import pyecharts.options as opts
from pyecharts.charts import Line


# Line chart of the yearly mean temperature, built as one fluent chain.
# Point labels are hidden so the long series stays readable.
c = (
    Line()
    .add_xaxis(x)
    .add_yaxis("气温（摄氏度）", y, label_opts=opts.LabelOpts(is_show=False))
    .set_global_opts(title_opts=opts.TitleOpts(title="1747-2013年"))
)
# Write a standalone HTML file, then also render inline in the notebook.
c.render('全球历年温度走势.html')
c.render_notebook()

# 建立预测模型 

In [12]:
# The trend chart looks unstable before 1888 (measurements that early were
# less reliable), so keep only the data from 1888 onward.
# Years are normalised to int before the lookup, so the cell also works when
# re-run: x then already holds ints starting at 1888 and the slice is a no-op.
index = [int(year) for year in x].index(1888)
x = [int(year) for year in x[index:]]
y = y[index:]

In [13]:
# Fit a linear regression of yearly mean temperature on the year.
from sklearn.linear_model import LinearRegression
import numpy as np
clf = LinearRegression()
# scikit-learn expects 2-D arrays; reshape(-1, 1) makes a single column of
# whatever length the series has, instead of hard-coding the row count.
x = np.array(x).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)
clf.fit(x,y)
display(clf.coef_) # slope
display(clf.intercept_) # intercept

array([[0.00916559]])

array([0.89717817])

In [14]:
# Predict the global mean temperature for each year from 2014 to 2020.
# The model takes one single-feature row per year.
forecast_years = [[year] for year in range(2014, 2021)]
pre = clf.predict(forecast_years)
pre

array([[19.35668092],
       [19.36584652],
       [19.37501211],
       [19.3841777 ],
       [19.39334329],
       [19.40250889],
       [19.41167448]])

In [15]:
# Trend of the predicted global mean temperature for 2014-2020.
import pyecharts.options as opts
from pyecharts.charts import Line

x_data = ["2014", "2015", "2016", "2017", "2018", "2019", "2020"]
# Take the values straight from the model output `pre` instead of a
# hand-copied list: the copied list had lost the 2016 value, which shifted
# every later point by one year and left 2020 without data.
y_data = [round(float(value), 8) for value in pre.ravel()]
assert len(y_data) == len(x_data), "one predicted value is required per year"


a = Line()
a.set_global_opts(
        tooltip_opts=opts.TooltipOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(type_="category"),
        yaxis_opts=opts.AxisOpts(
            type_="value",
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
    )
a.add_xaxis(xaxis_data=x_data)
a.add_yaxis(
        series_name="",
        y_axis=y_data,
        symbol="emptyCircle",
        is_symbol_show=True,
        label_opts=opts.LabelOpts(is_show=False),
    )
# Write a standalone HTML file, then also render inline in the notebook.
a.render('2014-2020年温度.html')
a.render_notebook()