# 通过大数据和人工智能预测今天气温

## 分析数据与预处理

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  #更改一个有中文的字体
plt.rcParams['axes.unicode_minus']=False  #解决符号不显示问题
df = pd.read_csv("福州天气.csv",sep="\\s+")
df.head()  #-9999是缺失值

Unnamed: 0,年,月,日,时,气温,露点温度,海平面压力,风向,风速,天气条件覆盖代码,液体沉淀深度(一小时),液体沉淀深度(六小时)
0,2020,1,1,0,87,53,10328,340,20,1,-9999,0
1,2020,1,1,1,110,50,-9999,10,30,-9999,-9999,-9999
2,2020,1,1,2,120,60,-9999,360,30,-9999,-9999,-9999
3,2020,1,1,3,135,33,10323,47,12,4,-9999,-9999
4,2020,1,1,4,140,70,-9999,20,40,-9999,-9999,-9999


In [2]:
data=df[['月','日','时','气温']]
data.head()  #我们只需要这些就行

Unnamed: 0,月,日,时,气温
0,1,1,0,87
1,1,1,1,110
2,1,1,2,120
3,1,1,3,135
4,1,1,4,140


In [3]:
data.dtypes

月     int64
日     int64
时     int64
气温    int64
dtype: object

In [4]:
data[data["月"]==2].max().日

29

In [5]:
data.isnull().sum()

月     0
日     0
时     0
气温    0
dtype: int64

In [6]:
data[data==-9999].sum()   #居然没有缺失值，那最好

月     0.0
日     0.0
时     0.0
气温    0.0
dtype: float64

In [7]:
weather = pd.DataFrame(columns=('月', '日', '最低温℃' , '最高温℃'))
def get_max(x,month,day):   #获取最高温
    return x[(x.月==m) & (x.日==d)].气温.max()/10  #原数据
def get_min(x,month,day):    #获取最低温
    return x[(x.月==m) & (x.日==d)].气温.min()/10

for m in range(1,13):
    d_max=int(data[data["月"]==m].max().日)  #轻松得到每个月最大的日期
    for d in range(1,d_max+1):
        weather.loc[weather.index.size] = [m,d,get_min(data,month=m,day=d),get_max(data,month=m,day=d)]

In [8]:
get_max(data,month=1,day=1)

17.9

In [9]:
weather.head()

Unnamed: 0,月,日,最低温℃,最高温℃
0,1.0,1.0,8.5,21.0
1,1.0,2.0,9.9,25.1
2,1.0,3.0,11.4,22.2
3,1.0,4.0,12.7,23.4
4,1.0,5.0,10.0,23.0


In [10]:
# 先训练一个模型，让机器学习最低温与日期的关系
X = weather.values[:,:2]   #日期
Y = weather.values[:,2]    #最低温

from sklearn.model_selection import train_test_split  #随机拆分数据集与测试集
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=66)  #二八原则

#from sklearn.svm import SVR #支持向量机回归器  60%不行
#svr_min = SVR()

#from sklearn.tree import DecisionTreeRegressor  #决策树回归器  #也不行
#reg_min = DecisionTreeRegressor()

from sklearn.ensemble import RandomForestRegressor  #随机森林回归器  yyds
reg_min = RandomForestRegressor(n_estimators=500,n_jobs=-1)   #500棵树，用上电脑全部算力

reg_min.fit(X_test,Y_test)

RandomForestRegressor(n_estimators=500, n_jobs=-1)

In [11]:
reg_min.score(X_test,Y_test)

0.98805504980663

In [12]:
# 如法炮制，让机器学习最高温与日期的关系
X = weather.values[:,:2]   #日期
Y = weather.values[:,3]    #最低温

from sklearn.model_selection import train_test_split  #随机拆分数据集与测试集
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=66)  #二八原则

from sklearn.ensemble import RandomForestRegressor  #随机森林回归器  yyds
reg_max = RandomForestRegressor(n_estimators=500,n_jobs=-1)   #500棵树，用上电脑全部算力

reg_max.fit(X_test,Y_test)

RandomForestRegressor(n_estimators=500, n_jobs=-1)

In [13]:
reg_max.score(X_test,Y_test)

0.9746338220261244

In [18]:
import time
m = time.localtime(time.time()).tm_mon
d = time.localtime(time.time()).tm_mday
print("预计今天最高温:{:.2f}℃,最低温{:.2f}℃".format(reg_max.predict([[m,d]])[0],reg_min.predict([[m,d]])[0]))

预计今天最高温:36.44℃,最低温25.16℃


In [46]:
import datetime
localtime = datetime.datetime.now()+datetime.timedelta(days=1)
dir(localtime.date())

['__add__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rsub__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 'ctime',
 'day',
 'fromisocalendar',
 'fromisoformat',
 'fromordinal',
 'fromtimestamp',
 'isocalendar',
 'isoformat',
 'isoweekday',
 'max',
 'min',
 'month',
 'replace',
 'resolution',
 'strftime',
 'timetuple',
 'today',
 'toordinal',
 'weekday',
 'year']

In [50]:
localtime.month

8