In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, timedelta, datetime
import math

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_absolute_error, median_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from time import time

In [None]:
# Load the daily maximum and minimum temperature columns (each together with DATE)
# from the same CSV export; the file is read once per measurement.
TEMP_MAX, TEMP_MIN = [
    pd.read_csv('input/1754238.csv', delimiter=',', usecols=['DATE', measurement])
    for measurement in ('TMAX', 'TMIN')
]

In [None]:
TEMP_MAX

In [None]:
TEMP_MIN

In [None]:
# Reshape one calendar day's rows so they can be merged into a single dataset
def rename_cols_rows(data, field_name):
    """Re-index one calendar day's observations by year and name the value column after the day.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'DATE' column and a `field_name` column. The month-day label is
        taken from the first distinct date, so all rows are expected to share one calendar day.
    field_name : str
        Name of the value column to rename (e.g. 'TMAX').

    Returns
    -------
    pandas.DataFrame
        `data` without 'DATE', indexed by year, with `field_name` renamed to 'MM-DD'.

    Raises
    ------
    ValueError
        If a 'DATE' value cannot be parsed as a date.
    """
    dates = pd.to_datetime(data['DATE'])
    years = dates.dt.year.to_numpy()
    day_month = dates.dt.strftime('%m-%d').unique()[0]
    # `columns=` instead of a positional axis: pandas 2.x only accepts the axis as a keyword.
    return (
        data.drop(columns='DATE')
        .set_index(years)
        .rename(columns={field_name: day_month})
    )

In [None]:
# Splits the sample by calendar date, e.g. 1 January across all years

def get_df_with_day_and_month_for_all_year (df, field_name):
    """Pivot a long DATE/value table into one column per calendar day, indexed by year.

    For every month-day combination that occurs in `df['DATE']` (strings starting with
    'YYYY-MM-DD') the matching raw rows are written to '<field_name>/<MM-DD>.csv', reshaped
    with `rename_cols_rows` and outer-joined column-wise. The combined table is also
    written to 'dataset<field_name>.csv'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string 'DATE' column and a `field_name` column.
    field_name : str
        Value column name; also used as the output directory name and CSV suffix.

    Returns
    -------
    pandas.DataFrame
        Combined table (empty if no date matched).
    """
    import os  # local import keeps this cell independent of the notebook's import cell

    # The per-day dumps are written into a directory named after the field.
    os.makedirs(field_name, exist_ok=True)

    per_day_frames = []
    for month in range(1, 13):
        for day in range(1, 32):
            # Zero-padded label replaces the four hand-written padding branches.
            day_month = f'{month:02d}-{day:02d}'
            tmp = df[df['DATE'].str.match(r'\d{4}-' + day_month)]
            if tmp.empty:
                # Covers impossible dates such as 02-30 as well as days missing from the data.
                continue
            # Always dump the raw rows; one branch used to overwrite this file with the
            # reshaped frame, which made those files inconsistent with the rest.
            tmp.to_csv(f'{field_name}/{day_month}.csv')
            per_day_frames.append(rename_cols_rows(tmp, field_name))

    # A single concat instead of growing the frame inside the loop.
    if per_day_frames:
        data = pd.concat(per_day_frames, axis=1, join='outer', sort=True)
    else:
        data = pd.DataFrame()
    data.to_csv(f'dataset{field_name}.csv')

    return data

In [None]:
# Removes outliers from a single column

def delete_outliers (df):
    """Return the values of the Series `df` that lie within 1.5 interquartile ranges of the quartiles."""
    first_quartile = df.quantile(0.25)
    third_quartile = df.quantile(0.75)
    # Interquartile range
    spread = third_quartile - first_quartile
    inside_fences = df.between(first_quartile - 1.5 * spread, third_quartile + 1.5 * spread)
    return df[inside_fences]

In [None]:
# Box plot drawing

def draw_box_plot (df, plot_name):
    """Show a box plot of `df`, titled with `df.name` and with `plot_name` on the y axis."""
    figure = plt.figure(figsize=(6, 6))
    axes = figure.add_subplot(111)
    figure.suptitle(f'Boxplot for {df.name}', fontsize=14, fontweight='bold')
    # The returned (axes, artists) pair is not needed afterwards.
    # NOTE(review): presumably the box is drawn on the subplot created above — confirm.
    pd.DataFrame.boxplot(df, return_type='both', grid=False,  fontsize=15, figsize=(7,7))
    axes.set_ylabel(f'{plot_name}, $^o C$')
    plt.show()


In [None]:
# Build the per-day tables for the max and the min temperature series;
# each call also writes its CSV dumps to disk.
t_max = get_df_with_day_and_month_for_all_year(TEMP_MAX, 'TMAX')
t_min = get_df_with_day_and_month_for_all_year(TEMP_MIN, 'TMIN')
# t_avg = get_df_with_day_and_month_for_all_year(TEMP_MIN, 'TAVG')


In [None]:
t_max
# t_max['02-29'].isna()

# a = t_max[['02-28', '02-29', '03-01']]
# from sklearn.impute import SimpleImputer
# imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
# imp_mean.fit(a.T)


# b = imp_mean.transform(a.T)
# print(b[1])
# print(t_max['02-29'])

In [None]:
# tmp = t_max[['02-28','02-29', '03-01']]
# tmp['02-29']  = tmp.T.apply(lambda x: (x['02-28']+x['03-01'])/2 if np.isnan(x['02-29']) else x['02-29'])
# print(tmp)
# for row in tmp.itertuples():
#     print(row[2])
# def add_value_29_02 (tmp):
#     for row in tmp.itertuples():
#         if row[2].isna():
#             tmp[2] = (row[1].values + row[3].values)/2
#         else:
#              tmp[2] =  row[2]                
#     return(tmp[2])

# add_value_29_02 (tmp)
# tmp['02-29'] = tmp.apply(lambda row: (row['02-28'] + row['03-01'])/2)
# t_max['02-29']  = t_max.T.apply(lambda x: (x['02-28']+x['03-01'])/2 if np.isnan(x['02-29']) else x['02-29'])

# t_min['02-29'] = t_min.T.apply(lambda x: (x['02-28']+x['03-01'])/2 if np.isnan(x['02-29']) else x['02-29'])
# Drop 29 February: it has values only in leap years, and the later dropna() in
# create_train_and_test_datasets would otherwise discard every other year.
# `columns=` replaces the positional axis (rejected by pandas 2.x); errors='ignore'
# keeps the cell re-runnable once the column is already gone.
t_max = t_max.drop(columns='02-29', errors='ignore')
t_min = t_min.drop(columns='02-29', errors='ignore')
# t_avg = t_avg.drop('02-29', 1)
# t_min['02-29']

In [None]:
# Example box plots for a single calendar day (26 March) of each series
draw_box_plot(t_max['03-26'], 'TMAX')
draw_box_plot(t_min['03-26'], 'TMIN')

In [None]:
t_max

In [None]:
t_min

In [None]:
# Builds the training and test sets used to check the models

def create_train_and_test_datasets (data):
    """Split `data` into train/test features and targets.

    Rows of `data` containing missing values are dropped first. The table is then
    transposed, so every column of `data` becomes one sample: all rows of `data` except
    the last one form the features and the last row is the target. 20% of the samples
    are held out with a fixed seed.

    Returns
    -------
    tuple
        (X_train, X_test, y_train, y_test); the X parts are numpy arrays, the y parts
        are pandas Series.
    """
    samples = data.dropna().T
    features = samples.iloc[:, :-1].values
    target = samples.iloc[:, -1]
    return tuple(train_test_split(features, target, test_size=0.2, random_state=5))

In [None]:
# Standardizes the datasets before they are fed to the models

def standartization_dataset (X_train, X_test):
    """Standardize both feature sets with a StandardScaler fitted on `X_train` only."""
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)

In [None]:
def normalization_dataset (X_train, X_test):
    """Rescale both feature sets to [0, 1] with a MinMaxScaler fitted on `X_train` only."""
    scaler = MinMaxScaler(feature_range=(0,1)).fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Bar charts used to compare how the models perform

def draw_bar(df, field_name):
    """Show `df` as a bar chart, using `field_name` as both the y-axis label and the title."""
    caption = f'{field_name}'
    bar_colours = ["dodgerblue", 'r', 'darkmagenta', 'c']
    df.plot.bar(color=bar_colours, figsize=(7,7))
    plt.title(caption, fontsize=14, fontweight='bold')
    plt.ylabel(caption)
    plt.xticks(rotation=75)
    plt.show()
    
def draw_pie(df, field_name):
    """Show `df` as pie chart(s), one per column, titled `field_name` and with the y label blanked."""
    pie_options = dict(subplots=True, figsize=(10,10), colormap='jet')
    df.plot.pie(**pie_options)
    plt.ylabel(f' ')
    plt.title(f'{field_name}', fontsize=14, fontweight='bold')
    plt.show()

In [None]:
# Collects quality figures for a fitted model

def estimate_model (model, X_train, y_train, y_test, prediction, model_name):
    """Print and return the quality figures of a fitted model.

    The score comes from `model.score` on the TRAINING data, while MAE and MSE compare
    `prediction` with `y_test`.

    Returns
    -------
    pandas.DataFrame
        One row labelled `model_name` with the columns 'Score', 'MAE' and 'MSE'.
    """
    metrics = {
        "Score": model.score(X_train, y_train),
        'MAE': mean_absolute_error(y_test, prediction),
        'MSE': mean_squared_error(y_test, prediction),
    }
    print(f'\n{model_name}')
    print("The Explained Variance: %.2f" % metrics["Score"])
    print("The Mean Absolute Error: %.2f degrees celsius" % metrics['MAE'])
    print('The Square Error: %.2f' % metrics['MSE'])
    return pd.DataFrame(metrics, index=[f'{model_name}'])


In [None]:
def estimate_delta(y_test, prediction, name):
    """Show which share of the predictions falls into each absolute-error band.

    The bands are [0, 1), [1, 3), [3, 5) and [5, inf) degrees, so every finite error is
    counted exactly once and the shares add up to 100%. The shares are drawn as a pie
    chart via `draw_pie` and printed.

    Parameters
    ----------
    y_test : array-like
        True values.
    prediction : array-like
        Predicted values, positionally aligned with `y_test`.
    name : str
        Model label used in the chart title and as the row label of the printed table.

    Raises
    ------
    ValueError
        If there are no predictions to evaluate.
    """
    errors = np.abs(np.asarray(y_test, dtype=float) - np.asarray(prediction, dtype=float))
    if errors.size == 0:
        raise ValueError('estimate_delta needs at least one prediction')

    # Shares are computed directly from boolean masks. The previous
    # value_counts().drop(index[0]) approach dropped the MOST FREQUENT outcome, so a band
    # holding the majority of the errors was reported with its complement instead.
    bands = [
        ("Ошибка меньше 1 градуса", errors < 1),
        ("Ошибка от 1 до 3 градусов", (errors >= 1) & (errors < 3)),
        ('Ошибка от 3 до 5 градусов', (errors >= 3) & (errors < 5)),
        ('Ошибка больше 5 градусов', errors >= 5),
    ]
    df = pd.DataFrame({label: [mask.mean() * 100] for label, mask in bands}, index=[name])
    draw_pie(df.T, f'Ошибки в прогнозировании {name}')
    print(df)


In [None]:
# Train/test splits of the max and min temperature tables, kept as notebook-level variables
X_train_max, X_test_max, y_train_max, y_test_max = create_train_and_test_datasets(t_max)
X_train_min, X_test_min, y_train_min, y_test_min = create_train_and_test_datasets(t_min)

In [None]:
def reserch_prediction(data, field_name):
    """Fit LR, SVR, MLP and random-forest regressors on `data` and compare them on the held-out part.

    Parameters
    ----------
    data : pandas.DataFrame
        Table accepted by `create_train_and_test_datasets`.
    field_name : str
        Label inserted into the printed model names, the axis labels and the plot titles.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        `df` holds the true test targets ('TEMP') next to each model's predictions
        ('TEMP_LR', 'TEMP_SVM', 'TEMP_MLP', 'TEMP_RF'). `estimate` holds one row of
        Score/MAE/MSE for each of SVM, MLP and RF (LR is fitted and predicted but not scored).

    Side effects: prints the metrics, draws an error-band pie chart for every scored model
    and shows one "truth vs prediction" line plot each for SVM, MLP and RF.
    """
    X_train, X_test, y_train, y_test = create_train_and_test_datasets(data)
    X_train_std, X_test_std = standartization_dataset(X_train, X_test)
    X_train_n, X_test_n = normalization_dataset(X_train, X_test)

    LR = LinearRegression()
    LR.fit(X_train, y_train)
    # gamma='auto' (1 / n_features) is what the former 'auto_deprecated' placeholder
    # resolved to; current scikit-learn rejects the placeholder string.
    svr = SVR(C=100.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
              gamma='auto', kernel='rbf', max_iter=-1, shrinking=True,
              tol=0.001, verbose=False)
    svr.fit(X_train_std, y_train)
    mlp = MLPRegressor(hidden_layer_sizes=(2,), max_iter=5000,
                       activation='relu', solver='adam', alpha=0.001,
                       random_state=1, learning_rate_init=0.01)
    mlp.fit(X_train_n, y_train)
    random_forest = RandomForestRegressor(max_depth=10, random_state=0,
                                          n_estimators=1000)
    random_forest.fit(X_train, y_train)

    # One batched predict call per model instead of predicting row by row with np.append.
    prediction_LR = LR.predict(X_test)
    prediction_RF = random_forest.predict(X_test)
    prediction_SVM = svr.predict(X_test_std)
    prediction_mlp = mlp.predict(X_test_n)

    estimate_RF = estimate_model(random_forest, X_train, y_train, y_test, prediction_RF, f"RF {field_name}")
    estimate_delta(y_test, prediction_RF, 'RF')
    estimate_SVM = estimate_model(svr, X_train_std, y_train, y_test, prediction_SVM, f"SVM {field_name}")
    estimate_delta(y_test, prediction_SVM, 'SVM')
    estimate_MLP = estimate_model(mlp, X_train_n, y_train, y_test, prediction_mlp, f"MLP {field_name}")
    estimate_delta(y_test, prediction_mlp, 'MLP')

    estimate = pd.concat([estimate_SVM, estimate_MLP, estimate_RF], axis=0, join='outer', sort=True)

    df = pd.DataFrame({'TEMP': y_test, 'TEMP_LR': prediction_LR, 'TEMP_SVM': prediction_SVM,
                       'TEMP_MLP': prediction_mlp, 'TEMP_RF': prediction_RF})

    # (prediction column, line colour, line style, title) of each per-model comparison plot
    comparison_plots = (
        ('TEMP_SVM', 'r', '--', f'Предсказание {field_name} температуры моделю SVM'),
        ('TEMP_MLP', 'darkmagenta', ':', f'Предсказание {field_name} температуры моделю MLP'),
        ('TEMP_RF', 'y', '-.', f'Предсказание {field_name} температуры моделью RF'),
    )
    for column, colour, line_style, title in comparison_plots:
        df.plot(y=['TEMP', column], color=["dodgerblue", colour], style=['', line_style],
                linewidth=1, figsize=(25, 10))
        plt.grid(True)
        plt.xticks(rotation=75)
        plt.xlabel('Date')
        plt.ylabel(f'{field_name} temperature, $^o, C$')
        plt.title(title, fontsize=14, fontweight='bold')
        plt.show()

    return df, estimate

In [None]:
df_max, estimate_max = reserch_prediction(t_max, 'MAX')


In [None]:
# One bar chart per metric for the max-temperature models, followed by the raw table
draw_bar(estimate_max['Score'], 'Коэффициент детерминизации')
draw_bar(estimate_max['MAE'], 'MAE')
draw_bar(estimate_max['MSE'], 'MSE')
estimate_max

In [None]:
df_min, estimate_min = reserch_prediction(t_min, 'MIN')

In [None]:
# One bar chart per metric for the min-temperature models, followed by the raw table
draw_bar(estimate_min['Score'], 'Коэффициент детерминизации')
draw_bar(estimate_min['MAE'], 'MAE')
draw_bar(estimate_min['MSE'], 'MSE')
estimate_min

In [None]:
def reserch_prediction(data, field_name):
    """Fit LR, SVR, MLP and random-forest regressors on `data` and compare them on the held-out part.

    NOTE(review): this notebook defines `reserch_prediction` more than once; whichever
    definition was executed last is the one later cells call.

    Parameters
    ----------
    data : pandas.DataFrame
        Table accepted by `create_train_and_test_datasets`.
    field_name : str
        Label inserted into the printed model names, the axis label and the plot title.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        `df` holds the true test targets ('TEMP') next to each model's predictions
        ('TEMP_LR', 'TEMP_SVM', 'TEMP_MLP', 'TEMP_RF'). `estimate` holds one row of
        Score/MAE/MSE for each of LR, SVM, MLP and RF.

    Side effects: prints the metrics and shows one line plot of the truth against the
    SVM, MLP and RF predictions.
    """
    X_train, X_test, y_train, y_test = create_train_and_test_datasets(data)
    X_train_std, X_test_std = standartization_dataset(X_train, X_test)
    X_train_n, X_test_n = normalization_dataset(X_train, X_test)

    LR = LinearRegression()
    LR.fit(X_train, y_train)
    prediction_LR = LR.predict(X_test)
    estimate_LR = estimate_model(LR, X_train, y_train, y_test, prediction_LR, f"LR {field_name}")

    # gamma='auto' (1 / n_features) is what the former 'auto_deprecated' placeholder
    # resolved to; current scikit-learn rejects the placeholder string.
    svr = SVR(C=100.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
              gamma='auto', kernel='rbf', max_iter=-1, shrinking=True,
              tol=0.001, verbose=False)
    svr.fit(X_train_std, y_train)
    prediction_SVM = svr.predict(X_test_std)
    estimate_SVM = estimate_model(svr, X_train_std, y_train, y_test, prediction_SVM, f"SVM {field_name}")

    mlp = MLPRegressor(hidden_layer_sizes=(3,), max_iter=5000,
                       activation='relu', solver='adam', alpha=0.001,
                       random_state=1, learning_rate_init=0.01)
    mlp.fit(X_train_n, y_train)
    prediction_mlp = mlp.predict(X_test_n)
    estimate_MLP = estimate_model(mlp, X_train_n, y_train, y_test, prediction_mlp, f"MLP {field_name}")

    random_forest = RandomForestRegressor(max_depth=10, random_state=0,
                                          n_estimators=1000)
    random_forest.fit(X_train, y_train)
    prediction_RF = random_forest.predict(X_test)
    estimate_RF = estimate_model(random_forest, X_train, y_train, y_test, prediction_RF, f"RF {field_name}")

    # The extra SVR and shallow random forest that used to be constructed here were never
    # fitted or used, so they are gone.

    estimate = pd.concat([estimate_LR, estimate_SVM, estimate_MLP, estimate_RF], axis=0, join='outer', sort=True)

    df = pd.DataFrame({'TEMP': y_test, 'TEMP_LR': prediction_LR, 'TEMP_SVM': prediction_SVM, 'TEMP_MLP': prediction_mlp,
                       'TEMP_RF': prediction_RF})
    # One colour and one line style per plotted column (the lists used to carry a spare entry).
    df.plot(y=['TEMP', 'TEMP_SVM', 'TEMP_MLP', 'TEMP_RF'], color=["dodgerblue", 'r', 'darkmagenta', 'c'],
            style=['', '--', ':', '-.'], linewidth=1, figsize=(25, 10))
    plt.grid(True)
    plt.xticks(rotation=75)
    plt.xlabel('Date')
    plt.ylabel(f'{field_name} temperature, $^o, C$')
    plt.title(f'Предсказание {field_name} температуры', fontsize=14, fontweight='bold')
    plt.show()

    return df, estimate

# Run the model comparison for the max and the min temperature tables and chart each metric

df_max, estimate_max = reserch_prediction(t_max, 'MAX')
draw_bar(estimate_max['Score'], 'Коэффициент детерминизации')
draw_bar(estimate_max['MAE'], 'MAE')
draw_bar(estimate_max['MSE'], 'MSE')

df_min, estimate_min = reserch_prediction(t_min, 'MIN')
draw_bar(estimate_min['Score'], 'Коэффициент детерминизации')
draw_bar(estimate_min['MAE'], 'MAE')
draw_bar(estimate_min['MSE'], 'MSE')

In [None]:
# Baseline: ordinary least squares.
# The split is created here so the cell no longer relies on X_train / y_train / X_test /
# y_test left over from a cell that has not run yet.
# NOTE(review): t_max is assumed to be the intended input (the comparison plot further
# down is labelled "Max temperature") — confirm.
X_train, X_test, y_train, y_test = create_train_and_test_datasets(t_max)

regressor = LinearRegression()
regressor.fit(X_train, y_train)
prediction1 = regressor.predict(X_test)

# The score is computed on the training data; the error metrics on the held-out data.
print("The Explained Variance: %.2f" % regressor.score(X_train, y_train))
print("The Mean Absolute Error: %.2f degrees celsius" % mean_absolute_error(y_test, prediction1))
print("The Median Absolute Error: %.2f degrees celsius" % median_absolute_error(y_test, prediction1))
print('The Square Error: %.2f' % mean_squared_error(y_test, prediction1))



In [None]:

# Train/test split through the notebook's shared helper. The previous code referenced an
# undefined `data` and asked train_test_split for an empty test set.
# NOTE(review): t_max is assumed to be the intended input (the comparison plot further
# down is labelled "Max temperature") — confirm.
X_train, X_test, y_train, y_test = create_train_and_test_datasets(t_max)

stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)

# Fitting the regressor. A stray copy of a function body pasted into this call (a syntax
# error) has been removed. The instance gets its own name so the imported SVR class is not
# shadowed, and gamma='auto' replaces the placeholder 'auto_deprecated', which current
# scikit-learn rejects.
svr_model = SVR(C=100.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
                gamma='auto', kernel='rbf', max_iter=-1, shrinking=True,
                tol=0.001, verbose=False)

svr_model.fit(X_train_std, y_train)

prediction = svr_model.predict(X_test_std)
print("SVM")
print('The Explained Variance: %.2f' % svr_model.score(X_train_std, y_train))
# print('The Mean Absolute Error: %.2f degrees celcius' % mean_absolute_error(
#     y_test, prediction))
# print('The Median Absolute Error: %.2f degrees celcius' %
#       median_absolute_error(y_test, prediction))
# print('The Square Error: %.2f' %
#       mean_squared_error(y_test, prediction))

In [None]:
# MinMaxScaler and MLPRegressor are already imported in the notebook's first cell;
# the imports below repeat them.
from sklearn.preprocessing import MinMaxScaler

# Rescale the features to [0, 1]; the scaler is fitted on the training part only.
# NOTE(review): X_train / X_test / y_train / y_test come from whichever earlier cell ran last.
ms = MinMaxScaler(feature_range=(0,1))
X_train_n = ms.fit_transform(X_train)
X_test_n = ms.transform(X_test)

from sklearn.neural_network import MLPRegressor
# Multi-layer perceptron with one hidden layer of 3 units, fixed seed
mlp = MLPRegressor(hidden_layer_sizes=(3,), max_iter=5000, 
                   activation='relu', solver='adam', alpha=0.001,
                           random_state=1, learning_rate_init=0.01)
mlp.fit(X_train_n, y_train)
prediction_mlp = mlp.predict(X_test_n)

# The score is computed on the training data; the error metrics compare against y_test.
print('\nMLP')
print("The Explained Variance: %.2f" % mlp.score(X_train_n, y_train))  
print("The Mean Absolute Error: %.2f degrees celsius" % mean_absolute_error(y_test, prediction_mlp))  
print("The Median Absolute Error: %.2f degrees celsius" % median_absolute_error(y_test, prediction_mlp))
print('The Square Error: %.2f' % mean_squared_error(y_test, prediction_mlp))

In [None]:
# from sklearn.tree import DecisionTreeRegressor

# clf = DecisionTreeRegressor()
# clf = clf.fit(X_train_n, y_train)
# prediction_DT = clf.predict(X_test_n)

# print('\nDecision Tree')
# print("The Explained Variance: %.2f" % mlp.score(X_train_n, y_train))  
# print("The Mean Absolute Error: %.2f degrees celsius" % mean_absolute_error(y_test, prediction_DT))  
# print("The Median Absolute Error: %.2f degrees celsius" % median_absolute_error(y_test, prediction_DT))
# print('The Square Error: %.2f' % mean_squared_error(y_test, prediction_DT))

In [None]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.datasets import make_regression

# AdaBoost on the standardized features. `base_estimator=None` is no longer passed: it was
# only the default, and scikit-learn has removed that keyword (it is now `estimator`).
regr = AdaBoostRegressor(learning_rate=1.0, loss='linear',
                         n_estimators=1000, random_state=0)
regr.fit(X_train_std, y_train)

prediction_Ada_Boost = regr.predict(X_test_std)

# The score is computed on the training data; the error metrics compare against y_test.
print('\nAda Boost')
print("The Explained Variance: %.2f" % regr.score(X_train_std, y_train))
print("The Mean Absolute Error: %.2f degrees celsius" % mean_absolute_error(y_test, prediction_Ada_Boost))
print("The Median Absolute Error: %.2f degrees celsius" % median_absolute_error(y_test, prediction_Ada_Boost))
print('The Square Error: %.2f' % mean_squared_error(y_test, prediction_Ada_Boost))

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

# Derive the table from t_max instead of `data = data.dropna()`: `data` is not defined by
# any earlier cell, and the self-assignment made the cell depend on its own previous run.
# NOTE(review): t_max is assumed to be the intended input — confirm.
data = t_max.dropna()

X, y = data.T.iloc[:, :-1].values, data.T.iloc[:, -1]
# Train on every sample except the last one; the last and the second-to-last samples are
# then predicted separately.
# NOTE(review): this overwrites the notebook-level X_train / X_test / y_train / y_test
# that other cells read.
X_train, X_test, y_train, y_test = np.array(X[:-1]), np.array(X[-1:]), np.array(y[:-1]), np.array(y[-1:])
X_test2, y_test2 =  np.array(X[-2:-1]), np.array(y[-2:-1])


random_forest = RandomForestRegressor(max_depth=10, random_state=0,
                                      n_estimators=1000)
random_forest.fit(X_train, y_train)
prediction_RF_1 = random_forest.predict(X_test)
prediction_RF_2 = random_forest.predict(X_test2)

print(prediction_RF_1, prediction_RF_2)

In [None]:
t_max.dropna()

In [None]:
# Side-by-side table and line plot of the true values and the predictions of the ad-hoc models.
# NOTE(review): `prediction_RF` is not assigned by any notebook-level cell visible here (the
# random-forest cell only creates prediction_RF_1 / prediction_RF_2), and that cell also
# reassigns y_test — confirm which variables this cell is meant to read.
df = pd.DataFrame({'TEMP': y_test, 'TEMP_LR': prediction1, 'TEMP_SCR': prediction, 'TEMP_MLP': prediction_mlp, 'TEMP_RF': prediction_RF})

# Four columns are plotted; the colour and style lists contain more entries than that.
df.plot(y = ['TEMP', 'TEMP_SCR', 'TEMP_MLP', 'TEMP_RF'],color = ["dodgerblue", 'r', 'y', 'g', 'darkmagenta', 'pink'], style = ['', '--', '-', '-.', ''],linewidth=1, figsize=(25,10), )
plt.grid(True)
plt.xticks(rotation=75)
plt.xlabel('Date')    
plt.ylabel('Max temperature, $^o, C$')    
plt.title('Предсказание  максимальной температуры', fontsize=14, fontweight='bold')
plt.show()


In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.impute import SimpleImputer  # was used below without ever being imported

# Mean imputer fitted on the '01-02' column.
# Plain arrays are passed because the imputer is fitted on one column and then applied to
# others, and scikit-learn checks DataFrame column names between fit and transform.
# NOTE(review): every column therefore has its gaps filled with the mean of '01-02' —
# confirm that this is intended.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
imp.fit(t_max[['01-02']].to_numpy())

B = np.round(imp.transform(t_max[['01-01']].to_numpy()), 2)
# print(B)
# Each column goes in as an (n, 1) array and comes back flattened so apply() can rebuild the table.
b = t_max.apply(lambda x: np.round(imp.transform(x.to_numpy().reshape(-1, 1)), 2).ravel())

In [None]:
def reserch_prediction(data, field_name):
    """Fit LR, SVR, MLP and random-forest regressors on `data` and compare them on the held-out part.

    NOTE(review): this notebook defines `reserch_prediction` more than once; whichever
    definition was executed last is the one later cells call.

    Parameters
    ----------
    data : pandas.DataFrame
        Table accepted by `create_train_and_test_datasets`.
    field_name : str
        Label inserted into the printed model names, the axis label and the plot title.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        `df` holds the true test targets ('TEMP') next to each model's predictions
        ('TEMP_LR', 'TEMP_SVM', 'TEMP_MLP', 'TEMP_RF'). `estimate` holds one row of
        Score/MAE/MSE for each of SVM, MLP and RF (LR is fitted and predicted but not scored).

    Side effects: prints the metrics and shows one line plot of the truth against the
    SVM, MLP and RF predictions.
    """
    X_train, X_test, y_train, y_test = create_train_and_test_datasets(data)
    X_train_std, X_test_std = standartization_dataset(X_train, X_test)
    X_train_n, X_test_n = normalization_dataset(X_train, X_test)

    LR = LinearRegression()
    LR.fit(X_train, y_train)
    # gamma='auto' (1 / n_features) is what the former 'auto_deprecated' placeholder
    # resolved to; current scikit-learn rejects the placeholder string.
    svr = SVR(C=100.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
              gamma='auto', kernel='rbf', max_iter=-1, shrinking=True,
              tol=0.001, verbose=False)
    svr.fit(X_train_std, y_train)
    mlp = MLPRegressor(hidden_layer_sizes=(3,), max_iter=5000,
                       activation='relu', solver='adam', alpha=0.001,
                       random_state=1, learning_rate_init=0.01)
    mlp.fit(X_train_n, y_train)
    random_forest = RandomForestRegressor(max_depth=10, random_state=0,
                                          n_estimators=1000)
    random_forest.fit(X_train, y_train)

    # One batched predict call per model instead of predicting row by row with np.append.
    prediction_LR = LR.predict(X_test)
    prediction_RF = random_forest.predict(X_test)
    prediction_SVM = svr.predict(X_test_std)
    prediction_mlp = mlp.predict(X_test_n)

    estimate_RF = estimate_model(random_forest, X_train, y_train, y_test, prediction_RF, f"RF {field_name}")
    estimate_SVM = estimate_model(svr, X_train_std, y_train, y_test, prediction_SVM, f"SVM {field_name}")
    estimate_MLP = estimate_model(mlp, X_train_n, y_train, y_test, prediction_mlp, f"MLP {field_name}")

    estimate = pd.concat([estimate_SVM, estimate_MLP, estimate_RF], axis=0, join='outer', sort=True)

    df = pd.DataFrame({'TEMP': y_test, 'TEMP_LR': prediction_LR, 'TEMP_SVM': prediction_SVM,
                       'TEMP_MLP': prediction_mlp, 'TEMP_RF': prediction_RF})
    # One colour and one line style per plotted column (the lists used to carry a spare entry).
    df.plot(y=['TEMP', 'TEMP_SVM', 'TEMP_MLP', 'TEMP_RF'], color=["dodgerblue", 'r', 'darkmagenta', 'c'],
            style=['', '--', ':', '-.'], linewidth=1, figsize=(25, 10))
    plt.grid(True)
    plt.xticks(rotation=75)
    plt.xlabel('Date')
    plt.ylabel(f'{field_name} temperature, $^o, C$')
    plt.title(f'Предсказание {field_name} температуры', fontsize=14, fontweight='bold')
    plt.show()

    return df, estimate