In [1]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl.metadata (1.8 kB)
Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.0/250.0 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2


In [2]:
import json
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import pickle
import joblib

# Загрузите JSON-файл
with open('./data.json', 'r') as file:
    data = json.load(file)

# Инициализация списков для расширенных дат и средних значений NDVI
expanded_dates = []
expanded_ndvi_values = []

# Извлечение и расширение данных из JSON
for entry in data['data']:
    from_date = datetime.strptime(entry['interval']['from'], '%Y-%m-%dT%H:%M:%SZ')
    to_date = datetime.strptime(entry['interval']['to'], '%Y-%m-%dT%H:%M:%SZ')
    mean_ndvi = entry['outputs']['ndvi']['bands']['B0']['stats']['mean']
    
    current_date = from_date
    while current_date <= to_date:
        expanded_dates.append(current_date)
        expanded_ndvi_values.append(mean_ndvi)
        current_date += timedelta(days=1)

# Преобразование данных в DataFrame
ndvi_df = pd.DataFrame({
    'date': expanded_dates,
    'mean_ndvi': expanded_ndvi_values
})

# Загрузка данных о погоде из Excel
weather_df = pd.read_excel('./Погода.xlsx')

# Преобразование столбца дат в datetime
weather_df['Дата'] = pd.to_datetime(weather_df['Дата'])

# Объединение данных о погоде и NDVI по дате
merged_df = pd.merge(weather_df, ndvi_df, left_on='Дата', right_on='date', how='left')

# Удаление ненужных столбцов
merged_df.drop(columns=['date'], inplace=True)

# Вывод результатов
print(merged_df)


           Дата  Максимальная температура  Минимальная температура  \
0    2017-01-04                       1.0                     -1.0   
1    2017-01-05                       7.3                      0.9   
2    2017-01-06                       8.7                      3.8   
3    2017-01-07                       8.3                     -1.1   
4    2017-01-08                       2.3                     -1.4   
...         ...                       ...                      ...   
1077 2021-12-18                       3.9                      1.4   
1078 2021-12-19                       3.8                      0.3   
1079 2021-12-19                       3.8                      0.3   
1080 2021-12-20                       5.7                      0.2   
1081 2021-12-21                       2.7                     -1.5   

      Средняя температура  Атмосферное давление  Скорость ветра  Осадки  \
0                    -0.1                1006.7             2.0     0.0   
1        

In [3]:
merged_df

Unnamed: 0,Дата,Максимальная температура,Минимальная температура,Средняя температура,Атмосферное давление,Скорость ветра,Осадки,Эффективная температура,mean_ndvi
0,2017-01-04,1.0,-1.0,-0.1,1006.7,2.0,0.0,-3.0,0.029765
1,2017-01-05,7.3,0.9,3.6,1003.1,3.0,0.0,0.2,0.029765
2,2017-01-06,8.7,3.8,6.2,1004.7,2.0,0.0,3.7,0.029765
3,2017-01-07,8.3,-1.1,2.6,999.3,2.0,0.0,-0.2,0.029765
4,2017-01-08,2.3,-1.4,0.7,1002.9,2.0,0.0,-2.1,0.029765
...,...,...,...,...,...,...,...,...,...
1077,2021-12-18,3.9,1.4,2.1,1003.2,2.0,2.0,-0.6,-0.007329
1078,2021-12-19,3.8,0.3,1.9,1002.3,3.0,0.0,-1.6,-0.007329
1079,2021-12-19,3.8,0.3,1.9,1002.3,3.0,0.0,-1.6,0.005995
1080,2021-12-20,5.7,0.2,3.0,993.7,2.0,1.0,0.4,0.005995


In [4]:
# Создаем полный диапазон дат
full_date_range = pd.date_range(start='2017-01-04', end='2021-12-21')

# Преобразуем даты в DataFrame
full_date_df = pd.DataFrame({'Дата': full_date_range})

# Проверяем, какие даты отсутствуют в объединенном DataFrame
missing_dates = full_date_df[~full_date_df['Дата'].isin(merged_df['Дата'])]

# Выводим отсутствующие даты
print("Отсутствующие даты:")
print(missing_dates)

Отсутствующие даты:
           Дата
87   2017-04-01
88   2017-04-02
89   2017-04-03
90   2017-04-04
91   2017-04-05
...         ...
1548 2021-04-01
1549 2021-04-02
1550 2021-04-03
1551 2021-04-04
1552 2021-04-05

[813 rows x 1 columns]


In [5]:
full_date_df


Unnamed: 0,Дата
0,2017-01-04
1,2017-01-05
2,2017-01-06
3,2017-01-07
4,2017-01-08
...,...
1808,2021-12-17
1809,2021-12-18
1810,2021-12-19
1811,2021-12-20


In [6]:
# Сохранение объединенного DataFrame в CSV файл
merged_df.to_csv('./merged_weather_ndvi.csv', index=False)

In [7]:
# pip install tensorflow==2.9.3

In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Загрузка объединенных данных
merged_df = pd.read_csv('./merged_weather_ndvi.csv')

# Заполнение пропущенных значений
merged_df.fillna(method='ffill', inplace=True)

# Выбор признаков и целевой переменной
features = merged_df[['Максимальная температура', 'Минимальная температура', 'Средняя температура',
                      'Атмосферное давление', 'Скорость ветра', 'Осадки', 'Эффективная температура']]
target = merged_df['mean_ndvi']

# Нормализация данных
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)

# Разделение данных на тренировочный, валидационный и тестовый наборы
X_train, X_temp, y_train, y_temp = train_test_split(features_scaled, target, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Преобразование данных для LSTM
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_val = np.reshape(X_val, (X_val.shape[0], 1, X_val.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))


In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Создание модели
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=50))
model.add(Dense(1))

# Компиляция модели
model.compile(optimizer='adam', loss='mean_squared_error')

# Вывод архитектуры модели
model.summary()

2024-05-22 15:51:48.107565: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-22 15:51:48.114125: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-05-22 15:51:48.114145: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-05-22 15:51:50.080691: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2024-05-22 15:51:50.080738: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to c

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1, 50)             11600     
                                                                 
 lstm_1 (LSTM)               (None, 50)                20200     
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
Total params: 31,851
Trainable params: 31,851
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Обучение модели
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Предсказание на тестовом наборе
y_pred = model.predict(X_test)

# Вычисление метрик
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print(f'RMSE: {rmse}')
print(f'MAE: {mae}')



RMSE: 0.12986342480812038
MAE: 0.10050039331861631


In [12]:
# save the model 
filename_pl = './model_pl.tar'
pickle.dump(model, open(filename_pl, 'wb'))

filename_jl = './model_jl.tar'
joblib.dump(model, open(filename_jl, 'wb')) 



INFO:tensorflow:Assets written to: ram://47698c56-0906-4d16-a4cf-3f43504ce0e0/assets


INFO:tensorflow:Assets written to: ram://47698c56-0906-4d16-a4cf-3f43504ce0e0/assets


INFO:tensorflow:Assets written to: ram://574d490f-05c5-4d31-a1ec-e36ab274015e/assets


INFO:tensorflow:Assets written to: ram://574d490f-05c5-4d31-a1ec-e36ab274015e/assets


In [13]:
model_copy = joblib.load(open(filename_jl, 'rb'))

In [14]:
# Предсказание на тестовом наборе
y_pred_copy = model_copy.predict(X_test)

# Вычисление метрик
rmse_copy = np.sqrt(mean_squared_error(y_test, y_pred_copy))
mae_copy = mean_absolute_error(y_test, y_pred_copy)

print(f'RMSE: {rmse_copy}')
print(f'MAE: {mae_copy}')

RMSE: 0.12986342480812038
MAE: 0.10050039331861631


In [17]:
X_test

array([[[0.4139265 , 0.52359551, 0.44857768, 0.41549296, 0.33333333,
         0.        , 0.44660194]],

       [[0.33462282, 0.4       , 0.34135667, 0.59859155, 1.        ,
         0.        , 0.29126214]],

       [[0.86653772, 0.88314607, 0.88402626, 0.342723  , 0.16666667,
         0.        , 0.92815534]],

       [[0.4410058 , 0.47865169, 0.42888403, 0.28873239, 0.33333333,
         0.        , 0.43300971]],

       [[0.11025145, 0.25617978, 0.14660832, 0.61971831, 0.5       ,
         0.17073171, 0.14174757]],

       [[0.49709865, 0.5505618 , 0.50109409, 0.53286385, 0.33333333,
         0.02439024, 0.51067961]],

       [[0.55125725, 0.48539326, 0.50328228, 0.58685446, 0.66666667,
         0.        , 0.46990291]],

       [[0.44294004, 0.56404494, 0.47483589, 0.41314554, 0.33333333,
         0.        , 0.47572816]],

       [[0.28239845, 0.29438202, 0.28008753, 0.78403756, 0.33333333,
         0.        , 0.28349515]],

       [[0.43907157, 0.54157303, 0.4595186 , 0.3028169 

In [23]:
np.array([[[0.53578337, 0.48988764, 0.51422319, 0.39906103, 0.5       ,
         0.02439024, 0.50097087]]])

array([[[0.53578337, 0.48988764, 0.51422319, 0.39906103, 0.5       ,
         0.02439024, 0.50097087]]])

In [26]:
model_copy.predict(np.array([[[0.53578337, 0.48988764, 0.51422319, 0.39906103, 0.5       ,
         0.02439024, 0.50097087]]]))[0][0]



0.1710681

In [27]:
model_copy.predict(np.array([[[0.69825919, 0.6741573 , 0.67396061, 0.56807512, 0.5       ,
         0.        , 0.65825243]]]))[0][0]



0.2834996