In [5]:
import pandas as pd
import numpy as np


# Выгрузка и предобработка данных

In [6]:
data = pd.read_excel('.\data.xls')

In [7]:
data.head

<bound method NDFrame.head of       Местное время в Москве (ВДНХ)     T
0                  01.10.2024 21:00  11.6
1                  01.10.2024 18:00  15.2
2                  01.10.2024 15:00  16.7
3                  01.10.2024 12:00  13.2
4                  01.10.2024 09:00   8.3
...                             ...   ...
40239              01.01.2014 04:00   1.5
40240              01.01.2014 03:00   1.5
40241              01.01.2014 02:00   1.3
40242              01.01.2014 01:00   1.7
40243              01.01.2014 00:00   1.3

[40244 rows x 2 columns]>

 Преобразование времени

In [8]:
data['Местное время в Москве (ВДНХ)'] = pd.to_datetime(data['Местное время в Москве (ВДНХ)'],format = '%d.%m.%Y %H:%M')

Усреднение времени по дням

In [9]:
data_mean_temp = data.resample('D', on='Местное время в Москве (ВДНХ)').mean()

data_mean_temp

Unnamed: 0_level_0,T
Местное время в Москве (ВДНХ),Unnamed: 1_level_1
2014-01-01,0.575000
2014-01-02,-2.454167
2014-01-03,-2.466667
2014-01-04,-1.741667
2014-01-05,0.265217
...,...
2024-09-27,17.037500
2024-09-28,18.112500
2024-09-29,16.587500
2024-09-30,11.300000


Определение декады, извелечение месяца и года из индекса

In [10]:
data_decaded = data_mean_temp
data_decaded['day'] = data_decaded.index.day

def decade_sort(x):
    if x > 20:
        return 3
    if x > 10:
        return 2
    else:
        return 1

data_decaded['decade'] = data_decaded['day'].apply(decade_sort)

data_decaded['year'] = data_decaded.index.year

data_decaded['month'] = data_decaded.index.month


data_decaded.head (30)

Unnamed: 0_level_0,T,day,decade,year,month
Местное время в Москве (ВДНХ),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-01,0.575,1,1,2014,1
2014-01-02,-2.454167,2,1,2014,1
2014-01-03,-2.466667,3,1,2014,1
2014-01-04,-1.741667,4,1,2014,1
2014-01-05,0.265217,5,1,2014,1
2014-01-06,0.854167,6,1,2014,1
2014-01-07,1.2,7,1,2014,1
2014-01-08,2.3625,8,1,2014,1
2014-01-09,2.720833,9,1,2014,1
2014-01-10,3.133333,10,1,2014,1


In [11]:
data_grouped = data_decaded

data_grouped = data_grouped.groupby(['year', 'month', 'decade'])['T'].mean()

data_grouped = data_grouped.reset_index()

data_grouped

Unnamed: 0,year,month,decade,T
0,2014,1,1,0.444855
1,2014,1,2,-9.188750
2,2014,1,3,-16.354167
3,2014,2,1,-5.371705
4,2014,2,2,0.547500
...,...,...,...,...
383,2024,8,3,20.151136
384,2024,9,1,18.411250
385,2024,9,2,18.520000
386,2024,9,3,14.478750


Удаление лишних месяцев

In [12]:
data_filtered = data_grouped

months_to_keep = [10, 11, 12, 1]

data_filtered = data_filtered[data_filtered['month'].isin(months_to_keep)]

# Условия для фильтрации
condition = ~( (data_filtered['month'] == 1) & (data_filtered['decade'].isin([2, 3])) )

# Применяем фильтрацию
data_filtered = data_filtered[condition]

data_filtered = data_filtered.drop(0)

print (data_filtered.head)


<bound method NDFrame.head of      year  month  decade          T
27   2014     10       1   5.892369
28   2014     10       2   5.638750
29   2014     10       3  -0.111364
30   2014     11       1   2.950870
31   2014     11       2  -0.956250
..    ...    ...     ...        ...
357  2023     12       1  -8.941250
358  2023     12       2  -4.627500
359  2023     12       3  -0.185227
360  2024      1       1 -17.697500
387  2024     10       1  11.212500

[101 rows x 4 columns]>


In [13]:
decoded_data = data_filtered.copy(deep=True)

bins = [-40, -10, -2, 0, 4, 12, 18, 40]
labels = ['-40..-10','-10..-2','-2..0','0..4','4..12','12..18','18..40']

decoded_data['T'] = pd.cut(decoded_data['T'],bins=bins,labels=labels,include_lowest=True)

october2024_T = decoded_data['T'][387]

decoded_data = decoded_data.drop(387)

print(decoded_data)

print(october2024_T)


     year  month  decade         T
27   2014     10       1     4..12
28   2014     10       2     4..12
29   2014     10       3     -2..0
30   2014     11       1      0..4
31   2014     11       2     -2..0
..    ...    ...     ...       ...
356  2023     11       3   -10..-2
357  2023     12       1   -10..-2
358  2023     12       2   -10..-2
359  2023     12       3     -2..0
360  2024      1       1  -40..-10

[100 rows x 4 columns]
4..12


Преобразуем значения во временные ряды

In [14]:
work_data = decoded_data

year_ranges = {}

for i in range(0, len(work_data ), 10):

    # Берем текущую группу из 10 значений
    group = work_data .iloc[i:i+10]
    
    # Получаем год из первого элемента в группе
    year = int(group['year'].iloc[0])
    
    # Сохраняем список значений T в словаре с указанным годом
    year_ranges[year] = group['T'].tolist()

# Вывод результата
print(year_ranges)


{2014: ['4..12', '4..12', '-2..0', '0..4', '-2..0', '-10..-2', '-10..-2', '0..4', '-10..-2', '-10..-2'], 2015: ['4..12', '4..12', '0..4', '0..4', '0..4', '-2..0', '0..4', '-2..0', '-2..0', '-40..-10'], 2016: ['4..12', '0..4', '0..4', '-2..0', '-10..-2', '-10..-2', '-10..-2', '-10..-2', '-2..0', '-40..-10'], 2017: ['4..12', '4..12', '0..4', '0..4', '0..4', '-10..-2', '-2..0', '0..4', '-2..0', '-2..0'], 2018: ['4..12', '4..12', '0..4', '0..4', '-2..0', '-10..-2', '-10..-2', '-10..-2', '-10..-2', '-10..-2'], 2019: ['4..12', '4..12', '4..12', '4..12', '0..4', '-10..-2', '0..4', '0..4', '0..4', '0..4'], 2020: ['12..18', '4..12', '4..12', '4..12', '-2..0', '0..4', '-10..-2', '-10..-2', '-10..-2', '-10..-2'], 2021: ['4..12', '4..12', '4..12', '4..12', '0..4', '0..4', '-10..-2', '-10..-2', '-40..-10', '-10..-2'], 2022: ['4..12', '4..12', '4..12', '0..4', '0..4', '-10..-2', '-10..-2', '-10..-2', '-2..0', '-10..-2'], 2023: ['4..12', '4..12', '0..4', '4..12', '-2..0', '-10..-2', '-10..-2', '-10..

# Вариант 1. Учитываем только первую и последнюю декаду

Cчитаем значения переходов

In [15]:
rows, cols = len(labels), len(labels)

count_matrix = [[0 for _ in range(cols)] for _ in range(rows)]


for key, value in year_ranges.items():
    index_first = labels.index(value[0])
    index_last = labels.index(value[9])
    count_matrix[index_first][index_last] += 1


print('\n'.join('\t'.join(map(str, row)) for row in count_matrix))


0	0	0	0	0	0	0
0	0	0	0	0	0	0
0	0	0	0	0	0	0
0	0	0	0	0	0	0
3	4	1	1	0	0	0
0	1	0	0	0	0	0
0	0	0	0	0	0	0


Считаем вероятности переходов

In [16]:
probability_matrix = count_matrix

for i in range(7):
    i_sum = np.sum(probability_matrix,axis=1)[i]
    if i_sum > 0:
        for j in range(7):
            probability_matrix[i][j] /= i_sum



for row in probability_matrix:
    print(" | ".join(f"{value:.2f}" for value in row))

0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
0.33 | 0.44 | 0.11 | 0.11 | 0.00 | 0.00 | 0.00
0.00 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00


Определение итогового распределения вероятности для нашего случая

In [17]:
cur_state_vector = [0 for _ in range(7)]

cur_state = labels.index(october2024_T)

cur_state_vector[cur_state] = 1

cur_state_vector

probability_matrix = np.matrix (probability_matrix)

result_probabilities = cur_state_vector * probability_matrix

result_probabilities

matrix([[0.33333333, 0.44444444, 0.11111111, 0.11111111, 0.        ,
         0.        , 0.        ]])

Определение средних значений диапазонов

In [18]:
min_values = [0 for _ in range(7)]
max_values = [0 for _ in range(7)]

for i in range(7):
    min_values[i] = bins[i]
    max_values[i] = bins[i+1]

min_values[0] = min_values[1] *2/3 + min_values[0]/3
max_values[6] = max_values[5] *2/3 + max_values[6]/3

print(min_values)
print(max_values)

[-20.0, -10, -2, 0, 4, 12, 18]
[-10, -2, 0, 4, 12, 18, 25.333333333333336]


Итоговый прогноз

In [19]:
max = 0
min = 0

for i in range(6):
    max += max_values[i] * result_probabilities[0, i]
    min += min_values[i] * result_probabilities[0, i]

avg = (max + min)/2

print(max)
print(min)
print(avg)

-3.777777777777777
-11.333333333333332
-7.5555555555555545


In [20]:
bins

[-40, -10, -2, 0, 4, 12, 18, 40]

# Вариант 2. Учитываем все декады

Cчитаем значения переходов

In [21]:
rows, cols = len(labels), len(labels)

count_matrix2 = [[0 for _ in range(cols)] for _ in range(rows)]

for key, value in year_ranges.items():
    for pair in range (9):
        index_first = labels.index(value[pair])
        index_last = labels.index(value[pair+1])
        count_matrix2[index_first][index_last] += 1


print('\n'.join('\t'.join(map(str, row)) for row in count_matrix2))


0	1	0	0	0	0	0
1	17	4	2	0	0	0
3	5	2	4	0	0	0
0	6	6	11	1	0	0
0	0	3	8	15	0	0
0	0	0	0	1	0	0
0	0	0	0	0	0	0


Считаем вероятности переходов

In [22]:
probability_matrix2 = count_matrix2

for i in range(7):
    i_sum = np.sum(probability_matrix2,axis=1)[i]
    if i_sum > 0:
        for j in range(7):
            probability_matrix2[i][j] /= i_sum



for row in probability_matrix2:
    print(" | ".join(f"{value:.2f}" for value in row))

0.00 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
0.04 | 0.71 | 0.17 | 0.08 | 0.00 | 0.00 | 0.00
0.21 | 0.36 | 0.14 | 0.29 | 0.00 | 0.00 | 0.00
0.00 | 0.25 | 0.25 | 0.46 | 0.04 | 0.00 | 0.00
0.00 | 0.00 | 0.12 | 0.31 | 0.58 | 0.00 | 0.00
0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.00 | 0.00
0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00


Определение среднего распределения вероятностей для нашего случая

In [23]:
probability_matrix2 = np.matrix (probability_matrix2)

result_probabilities2 = cur_state_vector * probability_matrix2

Итоговый прогноз

In [24]:
max2 = 0
min2 = 0

for i in range(6):
    max2 += max_values[i] * result_probabilities2[0, i]
    min2 += min_values[i] * result_probabilities2[0, i]

avg2 = (max2 + min2)/2

print(max2)
print(min2)
print(avg2)

8.153846153846153
2.0769230769230766
5.115384615384615


# Вариант 3. С разными диапазонами для каждой декады

Определение матрицы кодирования. Смещаем исходный интервал на -0.5 в каждой следующей декаде

In [25]:
bins_num = len(bins)

bins_matrix = np.zeros((10, bins_num), dtype=float)

for i in range(10):
    for j in range(len(bins)):
        bins_matrix[i, j] = bins [j] - 0.5*i

bins_matrix

array([[-40. , -10. ,  -2. ,   0. ,   4. ,  12. ,  18. ,  40. ],
       [-40.5, -10.5,  -2.5,  -0.5,   3.5,  11.5,  17.5,  39.5],
       [-41. , -11. ,  -3. ,  -1. ,   3. ,  11. ,  17. ,  39. ],
       [-41.5, -11.5,  -3.5,  -1.5,   2.5,  10.5,  16.5,  38.5],
       [-42. , -12. ,  -4. ,  -2. ,   2. ,  10. ,  16. ,  38. ],
       [-42.5, -12.5,  -4.5,  -2.5,   1.5,   9.5,  15.5,  37.5],
       [-43. , -13. ,  -5. ,  -3. ,   1. ,   9. ,  15. ,  37. ],
       [-43.5, -13.5,  -5.5,  -3.5,   0.5,   8.5,  14.5,  36.5],
       [-44. , -14. ,  -6. ,  -4. ,   0. ,   8. ,  14. ,  36. ],
       [-44.5, -14.5,  -6.5,  -4.5,  -0.5,   7.5,  13.5,  35.5]])

Преобразуем данные в ряды

In [26]:
work_data3 = data_filtered.copy(deep=True)

work_data3 = work_data3.drop(387)

year_ranges3 = {}

for i in range(0, len(work_data ), 10):

    # Берем текущую группу из 10 значений
    group = work_data3 .iloc[i:i+10]
    
    # Получаем год из первого элемента в группе
    year = int(group['year'].iloc[0])
    
    # Сохраняем список значений T в словаре с указанным годом
    year_ranges3[year] = group['T'].tolist()

# Вывод результата
print(year_ranges3)

{2014: [5.8923686868686875, 5.63875, -0.1113636363636362, 2.950869565217391, -0.95625, -5.9991666666666665, -4.320760869565217, 1.004945652173913, -7.673484848484848, -7.01], 2015: [6.50625, 4.36125, 2.4886363636363638, 3.29, 0.5650000000000001, -1.4224999999999999, 1.795, -0.7096428571428571, -0.31590909090909114, -14.305000000000001], 2016: [9.73625, 3.065, 0.8465909090909091, -1.92125, -2.505, -3.525, -5.9725, -7.62, -0.6590909090909091, -13.346250000000001], 2017: [7.28375, 7.08125, 1.1556818181818183, 1.26625, 2.085, -3.22, -1.2149999999999999, 1.5325000000000002, -0.32954545454545453, -0.11374999999999998], 2018: [8.41, 10.7075, 3.285227272727272, 3.915, -1.74875, -4.00375, -3.76875, -6.008750000000001, -6.867045454545454, -6.395], 2019: [7.907500000000001, 10.7975, 7.903409090909091, 4.76875, 3.9162500000000002, -3.3549999999999995, 0.2653571428571429, 1.7474999999999998, 0.5227272727272726, 0.009999999999999986], 2020: [12.66, 8.0625, 7.029545454545455, 5.9350000000000005, -0.1

Заново декодируем исходные данные

In [27]:
decoded_data3 = year_ranges3

decoded_data3 = pd.DataFrame(year_ranges3)

labels = ['1 interval','2 interval','3 interval','4 interval','5 interval','6 interval','7 interval']

for i in range (10):
    print (decoded_data3.iloc[i]) #Выводим для проверки i-ую декаду
    bins_local = bins_matrix[i]
    print(bins_local) #Проверяем что для нее используется i-ая строка матрицы кодирования
    decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)

decoded_data3

2014     5.892369
2015     6.506250
2016     9.736250
2017     7.283750
2018     8.410000
2019     7.907500
2020    12.660000
2021     6.013750
2022    10.981250
2023     8.561250
Name: 0, dtype: float64
[-40. -10.  -2.   0.   4.  12.  18.  40.]
2014    5.63875
2015    4.36125
2016      3.065
2017    7.08125
2018    10.7075
2019    10.7975
2020     8.0625
2021    6.47375
2022    6.51625
2023    6.91625
Name: 1, dtype: object
[-40.5 -10.5  -2.5  -0.5   3.5  11.5  17.5  39.5]
2014   -0.111364
2015    2.488636
2016    0.846591
2017    1.155682
2018    3.285227
2019    7.903409
2020    7.029545
2021    6.584091
2022    4.513636
2023    1.380682
Name: 2, dtype: object
[-41. -11.  -3.  -1.   3.  11.  17.  39.]
2014    2.95087
2015       3.29
2016   -1.92125
2017    1.26625
2018      3.915
2019    4.76875
2020      5.935
2021    5.07125
2022    2.21625
2023      7.545
Name: 3, dtype: object
[-41.5 -11.5  -3.5  -1.5   2.5  10.5  16.5  38.5]
2014   -0.95625
2015      0.565
2016     -2.505
2017 

  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3.iloc[i],bins=bins_local,labels=labels,include_lowest=True)
  decoded_data3.iloc[i] = pd.cut(decoded_data3

Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,5 interval,5 interval,5 interval,5 interval,5 interval,5 interval,6 interval,5 interval,5 interval,5 interval
1,5 interval,5 interval,4 interval,5 interval,5 interval,5 interval,5 interval,5 interval,5 interval,5 interval
2,4 interval,4 interval,4 interval,4 interval,5 interval,5 interval,5 interval,5 interval,5 interval,4 interval
3,5 interval,5 interval,3 interval,4 interval,5 interval,5 interval,5 interval,5 interval,4 interval,5 interval
4,4 interval,4 interval,3 interval,5 interval,4 interval,5 interval,4 interval,4 interval,4 interval,4 interval
5,2 interval,4 interval,3 interval,3 interval,3 interval,3 interval,4 interval,4 interval,2 interval,2 interval
6,3 interval,5 interval,2 interval,4 interval,3 interval,4 interval,2 interval,3 interval,2 interval,2 interval
7,5 interval,4 interval,2 interval,5 interval,2 interval,5 interval,3 interval,3 interval,4 interval,3 interval
8,2 interval,4 interval,4 interval,4 interval,2 interval,5 interval,4 interval,2 interval,4 interval,4 interval
9,2 interval,2 interval,2 interval,5 interval,3 interval,5 interval,4 interval,3 interval,2 interval,1 interval


Считаем включения

In [28]:
rows, cols = len(labels), len(labels)

count_matrix3 = [[0 for _ in range(cols)] for _ in range(rows)]

counting_data3 = decoded_data3.to_dict(orient='list')

for key, value in counting_data3.items():
    for pair in range (9):
        index_first = labels.index(value[pair])
        #print(index_first)
        index_last = labels.index(value[pair+1])
        #print(index_last)
        count_matrix3[index_first][index_last] += 1


print('\n'.join('\t'.join(map(str, row)) for row in count_matrix3))

0	0	0	0	0	0	0
0	5	5	2	0	0	0
0	3	4	4	1	0	0
1	7	3	9	8	0	0
0	1	2	14	20	0	0
0	0	0	0	1	0	0
0	0	0	0	0	0	0


Строим матрицу переходов

In [29]:
probability_matrix3 = count_matrix3

for i in range(7):
    i_sum = np.sum(probability_matrix3,axis=1)[i]
    if i_sum > 0:
        for j in range(7):
            probability_matrix3[i][j] /= i_sum


for row in probability_matrix3:
    print(" | ".join(f"{value:.2f}" for value in row))

0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
0.00 | 0.42 | 0.42 | 0.17 | 0.00 | 0.00 | 0.00
0.00 | 0.25 | 0.33 | 0.33 | 0.08 | 0.00 | 0.00
0.04 | 0.25 | 0.11 | 0.32 | 0.29 | 0.00 | 0.00
0.00 | 0.03 | 0.05 | 0.38 | 0.54 | 0.00 | 0.00
0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.00 | 0.00
0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00


Определяем границы интервалов

In [30]:
min_values3 = [0 for _ in range(7)]
max_values3 = [0 for _ in range(7)]

#Берем диапазон соответствующий 1 декаде января
bins3 = bins_matrix[9]

for i in range(7):
    min_values3[i] = bins3[i]
    max_values3[i] = bins3[i+1]

min_values3[0] = min_values3[1] *2/3 + min_values3[0]/3
max_values3[6] = max_values3[5] *2/3 + max_values3[6]/3

print(min_values3)
print(max_values3)

[np.float64(-24.5), np.float64(-14.5), np.float64(-6.5), np.float64(-4.5), np.float64(-0.5), np.float64(7.5), np.float64(13.5)]
[np.float64(-14.5), np.float64(-6.5), np.float64(-4.5), np.float64(-0.5), np.float64(7.5), np.float64(13.5), np.float64(20.833333333333336)]


Определение итогового распределения вероятности для нашего случая

In [31]:
probability_matrix3 = np.matrix (probability_matrix3)

result_probabilities3 = cur_state_vector * probability_matrix3

result_probabilities3

matrix([[0.        , 0.02702703, 0.05405405, 0.37837838, 0.54054054,
         0.        , 0.        ]])

Итоговый прогноз

In [32]:
max3 = 0
min3 = 0

for i in range(6):
    max3 += max_values3[i] * result_probabilities3[0, i]
    min3 += min_values3[i] * result_probabilities3[0, i]

avg3 = (max3 + min3)/2

print('Максимум',max3)
print('Минимум',min3)
print('Среднее',avg3)

Максимум 3.4459459459459465
Минимум -2.7162162162162162
Среднее 0.36486486486486513


# Результаты для разных вариантов

In [33]:

print('Вариант 1')
print('Максимум',max)
print('Минимум',min)
print('Среднее',avg)

print('Вариант 2')
print('Максимум',max2)
print('Минимум',min2)
print('Среднее',avg2)

print('Вариант 3')
print('Максимум',max3)
print('Минимум',min3)
print('Среднее',avg3)

Вариант 1
Максимум -3.777777777777777
Минимум -11.333333333333332
Среднее -7.5555555555555545
Вариант 2
Максимум 8.153846153846153
Минимум 2.0769230769230766
Среднее 5.115384615384615
Вариант 3
Максимум 3.4459459459459465
Минимум -2.7162162162162162
Среднее 0.36486486486486513
