## Анализ температурных данных и мониторинг текущей температуры через OpenWeatherMap API

In [1]:
import pandas as pd
import numpy as np
import requests
import datetime
import aiohttp
import asyncio
import time
import multiprocessing as mp
from multiprocessing import Pool
import nest_asyncio
nest_asyncio.apply()

In [2]:
data = pd.read_csv('/content/temperature_data.csv')
data.head()

Unnamed: 0,city,timestamp,temperature,season
0,New York,2010-01-01,3.353857,winter
1,New York,2010-01-02,6.865711,winter
2,New York,2010-01-03,-0.888902,winter
3,New York,2010-01-04,-9.5139,winter
4,New York,2010-01-05,2.683528,winter


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54750 entries, 0 to 54749
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   city         54750 non-null  object 
 1   timestamp    54750 non-null  object 
 2   temperature  54750 non-null  float64
 3   season       54750 non-null  object 
dtypes: float64(1), object(3)
memory usage: 1.7+ MB


In [4]:
data['timestamp'] = pd.to_datetime(data['timestamp'])

## 1. Анализ исторических данных

### Вычисления

Вычислим **скользящее среднее** температуры с окном в 30 дней для сглаживания краткосрочных колебаний

In [5]:
%%time
data['rolling_mean'] = data.set_index('timestamp').groupby('city')['temperature'].rolling(window='30D',min_periods=1).mean().reset_index(level=[0,1], drop=True)
data.head()

CPU times: user 17.7 ms, sys: 3.97 ms, total: 21.7 ms
Wall time: 28.2 ms


Unnamed: 0,city,timestamp,temperature,season,rolling_mean
0,New York,2010-01-01,3.353857,winter,4.509645
1,New York,2010-01-02,6.865711,winter,2.77171
2,New York,2010-01-03,-0.888902,winter,2.882208
3,New York,2010-01-04,-9.5139,winter,3.446129
4,New York,2010-01-05,2.683528,winter,3.026442


Рассчитаем среднюю температуру и стандартное отклонение для каждого сезона в каждом городе.

In [6]:
%%time
data['mean_temperature'] = data.groupby(['city', 'season'])['temperature'].transform('mean')
data.head()

CPU times: user 15.6 ms, sys: 833 µs, total: 16.5 ms
Wall time: 25.5 ms


Unnamed: 0,city,timestamp,temperature,season,rolling_mean,mean_temperature
0,New York,2010-01-01,3.353857,winter,4.509645,0.327685
1,New York,2010-01-02,6.865711,winter,2.77171,0.327685
2,New York,2010-01-03,-0.888902,winter,2.882208,0.327685
3,New York,2010-01-04,-9.5139,winter,3.446129,0.327685
4,New York,2010-01-05,2.683528,winter,3.026442,0.327685


In [7]:
%%time
data['std_temperature'] = data.groupby(['city', 'season'])['temperature'].transform('std')
data.head()

CPU times: user 14.5 ms, sys: 0 ns, total: 14.5 ms
Wall time: 17.7 ms


Unnamed: 0,city,timestamp,temperature,season,rolling_mean,mean_temperature,std_temperature
0,New York,2010-01-01,3.353857,winter,4.509645,0.327685,4.921422
1,New York,2010-01-02,6.865711,winter,2.77171,0.327685,4.921422
2,New York,2010-01-03,-0.888902,winter,2.882208,0.327685,4.921422
3,New York,2010-01-04,-9.5139,winter,3.446129,0.327685,4.921422
4,New York,2010-01-05,2.683528,winter,3.026442,0.327685,4.921422


Выявим аномалии, где температура выходит за пределы $ \text{среднее} \pm 2\sigma $.

In [8]:
%%time
data['anomaly'] = (
    (data['temperature'] > data['mean_temperature'] + 2 * data['std_temperature']) |
    (data['temperature'] < data['mean_temperature'] - 2 * data['std_temperature'])
)

CPU times: user 1.87 ms, sys: 1.95 ms, total: 3.83 ms
Wall time: 3.59 ms


### Сравнение

Попробуем распараллелить проведение этого анализа. Сравним скорость выполнения анализа с распараллеливанием и без него.

In [9]:
def temperature_analyze(data):
    data['rolling_mean'] = data.set_index('timestamp').groupby('city')['temperature'].rolling(window='30D',min_periods=1).mean().reset_index(level=[0,1], drop=True)
    data['mean_temperature'] = data.groupby(['city', 'season'])['temperature'].transform('mean')
    data['std_temperature'] = data.groupby(['city', 'season'])['temperature'].transform('std')
    data['anomaly'] = ((data['temperature'] > data['mean_temperature'] + 2 * data['std_temperature']) |
                       (data['temperature'] < data['mean_temperature'] - 2 * data['std_temperature']))
    return data

In [10]:
%%time
linear_results = temperature_analyze(data)

CPU times: user 38.3 ms, sys: 8.75 ms, total: 47.1 ms
Wall time: 47.6 ms


In [11]:
def parallel_temperature_analyze(data, num_processes=4):
    chunks = [data.iloc[i::num_processes, :] for i in range(num_processes)]

    with Pool(num_processes) as pool:
        results = pool.map(temperature_analyze, chunks)

    return pd.concat(results, ignore_index=True)

In [12]:
%%time
parallel_results = parallel_temperature_analyze(data)

CPU times: user 53 ms, sys: 49.9 ms, total: 103 ms
Wall time: 211 ms


## 2. Мониторинг текущей температуры

Подключим OpenWeatherMap API для получения текущей температуры города, после регистрации на сайте.

In [13]:
API_KEY = ''

### Вычисления

Получим текущую температуру для выбранного города через OpenWeatherMap API.

In [45]:
params = {'q': 'Moscow', 'appid': API_KEY, 'units': 'metric', 'lang': 'ru'}
city_temp = requests.get('http://api.openweathermap.org/data/2.5/weather', params)
moscow_t = city_temp.json()['main']['temp']
moscow_t

-0.29

Определим, является ли текущая температура нормальной, исходя из исторических данных для текущего сезона.

In [15]:
moscow_mean_t = data[(data['city']=='Moscow') & (data['season']=='winter')]['mean_temperature'].values[0]
moscow_mean_t

-9.677060766483745

In [16]:
moscow_std_t = data[(data['city']=='Moscow') & (data['season']=='winter')]['std_temperature'].values[0]
moscow_std_t

4.721910932638058

In [17]:
moscow_anomaly_t = (
    (moscow_t > moscow_mean_t + 2 * moscow_std_t) |
    (moscow_t < moscow_mean_t - 2 * moscow_std_t)
)
moscow_anomaly_t

False

Температура в Москве нормальная

Протестируем свое решение для разных городов.

In [18]:
def test_t(city):
    params = {'q': city, 'appid': API_KEY, 'units': 'metric', 'lang': 'ru'}
    city_temp = requests.get('http://api.openweathermap.org/data/2.5/weather', params)
    city_t = city_temp.json()['main']['temp']
    city_mean_t = data[(data['city']==city) & (data['season']=='winter')]['mean_temperature'].values[0]
    city_std_t = data[(data['city']==city) & (data['season']=='winter')]['std_temperature'].values[0]
    city_anomaly_t = ((city_t > city_mean_t + 2 * city_std_t) |
                        (city_t < city_mean_t - 2 * city_std_t))
    return city_anomaly_t

In [46]:
test_t('New York')

False

In [47]:
test_t('London')

False

In [49]:
test_t('Cairo')

False

На данный момент температура в городах без аномалий.

### Сравнение

Попробуем для получения текущей температуры использовать синхронные и асинхронные методы.

In [41]:
city_id = 0
temp_now = {}
cities = data['city'].unique()

def sinc(cities):
    for city in cities:
        params = {'q': city, 'appid': API_KEY, 'units': 'metric', 'lang': 'ru'}
        city_temp = requests.get('http://api.openweathermap.org/data/2.5/weather', params)
        data_temp = city_temp.json()

In [42]:
async def get_weather(city):
    async with aiohttp.ClientSession() as session:
        async with session.get(f"http://api.openweathermap.org/data/2.5/weather?q={city}&units=metric&lang=ru&APPID={API_KEY}") as response:
            weather_json = await response.json()

async def main(cities_):
    tasks = []
    for city in cities_:
        tasks.append(asyncio.create_task(get_weather(city)))

    for task in tasks:
        await task

In [43]:
%%time
sinc(cities)

CPU times: user 47.7 ms, sys: 3.93 ms, total: 51.6 ms
Wall time: 313 ms


In [44]:
%%time
asyncio.run(main(cities))

CPU times: user 19.1 ms, sys: 9.03 ms, total: 28.2 ms
Wall time: 38.7 ms


Лучше использовать асинхронные метод