In [1]:
import pandas as pd
import numpy as np

# Approximate real-world mean temperatures per city and season.
# Each value is used as the mean (`loc`) of the normal distribution sampled in
# generate_realistic_temperature_data; units are presumably degrees Celsius — TODO confirm.
# NOTE(review): seasons are keyed by name and month_to_season assigns Dec-Feb to "winter"
# for every city; confirm this is intended for Southern-Hemisphere cities (e.g. Sydney).
# The key order matters: it determines the order in which cities are generated.
seasonal_temperatures = {
    "New York": {"winter": 0, "spring": 10, "summer": 25, "autumn": 15},
    "London": {"winter": 5, "spring": 11, "summer": 18, "autumn": 12},
    "Paris": {"winter": 4, "spring": 12, "summer": 20, "autumn": 13},
    "Tokyo": {"winter": 6, "spring": 15, "summer": 27, "autumn": 18},
    "Moscow": {"winter": -10, "spring": 5, "summer": 18, "autumn": 8},
    "Sydney": {"winter": 12, "spring": 18, "summer": 25, "autumn": 20},
    "Berlin": {"winter": 0, "spring": 10, "summer": 20, "autumn": 11},
    "Beijing": {"winter": -2, "spring": 13, "summer": 27, "autumn": 16},
    "Rio de Janeiro": {"winter": 20, "spring": 25, "summer": 30, "autumn": 25},
    "Dubai": {"winter": 20, "spring": 30, "summer": 40, "autumn": 30},
    "Los Angeles": {"winter": 15, "spring": 18, "summer": 25, "autumn": 20},
    "Singapore": {"winter": 27, "spring": 28, "summer": 28, "autumn": 27},
    "Mumbai": {"winter": 25, "spring": 30, "summer": 35, "autumn": 30},
    "Cairo": {"winter": 15, "spring": 25, "summer": 35, "autumn": 25},
    "Mexico City": {"winter": 12, "spring": 18, "summer": 20, "autumn": 15},
}

# Calendar month number (1-12) -> season name.
# Built from a season -> months table; insertion order (12, 1, 2, 3, ...) is preserved.
month_to_season = {
    month: season
    for season, months in (
        ("winter", (12, 1, 2)),
        ("spring", (3, 4, 5)),
        ("summer", (6, 7, 8)),
        ("autumn", (9, 10, 11)),
    )
    for month in months
}

# Temperature data generation
def generate_realistic_temperature_data(cities, num_years=10, seed=None):
    """Build a synthetic daily temperature history for the given cities.

    For every city, one temperature per day is drawn from a normal distribution
    whose mean is the city's seasonal mean from ``seasonal_temperatures`` and
    whose standard deviation is 5.

    Args:
        cities: Iterable of city names; each must be a key of
            ``seasonal_temperatures`` (otherwise ``KeyError`` is raised).
        num_years: Number of 365-day years to generate, starting at 2010-01-01.
            Leap days are not accounted for, so the range ends slightly before
            a full ``num_years`` calendar years have elapsed.
        seed: Optional seed for reproducible output. When ``None`` (default)
            NumPy's global random state is used, as before.

    Returns:
        DataFrame with columns ``city``, ``timestamp``, ``temperature`` and
        ``season``, ordered by city (in the given order) and then by date.
        An empty frame with those columns is returned when ``cities`` is empty.
    """
    columns = ["city", "timestamp", "temperature", "season"]
    dates = pd.date_range(start="2010-01-01", periods=365 * num_years, freq="D")
    # Season label per date, computed once and shared by every city.
    seasons = pd.Series(dates.month).map(month_to_season)
    rng = np.random if seed is None else np.random.default_rng(seed)

    frames = []
    for city in cities:
        # Per-day distribution mean looked up from the city's seasonal table.
        mean_temps = seasons.map(seasonal_temperatures[city]).to_numpy(dtype=float)
        frames.append(pd.DataFrame({
            "city": city,
            "timestamp": dates,
            # One vectorized draw per city instead of one call per row.
            "temperature": rng.normal(loc=mean_temps, scale=5),
            "season": seasons.to_numpy(),
        }, columns=columns))

    if not frames:
        # pd.concat rejects an empty list; return a well-formed empty frame instead.
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

# Generate the dataset for every city in the lookup table and persist it as CSV
data = generate_realistic_temperature_data(list(seasonal_temperatures))
data.to_csv('temperature_data.csv', index=False)


## 1. Анализ исторических данных

In [3]:
import asyncio
import nest_asyncio
nest_asyncio.apply()

#### Без асинхронности

In [4]:
%%time
# Synchronous baseline: per-city centred 30-day moving average, per-(season, city)
# mean/std, and an anomaly flag for readings outside mean +/- 2 * std.
df = data.copy()
df['moving_avg'] = (
    df.groupby('city')['temperature']
    .rolling(window=30, min_periods=1, center=True)
    .mean()
    .droplevel(0)  # drop the city level so the result aligns with df's own index
)
grouping = df.groupby(['season', 'city'])['temperature'].agg(['mean', 'std']).reset_index()
df = df.merge(grouping, how='left', on=['season', 'city'])
lower_bound = df['mean'] - 2 * df['std']
upper_bound = df['mean'] + 2 * df['std']
df['anomaly'] = (df['temperature'] < lower_bound) | (df['temperature'] > upper_bound)
del lower_bound, upper_bound  # keep the notebook namespace as it was

CPU times: user 51.2 ms, sys: 6.03 ms, total: 57.2 ms
Wall time: 67 ms


#### С асинхронностью

In [5]:
async def get_moving_avg(df, **kwargs):
  """Return the per-city rolling mean of ``temperature``.

  Args:
    df: DataFrame with ``city`` and ``temperature`` columns.
    **kwargs: Forwarded to ``rolling`` (e.g. ``window``, ``min_periods``, ``center``).

  Returns:
    Series of rolling means indexed like ``df`` (the ``city`` group level is dropped).
  """
  # Smooth exactly once, within each city. A second rolling pass over the
  # flattened result would average across city boundaries.
  per_city = df.groupby('city')['temperature'].rolling(**kwargs).mean()
  return per_city.droplevel(0)

async def get_agg_anomalies(df):
  """Attach per-(season, city) mean/std and an anomaly flag to every row.

  Args:
    df: DataFrame with ``season``, ``city`` and ``temperature`` columns.

  Returns:
    A new DataFrame (the input is not modified) with added ``mean``, ``std``
    and boolean ``anomaly`` columns; a row is anomalous when its temperature
    lies outside ``mean +/- 2 * std`` of its (season, city) group.
  """
  keys = ['season', 'city']
  stats = df.groupby(keys)['temperature'].agg(['mean', 'std']).reset_index()
  merged = df.merge(stats, how='left', on=keys)
  lower = merged['mean'] - 2 * merged['std']
  upper = merged['mean'] + 2 * merged['std']
  merged['anomaly'] = (merged['temperature'] < lower) | (merged['temperature'] > upper)
  return merged

async def tasks(df):
  """Compute anomaly statistics and the 30-day moving average via ``asyncio.gather``.

  Neither helper coroutine awaits anything, so the event loop runs them one
  after another; ``gather`` only collects their results.

  Args:
    df: DataFrame with ``city``, ``season`` and ``temperature`` columns.

  Returns:
    The frame returned by ``get_agg_anomalies`` with a ``moving_avg`` column added.
  """
  anomalies_job = get_agg_anomalies(df)
  moving_avg_job = get_moving_avg(df, window=30, min_periods=1, center=True)
  enriched, smoothed = await asyncio.gather(anomalies_job, moving_avg_job)
  enriched['moving_avg'] = smoothed
  return enriched

In [6]:
%%time
# Run the coroutine-based pipeline on a fresh copy of the generated data
df = asyncio.run(tasks(data.copy()))

CPU times: user 50.9 ms, sys: 4.09 ms, total: 55 ms
Wall time: 54.3 ms


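Получилось чуть быстрее, но разница (67 мс против 54 мс) находится в пределах погрешности измерения: обе корутины не содержат `await` и выполняются последовательно, поэтому для таких CPU-bound вычислений asyncio реального ускорения не даёт.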

## 2. Мониторинг текущей температуры

In [7]:
import requests
import aiohttp
import datetime

#### Без асинхронности

In [9]:
def get_temp(city, timeout=10):
  """Fetch the current temperature for ``city`` from OpenWeatherMap.

  The city is first geocoded to coordinates, then the current weather is
  requested for those coordinates with ``units=metric``.

  Args:
    city: City name to look up.
    timeout: Per-request timeout in seconds.

  Returns:
    The ``main.temp`` value from the weather response.

  Raises:
    requests.HTTPError: If either request returns an error status
      (e.g. an invalid API token).
    IndexError: If geocoding returns no match for ``city``.
  """
  # Query values go through `params` so requests URL-encodes them (city names
  # may contain spaces or reserved characters); https keeps the token off the wire in clear.
  geo_resp = requests.get(
      'https://api.openweathermap.org/geo/1.0/direct',
      params={'q': city, 'limit': 1, 'appid': TOKEN},
      timeout=timeout,
  )
  geo_resp.raise_for_status()
  matches = geo_resp.json()
  if not matches:
    raise IndexError(f'No geocoding result for city {city!r}')
  coords = matches[0]

  weather_resp = requests.get(
      'https://api.openweathermap.org/data/2.5/weather',
      params={'lat': coords['lat'], 'lon': coords['lon'], 'appid': TOKEN, 'units': 'metric'},
      timeout=timeout,
  )
  weather_resp.raise_for_status()
  return weather_resp.json()['main']['temp']

In [11]:
def is_cur_anomaly(city):
  """Check whether the current temperature in ``city`` is anomalous for the season.

  Reads the notebook-level ``df``, which must contain ``city``, ``season``,
  ``mean`` and ``std`` columns, and uses the first row matching the city and
  the season of today's month.

  Returns:
    Dict with ``temp`` (current temperature) and ``anomaly`` (True when the
    temperature lies outside ``mean +/- 2 * std``).
  """
  temp = get_temp(city)
  current_season = month_to_season.get(datetime.date.today().month)
  matching_rows = df[(df.city == city) & (df.season == current_season)]
  stats = matching_rows.iloc[0]
  center, spread = stats['mean'], stats['std']
  within_band = center - 2 * spread <= temp <= center + 2 * spread
  return {'temp': temp, 'anomaly': not within_band}

In [12]:
%%time
# One blocking lookup per city, executed sequentially
res = list(map(is_cur_anomaly, df['city'].unique()))

CPU times: user 351 ms, sys: 17.8 ms, total: 369 ms
Wall time: 8.3 s


#### С асинхронностью

In [14]:
async def get_temp_async(city, session):
  """Fetch the current temperature for ``city`` from OpenWeatherMap using ``session``.

  The city is first geocoded to coordinates, then the current weather is
  requested for those coordinates with ``units=metric``.

  Args:
    city: City name to look up.
    session: An open ``aiohttp.ClientSession`` used for both requests.

  Returns:
    The ``main.temp`` value from the weather response.

  Raises:
    aiohttp.ClientResponseError: If either request returns an error status
      (e.g. an invalid API token).
    IndexError: If geocoding returns no match for ``city``.
  """
  # Query values go through `params` so they are URL-encoded (city names may
  # contain spaces or reserved characters); https keeps the token off the wire in clear.
  async with session.get(
      'https://api.openweathermap.org/geo/1.0/direct',
      params={'q': city, 'limit': 1, 'appid': TOKEN},
  ) as resp:
    resp.raise_for_status()
    matches = await resp.json()
  if not matches:
    raise IndexError(f'No geocoding result for city {city!r}')
  coords = matches[0]

  async with session.get(
      'https://api.openweathermap.org/data/2.5/weather',
      params={'lat': coords['lat'], 'lon': coords['lon'], 'appid': TOKEN, 'units': 'metric'},
  ) as resp:
    resp.raise_for_status()
    return (await resp.json())['main']['temp']

In [18]:
async def is_cur_anomaly_async(city, session):
  """Async check of whether the current temperature in ``city`` is anomalous.

  Reads the notebook-level ``df``, which must contain ``city``, ``season``,
  ``mean`` and ``std`` columns, and uses the first row matching the city and
  the season of today's month.

  Args:
    city: City name to check.
    session: Session object forwarded to ``get_temp_async``.

  Returns:
    Dict with ``temp`` (current temperature) and ``anomaly`` (True when the
    temperature lies outside ``mean +/- 2 * std``).
  """
  temp = await get_temp_async(city, session)
  current_season = month_to_season.get(datetime.date.today().month)
  matching_rows = df[(df.city == city) & (df.season == current_season)]
  stats = matching_rows.iloc[0]
  center, spread = stats['mean'], stats['std']
  within_band = center - 2 * spread <= temp <= center + 2 * spread
  return {'temp': temp, 'anomaly': not within_band}

In [19]:
async def city_tasks(cities):
  """Run the anomaly check for all ``cities`` concurrently over one shared session.

  Args:
    cities: Iterable of city names.

  Returns:
    List of result dicts from ``is_cur_anomaly_async``, in the order of ``cities``.
  """
  async with aiohttp.ClientSession() as session:
    # All coroutines are created up front and share the same session.
    pending = [is_cur_anomaly_async(name, session) for name in cities]
    results = await asyncio.gather(*pending)
    return results

In [20]:
%%time
# Check every city concurrently through the event loop
res = asyncio.run(city_tasks(df['city'].unique()))

CPU times: user 370 ms, sys: 11.1 ms, total: 381 ms
Wall time: 711 ms


При нескольких запросах получается значительно быстрее