In [1]:
import pandas as pd
import multiprocessing as mp
import time
from tool.path import DOP_DIR

In [2]:
data = pd.read_csv(DOP_DIR / 'temperature_data.csv', parse_dates=['timestamp'])
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54750 entries, 0 to 54749
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   city         54750 non-null  object        
 1   timestamp    54750 non-null  datetime64[ns]
 2   temperature  54750 non-null  float64       
 3   season       54750 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 1.7+ MB


In [4]:
num_processes = mp.cpu_count()
print(f'Number of available CPU cores: {num_processes}')

Number of available CPU cores: 8


In [5]:
from tool.utils import add_features, paral_features

start_time = time.perf_counter()
df_seq = add_features(data)
seq_time = time.perf_counter() - start_time
print(f"Последовательное исполнение: {seq_time:.3f}s")

start_time = time.perf_counter()
df_par = paral_features(data, num_processes)
par_time = time.perf_counter() - start_time
print(f"Параллельное исполнение: {par_time:.3f}s")

print(f"Ускорение: {seq_time / par_time:.2f}x")

Последовательное исполнение: 0.034s
Параллельное исполнение: 1.103s
Ускорение: 0.03x


Параллельное исполнение показало значительное замедление. Вероятно, датасет слишком мал для оправдания затрат на развертывание параллелизации.

In [6]:
print("Последовательное исполнение:")
display(df_seq.head())
print("Параллельное исполнение:")
display(df_par.head())

Последовательное исполнение:


Unnamed: 0,city,timestamp,temperature,season,rolling_mean,season_mean,season_dev,outlier
0,New York,2010-01-01,-4.675253,winter,-4.675253,0.210276,5.176739,False
1,New York,2010-01-02,-7.039606,winter,-5.85743,0.210276,5.176739,False
2,New York,2010-01-03,-4.583685,winter,-5.432848,0.210276,5.176739,False
3,New York,2010-01-04,14.789048,winter,-0.377374,0.210276,5.176739,True
4,New York,2010-01-05,1.9031,winter,0.078721,0.210276,5.176739,False


Параллельное исполнение:


Unnamed: 0,city,timestamp,temperature,season,rolling_mean,season_mean,season_dev,outlier,seasonal_mean,seasonal_std
0,New York,2010-01-01,-4.675253,winter,-4.675253,0.210276,5.176739,False,0.210276,5.176739
1,New York,2010-01-02,-7.039606,winter,-5.85743,0.210276,5.176739,False,0.210276,5.176739
2,New York,2010-01-03,-4.583685,winter,-5.432848,0.210276,5.176739,False,0.210276,5.176739
3,New York,2010-01-04,14.789048,winter,-0.377374,0.210276,5.176739,True,0.210276,5.176739
4,New York,2010-01-05,1.9031,winter,0.078721,0.210276,5.176739,False,0.210276,5.176739


Алгоритмы дают одинаковый результат

In [8]:
from tool.utils import current_temperature_sync, current_temperature_async, check_temperature


cities = ["Moscow", "Dubai", "Cairo", "Beijing", "Berlin"]


def sync():
    results = []

    for city in cities:
        start_time = time.perf_counter()
        
        # Текущая температуру
        temperature_sync = current_temperature_sync(city)
        
        # Время выполнения
        sync_time = time.perf_counter() - start_time
        
        # Нормальная температура
        normal_temp = check_temperature(df_seq, city, temperature_sync)
        
        result = {
            'city': city,
            'current_temperature': temperature_sync,
            'normal': normal_temp,
            'time': sync_time,
        }
        results.append(result)

    display(pd.DataFrame(results))


async def _async():

    results = []
    for city in cities:
        start_time = time.perf_counter()
        temp_async = await current_temperature_async(city)
        async_time = time.perf_counter() - start_time
        normal_temp = check_temperature(df_seq, city, temp_async)

        result = {
            'city': city,
            'current_temperature': temp_async,
            'normal': normal_temp,
            'time': async_time,
        }
        results.append(result)

    display(pd.DataFrame(results))


print("Синхронный метод:")
sync()

print("Асинхронный метод:")
await _async()

Синхронный метод:


Unnamed: 0,city,current_temperature,normal,time
0,Moscow,0.16,False,0.288863
1,Dubai,22.96,True,0.341163
2,Cairo,16.42,True,0.25824
3,Beijing,-8.06,True,0.271038
4,Berlin,0.06,True,0.257239


Асинхронный метод:


Unnamed: 0,city,current_temperature,normal,time
0,Moscow,0.16,False,0.354281
1,Dubai,22.96,True,0.319033
2,Cairo,16.42,True,0.260751
3,Beijing,-8.06,True,0.343075
4,Berlin,0.06,True,0.260963


Асинхронные методы не дают буста, поэтому неважно