# S6. App Structure

## Алгоритм

1. На входе данные за 1 час - показания по экологии и деперсонализированные события по авто
2. Каждый автомобиль получает или теряет баллы в зависимости от показаний экодатчиков в момент проезда
3. Математика начисления баллов (и выставления порогов) модернизируется с каждой итерацией
4. Авто, чьи баллы превысили порог, передаются на обработку экополиции, те возвращаются к нам с информацией о том, какая достигнута точность
5. Повторяем пока не получим нужную точность

## 1. Получение входных данных

1. Отсортировать данные по времени
2. Распределить по локациям
3. Сформировать временные фреймы
4. Упаковать по временным фреймам данные по авто и экологии


In [7]:
import pandas as pd
import math
from functools import reduce


eco_df = pd.read_csv("ecodata2.csv")
car_df = pd.read_csv("cardata2.csv")

eco_df.sort_values(by=['time'], inplace=True)
car_df.sort_values(by=['time'], inplace=True)

In [8]:
eco_df.head(10000000)

Unnamed: 0,id,city_id,location_id,camera_id,co,no2,so2,o3,pm25,pm10,temp,hum,time,ver,lat,lon,created
4999,18,2,741,4553,4.00,0.68,0,0.81,40,45,18.24,21.28,12/10/22 17:41,-1,43.237604,76.934758,12/10/22 11:41
4858,15,2,2,420,8.80,0.00,0,0.87,35,38,19.20,19.29,12/10/22 17:41,-1,43.238362,76.889989,12/10/22 11:41
4857,14,2,337,1724,12.80,5.23,0,0.00,63,68,19.28,21.45,12/10/22 17:41,-1,43.244170,76.915991,12/10/22 11:41
4856,12,2,780,4571,6.69,0.00,0,0.68,41,44,18.10,21.16,12/10/22 17:41,-1,43.199581,76.894464,12/10/22 11:41
4855,10,2,781,5779,7.69,5.46,0,0.00,78,85,18.93,21.44,12/10/22 17:41,-1,43.214086,76.898003,12/10/22 11:41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,18,2,741,4553,8.60,1.00,0,0.93,50,56,17.63,24.09,12/10/22 18:00,-1,43.237604,76.934758,12/10/22 11:59
3,15,2,2,420,9.89,0.00,0,0.85,68,80,18.66,19.98,12/10/22 18:00,-1,43.238362,76.889989,12/10/22 11:59
2,14,2,337,1724,10.39,4.86,0,1.03,56,61,18.54,22.46,12/10/22 18:00,-1,43.244170,76.915991,12/10/22 11:59
1,12,2,780,4571,6.09,0.00,0,0.56,49,53,17.77,21.82,12/10/22 18:00,-1,43.199581,76.894464,12/10/22 11:59


In [9]:
# def createTimeframe(time, camera_id):
#     eco_df_part = eco_df.loc[(pd.to_datetime(eco_df['time']) == time) & (eco_df['camera_id'] == camera_id)]
#     car_df_part = car_df.loc[(pd.to_datetime(eco_df['time']) == time) & (eco_df['camera_id'] == camera_id)]
    
#     return [eco_df_part, car_df_part]

# timeframes = dict()
# timeframe = dict()

# camera_ids = list(eco_df['camera_id'].unique())
# minutes = list(pd.to_datetime(eco_df['time'].unique()))

# for i in range(len(camera_ids)):
#     for j in range(len(minutes)):
#         timeframes[minutes[j]] = createTimeframe(minutes[j], camera_ids[i])
#         timeframe[camera_ids[i]] = timeframes
                
def createTimeframe(dataframes: list[dict], camera_id: str = None, time_from: str = None, time_to: str = None):
    df = pd.concat(dataframes)
    if camera_id is None:
        camera_ids = set(df['camera_id'])
        response = {}
        for camera_id in camera_ids:
            response[camera_id] = createTimeframe(
                dataframes = [dataframes[0], dataframes[1]],
                camera_id = camera_id,
                time_from = time_from,
                time_to = time_to
            )
        return response
    else:
        if time_from is None or time_to is None:
            for time in sorted(
                set(pd.to_datetime(df['time']))
            ):
                time_from = time
                time_to = time + pd.Timedelta(minutes=1)
                new_eco_df = dataframes[0].loc[
                    (pd.to_datetime(dataframes[0]['time']) >= time_from) & 
                    (pd.to_datetime(dataframes[0]['time']) < time_to) &
                    (dataframes[0]['camera_id'] == camera_id)
                ]
                new_car_df = dataframes[1].loc[
                    (pd.to_datetime(dataframes[1]['time']) >= time_from) & 
                    (pd.to_datetime(dataframes[1]['time']) < time_to) &
                    (dataframes[1]['camera_id'] == camera_id)
                ]
                
                return {time: [new_eco_df, new_car_df]}
        else:
            for time in sorted(
                set(pd.to_datetime(df['time']))
            ):
                time_to = pd.to_datetime(time_to)
                if time <= time_to:
                    new_eco_df = dataframes[0].loc[
                        (pd.to_datetime(dataframes[0]['time']) >= time_from) & 
                        (pd.to_datetime(dataframes[0]['time']) < time_to) &
                        (dataframes[0]['camera_id'] == camera_id)
                    ]
                    new_car_df = dataframes[1].loc[
                        (pd.to_datetime(dataframes[1]['time']) >= time_from) & 
                        (pd.to_datetime(dataframes[1]['time']) < time_to) &
                        (dataframes[1]['camera_id'] == camera_id)
                    ]
                    
                    return {time: [new_eco_df, new_car_df]}
                
timeframe = createTimeframe([eco_df, car_df])

In [10]:
for camera_id, by_camera in timeframe.items():
    for time, by_time in by_camera.items():
        print(time)

2022-12-10 17:41:00
2022-12-10 17:41:00
2022-12-10 17:41:00
2022-12-10 17:41:00
2022-12-10 17:41:00
2022-12-10 17:41:00
2022-12-10 17:41:00
2022-12-10 17:41:00
2022-12-10 17:41:00
2022-12-10 17:41:00


## 2. Начисление баллов

1. Сформировать хранилище для баллов
2. Подготовить функцию для начисления баллов
3. Прогнать входные данные через функцию начисления баллов по каждой локации фрейм за фреймом

In [11]:
score = {
    
}

In [12]:
pm_expect = {}

for camera in eco_df['camera_id'].unique():
    pm = eco_df[eco_df['camera_id']==camera]['pm25']
    pm_avg = sum(pm)/len(pm)
    pm_sq = math.sqrt(reduce(lambda a, b: a + (b - pm_avg)**2, pm, 0) / len(pm))
    pm_expect[camera] = pm_avg + pm_sq
    
cams = eco_df['camera_id'].unique()
    
def eco_scoring(timeframe_eco, timeframe_car, pm_expect):
    
    pm = timeframe_eco['pm25']
    cars = timeframe_car['id'].unique()
    
    for pm_item in pm:
        for cam_id in cams:
            for car_id in cars:
                if pm_item > pm_expect:
                    score[car_id]=+1
                elif pm_item < pm_expect:
                    score[car_id]=-1
                else:
                    score[car_id]=0

    return score

for camera_id, by_camera in timeframe.items():
    for time, by_time in by_camera.items():
        eco_scoring(by_time[0], by_time[1], pm_expect[camera_id])

counter = 0     
    
for car_id in set(car_df['id'].unique()):
    counter+=1

In [13]:
counter

5000

In [14]:
score

{'fb3b1895-8407-d870-546a-58b5018c213c': -1,
 '6f22fc34-f496-5023-a918-00ad122d734a': -1,
 '0f091d84-50aa-af97-499c-bd61f08e425a': -1,
 '5afd9c9a-4ad5-1eba-330c-c8e3879d08e2': -1,
 '4cfd08cb-696e-6c65-045c-a350a2e20907': -1,
 '5467e589-d807-8106-98d7-4ff81fd36ef7': -1,
 'a0d7c8b6-5dda-cfaf-b16d-a2234c6cf757': -1,
 '4c5c5571-a1a4-f277-4345-7dfca4b278ad': -1,
 '963cc37a-dca6-1520-b5a0-8a6af211257b': -1,
 'b8cf9a86-1427-ac96-0d17-db09f782cb87': -1,
 '91fbdf18-1aef-1c70-9c15-cc1b556d9e99': -1,
 '9adc444d-aa0c-0fef-adcf-0525eb25902a': -1,
 '5e2d5023-e80c-7a4a-9363-7d8c794686a3': -1,
 '19f0008a-7fc9-a663-505c-333a5caf8cee': -1,
 '0671dfb9-e614-7bde-d644-293c68ca54ca': -1,
 'ab61676a-907a-5ebc-0c08-bee2ac6a05a2': -1,
 'bb5f6afa-2f6a-36fe-aff9-35e2c813cc66': -1,
 'ffc7971e-1521-0fb8-a3f8-1dbe9226562d': -1,
 '5ef3d7b5-5c7d-cdd5-3226-eef11d9f54ff': -1,
 '5ea74bc3-82cb-1bb9-61c9-e1b5617e61da': -1,
 '45fae7b5-747b-6d82-a641-2221f8772313': -1,
 '03949b33-b9d1-ea45-c428-bc48f28512d6': -1,
 'a96d519a

## 3. Сохранение результата

Сохранить итоговое значение score в файл формата .csv

In [15]:
score_df = pd.DataFrame.from_dict(score, orient='index')

score_df.to_csv("score_data.csv")

In [16]:
score_df = pd.read_csv("score_data.csv", index_col=0)

In [17]:
score_df.head()

Unnamed: 0,0
fb3b1895-8407-d870-546a-58b5018c213c,-1
6f22fc34-f496-5023-a918-00ad122d734a,-1
0f091d84-50aa-af97-499c-bd61f08e425a,-1
5afd9c9a-4ad5-1eba-330c-c8e3879d08e2,-1
4cfd08cb-696e-6c65-045c-a350a2e20907,-1
