In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from citymobil_python_mysql_wrapper import MysqlWrapper
import pyexasol
import logging
import sys
import json
import datetime

import pyproj    
import shapely
import shapely.wkt
import shapely.ops as ops
from shapely.geometry.polygon import Polygon
from shapely.geometry import MultiPolygon, LineString
from shapely.ops import cascaded_union
from functools import partial

from keplergl import KeplerGl
import seaborn as sns
import pygsheets

In [2]:
cred = pd.read_json(r'/Users/skostuchik/crd_exa.json')
user = cred.login.squeeze()
password_mysql = cred.password_mysql.squeeze()
password_exa = cred.password_exa.squeeze()
google_cred = cred.google_cred.squeeze()

#user = 's.kostiuchk'
#password_mysql = 'my_password_mysql'
#password_exa = 'my_password_exa'

In [3]:
#initialize connect to Exasol
C = pyexasol.connect(dsn='ex1..3.city-srv.ru:8563', user=user, password=password_exa, fetch_dict=True)

# initialize connect to MySQL Test
mysql = MysqlWrapper(
    logging.getLogger(),
    "webtest0.stl.msk.city-srv.ru",
    3306,
    user,
    password_mysql,
    "city")

In [4]:
#Словарь со всеми id и названиями локалити
loc_data = pd.DataFrame(C.execute('select LOCALITY_RK, SHORT_NAME from md.locality').fetchall())
localities = {r.LOCALITY_RK : r.SHORT_NAME.replace('"','') for i,r in loc_data.iterrows()}

Forecast Data

In [5]:
forecast_data_initial = pd.read_csv('/Users/skostuchik/projects/various_files/geominimal_suggestions_test2.csv')

In [6]:
forecast_data_initial = forecast_data_initial[forecast_data_initial['locality'] == 22534].drop(
    ['locality', 'created_date', 'version', 'labels'], axis=1)

In [7]:
forecast_data_initial['time_slot'] = forecast_data_initial['hour'].apply(
    lambda x: (datetime.datetime(2000, 1, 1) + datetime.timedelta(hours=x)).strftime("%H:%M"))

In [8]:
#чтобы потом соотнести это с сетапами, которые устанавливаются на 30-минутные интервалы,
#дублируем все строки с и добавляем ко времени 30 мин
forecast_data = pd.DataFrame(columns = forecast_data_initial.columns)

for i,r in forecast_data_initial.iterrows():
    
    h, m = r.time_slot.split(':')
    
    forecast_data = forecast_data.append({'weekday' : r.weekday, 'hour' : r.hour, 'price' : r.price,
                                          'polygon' : r.polygon, 
                                          'time_slot' : (datetime.datetime(2000, 1, 1, int(h), int(m)) + 
                                                         datetime.timedelta(minutes=30)).strftime("%H:%M")},
                                         ignore_index=True)
    
forecast_data = forecast_data.append(forecast_data_initial)

Setup Data

In [9]:
setup_sql = '''
select gch.CAMPAIGN_ID, gch.CAMPAIGN_NAME, week_day,
    concat(
       case when FROM_HOUR <= 9 then concat('0', FROM_HOUR) else FROM_HOUR end, ':',
       case when FROM_MINUTE <= 9 then concat('0', FROM_MINUTE) else FROM_MINUTE end) time_slot, gch.POLYGONS
from replica.geominimal_change_history gch
where gch.ID_LOCALITY = 22534
    and (gch.TIME_END is null or to_date(gch.TIME_END) > current_date)
    and gch.ACTIVE = 1
'''

In [10]:
setup_data = pd.DataFrame(C.execute(setup_sql).fetchall())

In [11]:
setup_data.dropna(inplace=True)

Data Transformation

In [14]:
s_df = pd.DataFrame(columns = ['WEEK_DAY','TIME_SLOT','CAMPAIGN_ID','CAMPAIGN_NAME','polygon_id',
                               'polygon_name','price','poly'])

In [15]:
for index, row  in setup_data.iterrows():
    for i in range(len(json.loads(row.POLYGONS))):
        coords_list = []
        for j in json.loads(row.POLYGONS)[i].get('coords'):
            coords_list.append(tuple([float(j[1]), float(j[0])]))
        
        
        s_df = s_df.append({'WEEK_DAY' : row.WEEK_DAY, 'TIME_SLOT' : row.TIME_SLOT,
                            'CAMPAIGN_ID' : row.CAMPAIGN_ID, 'CAMPAIGN_NAME' : row.CAMPAIGN_NAME,
                            'polygon_id' : json.loads(row.POLYGONS)[i].get('id'),
                            'polygon_name' : json.loads(row.POLYGONS)[i].get('name'),
                            'price' : json.loads(row.POLYGONS)[i].get('price'),
                            'poly' : Polygon(coords_list)
                            }, ignore_index=True)

In [21]:
def polygon_transform(x):
    '''Функция преобразует координаты в полигон'''
    return(Polygon(json.loads(x).get('coordinates')[0]))

In [22]:
forecast_data['polygon'] = forecast_data['polygon'].apply(polygon_transform)

In [23]:
forecast_df = pd.DataFrame(columns = ['weekday','time_slot','multipoly'])

In [21]:
for wd in forecast_data.weekday.unique():
    for ts in forecast_data.time_slot.unique():
        wd_ts = forecast_data[(forecast_data['weekday'] == wd) & (forecast_data['time_slot'] == ts)]
        if len(wd_ts)==0:
            continue
        else:
            list_poly = []
            for p in wd_ts.polygon:
                list_poly.append(p)
            
            #если в данном временном интервале некоторые полигоны пересекаются, поправляем через cascaded_union
            cu = [cascaded_union(list_poly)] if type(
                cascaded_union(list_poly)) == Polygon else cascaded_union(list_poly)
            mp = MultiPolygon(cu)

            forecast_df = forecast_df.append({'weekday' : wd,'time_slot' : ts,
                                              'multipoly' : mp},
                                             ignore_index=True)

In [22]:
setup_df = pd.DataFrame(columns = ['weekday','time_slot','multipoly'])

In [23]:
for wd in s_df.WEEK_DAY.unique():
    for ts in s_df.TIME_SLOT.unique():
        wd_ts = s_df[(s_df['WEEK_DAY'] == wd) & (s_df['TIME_SLOT'] == ts)]
        if len(wd_ts)==0:
            continue
        else:
            list_poly = []
            for p in wd_ts.poly:
                list_poly.append(p)
                
            #если в данном временном интервале некоторые полигоны пересекаются, поправляем через cascaded_union
            cu = [cascaded_union(list_poly)] if type(
                cascaded_union(list_poly)) == Polygon else cascaded_union(list_poly)
            mp = MultiPolygon(cu)

            setup_df = setup_df.append({'weekday' : wd,'time_slot' : ts,
                                        'multipoly' : mp},
                                       ignore_index=True)

In [24]:
df = forecast_df.merge(setup_df, how='left', on=['weekday','time_slot'])
df.columns = ['weekday', 'time_slot', 'multipoly_forecast', 'multipoly_setup']

In [25]:
zero_poly = Polygon([(0.0, 0.0), (0.0, 0.0), (0.0, 0.0)])

df.multipoly_setup.fillna(zero_poly, inplace = True)

In [26]:
def intersection_safe(x):
    '''Функция возвращает полигон, который является пересечением двух мультиполигонов'''
    return(x['multipoly_forecast'].intersection(x['multipoly_setup']))

In [27]:
df['fs_intersection'] = df.apply(intersection_safe, axis = 1)

In [28]:
def get_area(x):
    '''Функция возвращает площадь полигона x'''
    try:
        geom_area = ops.transform(
            partial(
                pyproj.transform,
                pyproj.Proj(init='EPSG:4326'),
                pyproj.Proj(
                    proj='aea',
                    lat_1=x.bounds[1],
                    lat_2=x.bounds[3]
                )
            ),
            x)
        return(geom_area.area)
    except:
        return(0)

In [29]:
#Расчитываем площадь мультиполигона предложения и полигона пересечения предложения и сетапа
df['f_area'] = df['multipoly_forecast'].apply(get_area)
df['i_area'] = df['fs_intersection'].apply(get_area)

#Рассчитываем долю площади пересечения в площади предложения
df['intersect_share'] = (df['i_area'] / df['f_area']).round(2)

  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(p

  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(p

  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(p

  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(p

  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(p

  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(p

  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(p

# Доля пересечения по дням недели и получасовым интеравлам

In [30]:
match_table = df.pivot(index='time_slot', columns='weekday', values='intersect_share').fillna('')

In [31]:
#Преобразование и форматирование - для Регины :)
res = []
for r in match_table.values:
    c_res = []
    for c in r:
        if isinstance(c, float):
            c_res.append(f'{c*100}%')
        else:
            c_res.append('')
    
    res.append(c_res)

m_data = pd.DataFrame(res, index = match_table.index, columns = match_table.columns)

In [32]:
m_data

weekday,1,2,3,4,5,6,7
time_slot,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00:00,,,,0.0%,,0.0%,0.0%
00:30,,,,0.0%,,0.0%,0.0%
05:00,,,,,,,0.0%
05:30,,,,,,,0.0%
07:00,0.0%,0.0%,0.0%,0.0%,0.0%,,
07:30,25.0%,23.0%,17.0%,26.0%,21.0%,,
08:00,21.0%,,15.0%,6.0%,0.0%,,
08:30,21.0%,,15.0%,6.0%,0.0%,,
09:00,,,,,,0.0%,
09:30,,,,,,0.0%,


In [33]:
#ответ
match_table.style.background_gradient(cmap="PuBu")

weekday,1,2,3,4,5,6,7
time_slot,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00:00,,,,0.0,,0.0,0.0
00:30,,,,0.0,,0.0,0.0
05:00,,,,,,,0.0
05:30,,,,,,,0.0
07:00,0.0,0.0,0.0,0.0,0.0,,
07:30,0.25,0.23,0.17,0.26,0.21,,
08:00,0.21,,0.15,0.06,0.0,,
08:30,0.21,,0.15,0.06,0.0,,
09:00,,,,,,0.0,
09:30,,,,,,0.0,


***

In [34]:
#смотрим полностью все данные по сетапам и предложениям
#до этого мы смотрели только на те наблюдения, где есть предложения

In [35]:
t = pd.DataFrame({
    "time_slot" : [(datetime.datetime(2000, 1, 1) +
                    datetime.timedelta(minutes=30*i)).strftime("%H:%M") for i in range(48)],
    "key" : [1 for i in range(48)]
    })

wd = pd.DataFrame({
    "weekday" : [i+1 for i in range(7)],
    "key" : [1 for i in range(7)]
    })

wdt = wd.merge(t, on ='key').drop("key", axis=1)

In [36]:
full_df = wdt.merge(forecast_df, how='left', on=['weekday','time_slot']
                   ).merge(setup_df, how='left', on=['weekday','time_slot'])
full_df.columns = ['weekday', 'time_slot', 'multipoly_forecast', 'multipoly_setup']

In [39]:
full_df.fillna(zero_poly, inplace = True)

In [40]:
#экспорт
#full_df.to_excel(r'voronezh_suggest_vs_act_igor.xlsx', sheet_name='voronezh_suggest_vs_act', index = False)

In [34]:
#visualisation
#лучше использовать веб-версию Kepler'а, загрузив в него данные из csv

In [35]:
def geojson_converter(x):
    return(json.dumps(shapely.geometry.mapping(x)))

In [36]:
full_df['fc'] = full_df['multipoly_forecast'].apply(lambda x: json.dumps(shapely.geometry.mapping(x)))
full_df['st'] = full_df['multipoly_setup'].apply(lambda x: json.dumps(shapely.geometry.mapping(x)))

In [37]:
map_1 = KeplerGl(height=800)
map_1.add_data(data=full_df[['weekday', 'time_slot', 'fc', 'st']], name='fc_st')

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


In [38]:
config = {
    'version': 'v1',
    'config': {
        'visState': {
            'filters': [{'dataId': ['fc_st'],
                         'id': 'q7ix4vf5c',
                         'name': ['weekday'],
                         'type': 'range',
                         'value': [1, 1.18],
                         'enlarged': False,
                         'plotType': 'histogram',
                         'animationWindow': 'free',
                         'yAxis': None,
                         'speed': 1},
                        {'dataId': ['fc_st'],
                         'id': 'j4h33xic4',
                         'name': ['time_slot'],
                         'type': 'timeRange',
                         'value': [1632268800000, 1632270851000],
                         'enlarged': True,
                         'plotType': 'histogram',
                         'animationWindow': 'free',
                         'yAxis': None,
                         'speed': 1}],
            'mapState': {
                'bearing': 0,
                'dragRotate': False,
                'latitude': 53.63485799585258,
                'longitude': 59.77248571074474,
                'pitch': 0,
                'zoom': 2.729875832328426,
                'isSplit': False}
            }
    }
}

map_1.config = config

In [39]:
map_1

KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['fc_st'], 'id': 'q7ix4vf5c',…