In [52]:
from datetime import datetime as dt, date, timedelta as td
import numpy as np
import pandas as pd
from scipy.special import factorial as fact
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from tqdm.notebook import tqdm, trange
import torch as pt
import matplotlib.pyplot as plt
import seaborn as sns
from time import sleep
from scipy import stats

from config.config import sql_params
from nea_schema.esi.mkt import MarketHist
from nea_schema.sde.map import Region
from nea_schema.sde.inv import Name, Type
from nea_schema.sde.bp import Blueprint, Activity, Material, Product
from tools import pull_bp_data, load_market_hist, build_material_matrix
from state_smoother import Smoother

sns.set()
pd.set_option('display.float_format', lambda x: '%.2f' % x)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
avail_bps, bp_data, activity_data, material_data, product_data, invent_mats_data, invent_prod_data, invented_product_data = pull_bp_data(sql_params)

invent_material_matrix, invent_material_names, invent_product_names = build_material_matrix(
    invent_mats_data,
    invent_prod_data,
    pd.Series(0, index=invent_prod_data['blueprint_id'])
)
invent_material_matrix.index = invent_material_matrix.index.map(invented_product_data.set_index('blueprint_id')['type_id'])

full_prod_data = pd.concat([product_data, invented_product_data], axis=0).drop_duplicates()
mat_eff_mapping = full_prod_data[['blueprint_id']].set_index('blueprint_id').join(avail_bps['mat_eff'])['mat_eff']
mat_eff_mapping[invented_product_data['blueprint_id']] = 2
if np.any(pd.isnull(mat_eff_mapping)):
    raise Exception("Not all blueprint_id's are being covered by mat_eff_mapping")

material_matrix, material_names, product_names = build_material_matrix(
    material_data,
    full_prod_data,
    mat_eff_mapping,
    addit_matrix=invent_material_matrix
)

full_material_names = pd.concat([material_names, invent_material_names], axis=0).drop_duplicates()
full_product_names = pd.concat([product_names, invent_product_names], axis=0).drop_duplicates()

In [4]:
types = sorted([
    *[int(val) for val in material_matrix.index],
    *[int(val) for val in material_matrix.columns],
])

regions = sorted([
    10000068, 10000064, 10000032, 10000037
])

In [5]:
market_data = load_market_hist(types, regions, date(2019,5,1), date(2020,4,30), sql_params)

In [6]:
smooth_markets = {
    region_id: {
        point: Smoother(
            np.log(data), dims=4,
            horizon=14 if point == 'average' else 28,
            verbose=False, tqdm_leave=False,
        ) for point, data in val.items()
    } for region_id, val in market_data.items()
}

for val in smooth_markets.values():
    for sub_val in val.values():
        sub_val.verbose = True
        sub_val.learn(1000, learn_rate=1e-1)

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

In [48]:
region_profits = {}
for region_id, val in smooth_markets.items():
    smooth_vol = np.exp(pd.Series(val['volume'].state[-1,:,0].detach().cpu().numpy(), index=val['volume'].columns))
    smooth_avg = np.exp(pd.Series(val['average'].state[-1,:,0].detach().cpu().numpy(), index=val['average'].columns))
    
    type_cost = (material_matrix * smooth_avg.reindex(index=material_matrix.columns).values[np.newaxis,:]).sum(axis=1)
    type_price = smooth_avg.reindex(index=type_cost.index)
    type_profit = type_price - type_cost
    type_vol = smooth_vol.reindex(index=type_profit.index)
    type_revenue = type_profit * type_vol
    region_profits[region_id] = {
        'cost': type_cost,
        'price': type_price,
        'profit': type_profit,
        'vol': type_vol,
        'revenue': type_revenue,
    }

In [104]:
revenues = pd.concat([region['revenue'].rename(region_id) for region_id, region in region_profits.items()], axis=1).T
revenues.sum(axis=1).sort_values()

10000032   -159941676312.87
10000064    -32629986333.25
10000068    -22370546936.50
10000037      -736960972.81
dtype: float64

In [102]:
ranked_revenues = revenues.apply(lambda x: stats.rankdata(x)).astype(int)
ranked_revenues.sum(axis=1).sort_values()

10000064    2996
10000032    3126
10000037    3276
10000068    3372
dtype: int64

In [100]:
revenues_pos_only = {}
for type_id, data in revenues.to_dict().items():
    for region_id, value in data.items():
        if value > 0:
            revenues_pos_only[type_id] = revenues_pos_only.get(type_id, {})
            revenues_pos_only[type_id][region_id] = value

In [107]:
pd.DataFrame(revenues_pos_only).sum(axis=1).sort_values()

10000064    6275656584.35
10000032   13810523896.93
10000068   14830645395.01
10000037   24662917357.58
dtype: float64

In [101]:
ranked_revenues_pos_only = pd.Series(0, index=revenues.index)
for type_id, data in revenues_pos_only.items():
    data = pd.Series(data)
    data[:] = stats.rankdata(data)
    ranked_revenues_pos_only[data.index] += data
ranked_revenues_pos_only.astype(int).sort_values()

10000037    1788
10000064    1863
10000068    1971
10000032    2248
dtype: int64