# Skripte


Ova *Jupyter* bilježnica služi za testiranje i demonstraciju *Python* skripti vlastite izrade.


In [1]:
##  PYTHON ENVIRONMENT SETUP

# IPython
from IPython import *
from IPython.display import *

# Python standard library (note: `six` is a third-party compatibility package)
import copy
import math
import os
import six
import sys
import time

# SciPy-ecosystem packages
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sympy as sym

# Render plots inline, inside the notebook.
%matplotlib inline

# Set the plot style to `ggplot'.
plt.style.use('ggplot')

# Initialise pretty-printing of symbolic (SymPy) expressions.
sym.init_printing()


In [2]:
# Load the pickled training dataset (path is relative to the working
# directory).
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
df = pd.read_pickle('data/training_dataset_enc.pkl')


In [3]:
from script.timer import *
from script.feature_engineering import *


In [4]:
# Group the rows by the (KLIJENT_ID, OZNAKA_PARTIJE) key pair; each group is
# later passed as one unit to the feature functions.
kp_df = df.groupby(['KLIJENT_ID', 'OZNAKA_PARTIJE'])


In [None]:
# Group keys as a 2-D integer array, one row per (KLIJENT_ID, OZNAKA_PARTIJE)
# pair, ordered from the largest group to the smallest.
kp = np.array(
    kp_df.size().sort_values(ascending = False).index.tolist(),
    dtype = int,
    order = 'C'
)


In [None]:
def diff_feat (df):
    """Return the mean and standard deviation of the quarter-to-quarter change.

    The change is computed row by row as ``STANJE_NA_KRAJU_KVARTALA`` minus
    ``STANJE_NA_KRAJU_PRETH_KVARTALA``.  Missing values in either column are
    forward-filled first and then back-filled, so a gap at the start of a
    column takes the first known value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that has both columns named above.

    Returns
    -------
    tuple
        ``(mean, std)`` of the differences.  ``std`` uses the pandas default
        (sample standard deviation), so it is NaN for fewer than two rows.
    """
    # `ffill()` / `bfill()` are the supported spelling of the deprecated
    # `fillna(method = ...)` and produce the same values.
    current = df.STANJE_NA_KRAJU_KVARTALA.ffill().bfill()
    previous = df.STANJE_NA_KRAJU_PRETH_KVARTALA.ffill().bfill()

    diff = current - previous

    return (diff.mean(), diff.std())


In [None]:
def dur_feat (df):
    """Return the mean planned duration, in days, over the rows of ``df``.

    The duration of a row is ``PLANIRANI_DATUM_ZATVARANJA`` minus
    ``DATUM_OTVARANJA``.  Missing dates in either column are forward-filled
    first and then back-filled before subtracting.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that has both date columns named above.

    Returns
    -------
    float
        Mean of the per-row durations expressed in days (fractional days are
        kept because the mean timedelta is divided by one day).
    """
    # `ffill()` / `bfill()` are the supported spelling of the deprecated
    # `fillna(method = ...)` and produce the same values.
    closing = df.PLANIRANI_DATUM_ZATVARANJA.ffill().bfill()
    opening = df.DATUM_OTVARANJA.ffill().bfill()

    return (closing - opening).mean() / np.timedelta64(1, 'D')


In [None]:
# Number of (KLIJENT_ID, OZNAKA_PARTIJE) groups to sample for the feature
# computation below.
N = 1000


In [None]:
# Pick N distinct row positions of `kp` uniformly at random, in random order.
# A single shuffle-and-slice always terminates: the previous rejection loop
# only finished when N draws with replacement happened to be all different,
# and could never finish when `kp` has fewer than N rows.  If `kp` has fewer
# than N rows, every row is selected.
# The sample is not seeded here; call `np.random.seed(...)` beforehand when a
# reproducible selection is needed.
I = np.random.permutation(kp.shape[0])[:N]


In [None]:
# Compute the per-group features for the sampled groups and report how long
# the computation took.
t0 = time.time()

# `feat` is not defined in this notebook (presumably it comes from the star
# import of `script.feature_engineering`).  It receives:
#   * a list of (group key, group DataFrame) pairs for the sampled keys, and
#   * a list of (output column name(s), function of the group frame) pairs.
# Missing values are forward-filled and then back-filled with `ffill()` /
# `bfill()`, the supported replacement for the deprecated
# `fillna(method = ...)`.
feat_df = feat(
    [(tuple(kp_x), kp_df.get_group(tuple(kp_x))) for kp_x in kp[I]],
    [
        (('POMAK_AVG', 'POMAK_STD'), diff_feat),
        ('DULJINA_UGOVORA', dur_feat),
        (
            'UGOVORENI_IZNOS',
            lambda df : df.UGOVORENI_IZNOS.ffill().bfill().mean()
        ),
        (
            'VISINA_KAMATE',
            lambda df : df.VISINA_KAMATE.ffill().bfill().mean()
        ),
        ('STAROST', lambda df : df.STAROST.min()),
        (
            'PRIJEVREMENI_RASKID',
            # 1 if any row of the group has the flag set, else 0.
            lambda df : df.PRIJEVREMENI_RASKID.any().astype(np.uint8)
        )
    ]
)

t1 = time.time()

# Elapsed wall-clock time in seconds.
d = float(t1 - t0)

# `hms_time` is not defined in this notebook (presumably from `script.timer`);
# the format string expects it to supply the keys `h`, `m` and `s`.
print(
    'Trajanje racunanja: {h:d}h {m:02d}m {s:06.3f}s ({S:.3f}s)'.format(
        S = d, **hms_time(d)
    )
)

# Drop the timing temporaries so they do not linger in the kernel namespace.
del d
del t1
del t0


In [None]:
# Summarise the computed feature table (columns, dtypes, non-null counts).
feat_df.info()


In [None]:
# Display the computed feature table as the cell output.
feat_df
