In [1]:
import pandas as pd
import numpy as np
from settings import Config
from mysql_db import Database
import pdcast as pdc
import s3_upload_download as s3con
import os
from datetime import datetime

# NOTEBOOK DESCRIPTION: 

Investigate the payments and the reserves, and ensure that every event has a monetary value associated with it.

NOTE: 'Value' refers to the full amount of money associated with a claim file, which is either:
1. Only the payment (plati) value, when the claim file has been closed, or
2. Payment + reserve, when the claim file has not yet been closed.


In [2]:
# Initialise the S3 connector used to read/write files in an S3 bucket.
# The top-level import binds the *module* to the name `s3con`, and this cell
# rebinds that same name to the connector instance. Looking the module up
# under its real name (instead of through `s3con`) keeps the cell safe to
# re-run: a second execution no longer searches for `s3_connector` on the
# already-created instance. `s3con` still ends up being the connector object
# that later cells call `.read` on.
import s3_upload_download

s3con = s3_upload_download.s3_connector()

In [6]:
# Load the tables.
#
# claims: one row per claim file, read from S3 via the connector, with a
# derived `acc_year` (year of `dataEveniment`) and restricted to events
# that occurred before 2022.
#
# Changes from the previous version of this cell:
#   * the stray `.replace` (referenced but never called) was removed; chaining
#     `.query` onto the bound method raised AttributeError;
#   * `acc_year` is computed with the vectorised `.dt.year` accessor instead
#     of a Python loop doing one `.iloc[x]` lookup per row.
claims = (
    s3con.read('daune.feather')
    # pd.to_datetime makes this work whether dataEveniment holds strings,
    # date objects or timestamps.
    .assign(acc_year=lambda frame: pd.to_datetime(frame.dataEveniment).dt.year)
    .query("acc_year < 2022")
)

# pay_log: per-event payment / reserve / value log.
# NOTE(review): this file is read from the local working directory, whereas
# the claims table comes from S3 - confirm that is intentional.
pay_log = pd.read_feather('events_values_log.feather')


In [10]:
# Number of distinct idPolita values that appear in the filtered claims table.
claims.idPolita.nunique()

606788

In [12]:
# Preview the first rows of the filtered claims table, including the derived acc_year column.
claims.head()

Unnamed: 0,idDosar,idPolita,stare,dataDeschidere,dataAvizare,dataEveniment,tipEveniment,tipDauna,idEvent,acc_year
0,59897,2231006,inchis - achitat,2015-01-06,2015-01-06,2015-01-05,Pagube materiale,INTERNA,22310062015-01-05,2015
1,60564,2233977,inchis - achitat,2015-01-12,2015-01-12,2015-01-07,Pagube materiale,INTERNA,22339772015-01-07,2015
2,60672,2231966,inchis - achitat,2015-01-13,2015-01-13,2015-01-09,Pagube materiale,INTERNA,22319662015-01-09,2015
3,60732,2232835,inchis - achitat,2015-01-13,2015-01-13,2015-01-08,Pagube materiale,INTERNA,22328352015-01-08,2015
4,60984,2239842,inchis - achitat,2015-01-14,2015-01-14,2015-01-09,Pagube materiale,INTERNA,22398422015-01-09,2015


In [19]:
# Sorted idPolita values from the claims table, to compare by eye with the
# same column in pay_log.
# NOTE(review): the displayed dtype here is uint32 while pay_log.idPolita is
# shown as int64, and the two series have different lengths - keep that in
# mind before joining the tables on this key.
claims.idPolita.sort_values()

6346        2230466
509         2230478
5683        2230529
4501        2230537
22473       2230579
            ...    
687072    203841229
687374    203841884
687166    203856909
686920    203862224
687411    203862347
Name: idPolita, Length: 687941, dtype: uint32

In [20]:
# Sorted idPolita values from the payment log, for comparison with the claims table.
# The smallest and largest ids shown match those of claims.idPolita.
pay_log.idPolita.sort_values()

0           2230466
1           2230478
2           2230529
3           2230537
4           2230579
            ...    
628013    203841229
628014    203841884
628015    203856909
628016    203862224
628017    203862347
Name: idPolita, Length: 638228, dtype: int64

In [18]:
# Preview the payment log columns (plati_*, res_*, val_* in RON and EUR, plus tip).
# NOTE(review): idEvent is an integer here, whereas claims.idEvent is a string
# that looks like idPolita followed by the event date - the two are not
# directly comparable as shown; confirm how they are meant to be mapped.
pay_log.head()

Unnamed: 0,acc_year,idEvent,idPolita,plati_ron,plati_eur,res_ron,res_eur,val_ron,val_eur,tip
0,2015,0,2230466,4243.0,958.913415,0.0,0.0,4243.0,958.913415,MAT
1,2015,1,2230478,1225.0,277.431764,0.0,0.0,1225.0,277.431764,MAT
2,2015,2,2230529,0.0,0.0,0.0,0.0,0.0,0.0,MAT
3,2015,3,2230537,585.0,132.281114,0.0,0.0,585.0,132.281114,MAT
4,2015,4,2230579,4876.522696,1075.0,0.0,0.0,4876.522696,1075.0,MAT


In [16]:
# Inspect a single event: all claim rows sharing this idEvent.
# The result shows that one event can have many claim files (distinct idDosar values).
claims.query("idEvent == '38411502016-12-25'")

Unnamed: 0,idDosar,idPolita,stare,dataDeschidere,dataAvizare,dataEveniment,tipEveniment,tipDauna,idEvent,acc_year
66686,151674,3841150,inchis - achitat,2017-01-31,2017-01-31,2016-12-25,Pagube materiale,EXTERNA,38411502016-12-25,2016
103339,189548,3841150,inchis - achitat,2017-07-03,2017-07-03,2016-12-25,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,38411502016-12-25,2016
103340,189549,3841150,inchis - achitat,2017-07-03,2017-07-03,2016-12-25,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,38411502016-12-25,2016
103343,189552,3841150,inchis - achitat,2017-07-03,2017-07-03,2016-12-25,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,38411502016-12-25,2016
103346,189555,3841150,inchis - achitat,2017-07-03,2017-07-03,2016-12-25,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,38411502016-12-25,2016
...,...,...,...,...,...,...,...,...,...,...
302384,395127,3841150,inchis - achitat,2019-03-26,2019-03-26,2016-12-25,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,38411502016-12-25,2016
302392,395136,3841150,inchis - achitat,2019-03-26,2019-03-26,2016-12-25,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,38411502016-12-25,2016
302398,395142,3841150,inchis - achitat,2019-03-26,2019-03-26,2016-12-25,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,38411502016-12-25,2016
316778,410304,3841150,inchis - achitat,2019-05-08,2019-05-08,2016-12-25,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,38411502016-12-25,2016


In [17]:
# Load the grouped reserves file (one row per dosarID with ron_eq / eur_eq
# columns, as shown in the preview) from the local working directory.
# NOTE(review): `df` is a generic name; a descriptive one would make the
# later comparison with `plati` and `rez` easier to follow.
df = pd.read_feather('reserves_eq_grupat.feather')
df.head()

Unnamed: 0,dosarID,ron_eq,eur_eq
0,53151,51041.0,11349.253623
1,53156,7759.0,1722.952532
2,53160,43530.0,9584.089587
3,53171,25667.0,5730.342896
4,53177,5663.0,1267.908959


In [25]:
# Preview the payments table.
# NOTE(review): `plati` is not defined in any cell visible in this notebook,
# so this cell relies on leftover kernel state. The rows displayed are also
# identical to the preview of 'reserves_eq_grupat.feather' - confirm that
# payments were loaded from the intended file.
plati.head()

Unnamed: 0,dosarID,ron_eq,eur_eq
0,53151,51041.0,11349.253623
1,53156,7759.0,1722.952532
2,53160,43530.0,9584.089587
3,53171,25667.0,5730.342896
4,53177,5663.0,1267.908959


In [27]:
# Sum of the ron_eq column of the payments table.
# NOTE(review): the displayed total equals rez.ron_eq.sum() to the last
# digit - verify that `plati` and `rez` are really different datasets.
plati.ron_eq.sum()

41049847767.629555

In [28]:
# Sum of the ron_eq column of the reserves table.
# NOTE(review): `rez` is not defined in any visible cell, and the displayed
# total is identical to plati.ron_eq.sum() - the two frames may have been
# loaded from the same file; confirm.
rez.ron_eq.sum()

41049847767.629555

In [26]:
# Preview the reserves table.
# NOTE(review): the rows displayed are identical to plati.head() and to the
# preview of 'reserves_eq_grupat.feather'; `rez` itself is not defined in
# any visible cell.
rez.head()

Unnamed: 0,dosarID,ron_eq,eur_eq
0,53151,51041.0,11349.253623
1,53156,7759.0,1722.952532
2,53160,43530.0,9584.089587
3,53171,25667.0,5730.342896
4,53177,5663.0,1267.908959


In [None]:
# NOTE(review): stray, never-executed cell. `value` is not defined in any
# visible cell, so on a fresh kernel this would raise NameError - remove the
# cell or replace it with the intended expression.
value

In [21]:
# Report how many rows the payments and reserves tables contain.
# NOTE(review): the two counts shown in the output are equal - worth
# confirming `plati` and `rez` are not the same data.
n_payments, n_reserves = len(plati), len(rez)
print('Number of payments:', n_payments)
print('Number of reserves:', n_reserves)

Number of payments: 761531
Number of reserves: 761531


In [29]:
# Spot-check 20 random rows of the event value log.
# A fixed random_state makes the sample reproducible across kernel restarts,
# so the rows discussed in the notebook are the rows a re-run will show.
# NOTE(review): `log` is not defined in any visible cell; its columns match
# `pay_log` - confirm whether they are the same table.
log.sample(20, random_state=42)

Unnamed: 0,acc_year,idEvent,idPolita,plati_ron,plati_eur,res_ron,res_eur,val_ron,val_eur,tip
492185,2020,502850,22314021,0.0,0.0,21175.0,4303.424622,21175.0,4303.424622,MAT
351006,2019,355380,16014031,8114.0,1706.41426,0.0,0.0,8114.0,1706.41426,MAT
256854,2018,275612,12851101,3300.0,707.395504,0.0,0.0,3300.0,707.395504,MAT
400803,2020,366794,16479228,6681.0,1373.476206,7635.0,1573.067715,14316.0,2946.543922,MAT
587482,2021,587482,26050707,6504.0,1320.367856,0.0,0.0,6504.0,1320.367856,MAT
455243,2020,457094,20403819,6494.0,1332.676638,8089.0,1667.494169,14583.0,3000.170807,MAT
599723,2021,599723,26745712,0.0,0.0,4708.0,955.929415,4708.0,955.929415,MAT
119571,2017,121584,6321766,0.0,0.0,0.0,0.0,0.0,0.0,MAT
617375,2021,617375,202013534,0.0,0.0,3076.0,627.044801,3076.0,627.044801,MAT
473282,2020,478603,21318095,17550.0,3551.481412,0.0,0.0,17550.0,3551.481412,MAT


In [32]:
# Number of distinct events present in the value log; compared further down
# with the number of distinct idEvent values in the claims data.
log.idEvent.nunique()

628018

In [33]:
# Show the earliest and latest accident year covered by the value log,
# one per line.
acc_years = log.acc_year
print(acc_years.min(), acc_years.max(), sep='\n')

2015
2021


In [30]:
# Re-read the raw claims file from S3, this time without the acc_year filter,
# and preview it.
# NOTE(review): this is the same 'daune.feather' already loaded into `claims`
# earlier; `df3` is an uninformative name for the unfiltered claims table.
df3 = s3con.read('daune.feather')
df3.head()

Unnamed: 0,idDosar,idPolita,stare,dataDeschidere,dataAvizare,dataEveniment,tipEveniment,tipDauna,idEvent
0,59897,2231006,inchis - achitat,2015-01-06,2015-01-06,2015-01-05,Pagube materiale,INTERNA,22310062015-01-05
1,60564,2233977,inchis - achitat,2015-01-12,2015-01-12,2015-01-07,Pagube materiale,INTERNA,22339772015-01-07
2,60672,2231966,inchis - achitat,2015-01-13,2015-01-13,2015-01-09,Pagube materiale,INTERNA,22319662015-01-09
3,60732,2232835,inchis - achitat,2015-01-13,2015-01-13,2015-01-08,Pagube materiale,INTERNA,22328352015-01-08
4,60984,2239842,inchis - achitat,2015-01-14,2015-01-14,2015-01-09,Pagube materiale,INTERNA,22398422015-01-09


In [31]:
# Number of distinct events in the unfiltered claims data (all accident years).
df3.idEvent.nunique()

638826

In [36]:
# Show the earliest and latest event date in the unfiltered claims data.
# The column is parsed once and reused, instead of converting the whole
# column to datetime separately for the min and for the max.
event_dates = pd.to_datetime(df3.dataEveniment)
print(event_dates.min())
print(event_dates.max())

2015-01-05 00:00:00
2022-12-31 00:00:00


In [37]:
# Convert the event date column to pandas datetimes in place, so that
# date-based operations can be applied to it in later cells.
df3['dataEveniment'] = pd.to_datetime(df3['dataEveniment'])

In [45]:
# Derive the accident year from the event date and keep only events that
# occurred before 2022.
# The year is extracted with the vectorised `.dt.year` accessor rather than a
# Python loop doing one `.iloc[x]` lookup per row; pd.to_datetime is a no-op
# when the column is already datetime and keeps this cell working even if the
# conversion cell was not run first.
df3['acc_year'] = pd.to_datetime(df3.dataEveniment).dt.year
# .copy() so that later edits to `a` do not touch (or warn about) df3.
a = df3[df3.acc_year < 2022].copy()
a.shape

(687941, 10)

In [47]:
# Number of claim rows per accident year in the pre-2022 subset.
a.acc_year.value_counts()

2019    144889
2020    137904
2018    118672
2021    112298
2017    102605
2016     48369
2015     23204
Name: acc_year, dtype: int64

In [46]:
# Number of distinct events in the pre-2022 claims subset.
# NOTE(review): the displayed count is larger than log.idEvent.nunique()
# shown earlier, which suggests some events have no entry in the value log -
# confirm, since the notebook's goal is a value for every event.
a.idEvent.nunique()

638825