In [1]:
import pandas as pd
import numpy as np
from settings import Config
from mysql_db import Database
import pdcast as pdc
import s3_upload_download as s3con
import os
from datetime import datetime

# NOTEBOOK DESCRIPTION: 

Investigate the payments and the reserves, and ensure that every event has a monetary value associated with it.

NOTE: 'Value' refers to the full amount of money associated with a claim file, which is either
1. Only the payment (plati) value, when a claim file has been closed
2. Payment + reserve, when a claim file has not yet been closed.


In [2]:
# initialise the s3_connector object needed to read/write files into an S3 bucket
# NOTE: this rebinds the name `s3con` from the imported module to a connector instance.
# The guard makes the cell safe to re-run: once `s3con` is no longer a module,
# the connector has already been created and is left untouched.
import types

if isinstance(s3con, types.ModuleType):
    s3con = s3con.s3_connector()

In [3]:
# load tables
# reserves and claims are read through the S3 connector
res = s3con.read('rezerve_ronEq.feather')
# NOTE(review): unlike the other two tables, payments are read from a local feather
# file with pandas rather than through the S3 connector — confirm this is intended
plati = pd.read_feather('plati_eq_wFinale.feather')
claims = s3con.read('daune.feather')

The payments (plati) are already grouped by claim file (dosarID), whilst the reserves are not yet grouped.

In [4]:
# preview the first rows of the reserves table
res.head()

Unnamed: 0,id,dosarID,data,suma,valuta,fx_euro,fx_usd,rezerve_ron_eq
0,236849,53151,2015-01-05,51041.0,RON,4.4973,3.7742,51041.0
1,236850,53156,2015-01-05,3309.0,RON,4.4973,3.7742,3309.0
2,350913,53156,2016-06-07,4450.0,RON,4.5078,3.9652,4450.0
3,236851,53160,2015-01-05,10369.0,RON,4.4973,3.7742,10369.0
4,361058,53160,2016-03-31,629.0,RON,4.4738,3.9349,629.0


In [5]:
# total reserve (RON equivalent) per claim file, with dosarID kept as a regular column
reserve_totals = res.groupby('dosarID')['rezerve_ron_eq'].sum()
res_gr = reserve_totals.reset_index()
res_gr.head()

Unnamed: 0,dosarID,rezerve_ron_eq
0,53151,51041.0
1,53156,7759.0
2,53160,43530.0
3,53171,25667.0
4,53177,5663.0


Now, the payments and reserves columns will be merged into the Claims table.

In [6]:
# preview the first rows of the payments table
plati.head()

Unnamed: 0,dosarID,ron_eq,eur_eq,plata_finala
0,53151,84604.0,19148.108732,yes
1,53156,7759.0,1721.238703,yes
2,53160,21765.0,4869.127745,yes
3,53171,25667.0,5754.416509,yes
4,53177,5663.0,1273.041987,yes


In [8]:
# preview the first rows of the claims table
claims.head()

Unnamed: 0,idDosar,idPolita,stare,dataDeschidere,dataAvizare,dataEveniment,tipEveniment,tipDauna,idEvent
0,59897,2231006,inchis - achitat,2015-01-06,2015-01-06,2015-01-05,Pagube materiale,INTERNA,22310062015-01-05
1,60564,2233977,inchis - achitat,2015-01-12,2015-01-12,2015-01-07,Pagube materiale,INTERNA,22339772015-01-07
2,60672,2231966,inchis - achitat,2015-01-13,2015-01-13,2015-01-09,Pagube materiale,INTERNA,22319662015-01-09
3,60732,2232835,inchis - achitat,2015-01-13,2015-01-13,2015-01-08,Pagube materiale,INTERNA,22328352015-01-08
4,60984,2239842,inchis - achitat,2015-01-14,2015-01-14,2015-01-09,Pagube materiale,INTERNA,22398422015-01-09


In [10]:
# Build the claim-level table: keep only claims with an accident year before 2022,
# then attach the payments (ron_eq) and the summed reserves (rezerve_ron_eq) per claim file.
# Both merges are left joins, so claims without a payment/reserve row get NaN in that column.
claims_vals = (
    claims
    .rename(columns = {'idDosar': 'dosarID'})
    # Derive the accident year in one vectorised pass instead of a per-row Python loop.
    # The lambda reads the frame being built in this chain, so the values are always
    # aligned with its rows. Casting to object first lets to_datetime handle the
    # date values regardless of whether the column is stored as a categorical.
    .assign(acc_year = lambda df: pd.to_datetime(df['dataEveniment'].astype(object)).dt.year)
    .query("acc_year < 2022")
    .merge(plati[['dosarID', 'ron_eq']], how = 'left', on = 'dosarID')
    .merge(res_gr, how = 'left', on = 'dosarID')
)



In [11]:
# preview the claims table with the payment and reserve columns attached
claims_vals.head()

Unnamed: 0,dosarID,idPolita,stare,dataDeschidere,dataAvizare,dataEveniment,tipEveniment,tipDauna,idEvent,acc_year,ron_eq,rezerve_ron_eq
0,59897,2231006,inchis - achitat,2015-01-06,2015-01-06,2015-01-05,Pagube materiale,INTERNA,22310062015-01-05,2015,13918.0,4500.0
1,60564,2233977,inchis - achitat,2015-01-12,2015-01-12,2015-01-07,Pagube materiale,INTERNA,22339772015-01-07,2015,1458.0,800.0
2,60672,2231966,inchis - achitat,2015-01-13,2015-01-13,2015-01-09,Pagube materiale,INTERNA,22319662015-01-09,2015,11891.0,2500.0
3,60732,2232835,inchis - achitat,2015-01-13,2015-01-13,2015-01-08,Pagube materiale,INTERNA,22328352015-01-08,2015,650.0,650.0
4,60984,2239842,inchis - achitat,2015-01-14,2015-01-14,2015-01-09,Pagube materiale,INTERNA,22398422015-01-09,2015,6077.0,2000.0


In [12]:
# count missing values per column
claims_vals.isnull().sum()

dosarID                0
idPolita               0
stare                  0
dataDeschidere         0
dataAvizare            0
dataEveniment          0
tipEveniment           0
tipDauna               0
idEvent                0
acc_year               0
ron_eq            119352
rezerve_ron_eq        13
dtype: int64

In [14]:
# number of claim files per status
claims_vals.stare.value_counts()

inchis - achitat       545116
in lucru                55794
platit partial          36452
programat la plata      29605
inchis - prescris       10026
inchis - respins         6675
inchis - fara plata      4092
suspendat                 178
returnat                    3
Name: stare, dtype: int64

There are many types of claim status here. I will reduce them to 2 - open (deschis) or closed (inchis) - and delete the claims in the four statuses that are not needed (inchis - respins, inchis - fara plata, suspendat, returnat).

In [None]:
# Collapse the claim statuses into open ('deschis') / closed ('inchis') and drop the
# claims whose status is not needed (mapped to the temporary label 'delete').
status_map = {'inchis - achitat':'inchis', 'inchis - prescris':'inchis', 'in lucru':'deschis',
              'platit partial':'deschis', 'programat la plata':'deschis',
              'inchis - respins':'delete', 'inchis - fara plata':'delete', 'suspendat':'delete', 'returnat':'delete'}

claims_vals = (
    claims_vals
    # recode only the status column (not every column of the frame); statuses that are
    # not in the map are left unchanged
    .assign(stare = lambda df: df['stare'].astype(object).map(lambda s: status_map.get(s, s)))
    .query("stare != 'delete'")
    # rebuild the categorical after filtering so dropped statuses do not remain as empty categories
    .assign(stare = lambda df: df['stare'].astype('category'))
    # fill only the monetary columns: a frame-wide fillna(0) would also target the
    # categorical columns, which have no category 0
    .fillna({'ron_eq': 0, 'rezerve_ron_eq': 0})
)

# show the resulting status distribution (a frame-wide value_counts() would count
# unique full rows, which is not informative here)
claims_vals.stare.value_counts()

In [18]:
# number of claim files per status after the recoding
claims_vals.stare.value_counts()

inchis     555142
deschis    121851
delete          0
Name: stare, dtype: int64

In [30]:
# inspect column dtypes and non-null counts
claims_vals.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 676993 entries, 0 to 687940
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype   
---  ------          --------------   -----   
 0   dosarID         676993 non-null  uint32  
 1   idPolita        676993 non-null  uint32  
 2   stare           676993 non-null  category
 3   dataDeschidere  676993 non-null  category
 4   dataAvizare     676993 non-null  category
 5   dataEveniment   676993 non-null  category
 6   tipEveniment    676993 non-null  category
 7   tipDauna        676993 non-null  category
 8   idEvent         676993 non-null  object  
 9   acc_year        676993 non-null  int64   
 10  ron_eq          568571 non-null  float64 
 11  rezerve_ron_eq  676990 non-null  float64 
dtypes: category(6), float64(2), int64(1), object(1), uint32(2)
memory usage: 37.1+ MB


In [34]:
# missing payments/reserves count as 0 so the two columns can be added together
zero_fill = {'ron_eq': 0, 'rezerve_ron_eq': 0}
claims_vals = claims_vals.fillna(zero_fill)

In [39]:
# create column with the total claim value:
# open ('deschis') claims count payments plus reserves, every other claim counts payments only.
# Computed in one vectorised step instead of a per-row Python loop over .iloc lookups.
is_open = claims_vals['stare'] == 'deschis'
claims_vals['total_val'] = np.where(
    is_open,
    claims_vals['ron_eq'] + claims_vals['rezerve_ron_eq'],
    claims_vals['ron_eq'],
)

claims_vals.head()

Unnamed: 0,dosarID,idPolita,stare,dataDeschidere,dataAvizare,dataEveniment,tipEveniment,tipDauna,idEvent,acc_year,ron_eq,rezerve_ron_eq,total_val
0,59897,2231006,inchis,2015-01-06,2015-01-06,2015-01-05,Pagube materiale,INTERNA,22310062015-01-05,2015,13918.0,4500.0,13918.0
1,60564,2233977,inchis,2015-01-12,2015-01-12,2015-01-07,Pagube materiale,INTERNA,22339772015-01-07,2015,1458.0,800.0,1458.0
2,60672,2231966,inchis,2015-01-13,2015-01-13,2015-01-09,Pagube materiale,INTERNA,22319662015-01-09,2015,11891.0,2500.0,11891.0
3,60732,2232835,inchis,2015-01-13,2015-01-13,2015-01-08,Pagube materiale,INTERNA,22328352015-01-08,2015,650.0,650.0,650.0
4,60984,2239842,inchis,2015-01-14,2015-01-14,2015-01-09,Pagube materiale,INTERNA,22398422015-01-09,2015,6077.0,2000.0,6077.0


In [43]:
# look at 5 randomly chosen rows
claims_vals.sample(5)

Unnamed: 0,dosarID,idPolita,stare,dataDeschidere,dataAvizare,dataEveniment,tipEveniment,tipDauna,idEvent,acc_year,ron_eq,rezerve_ron_eq,total_val
664297,794468,202610257,deschis,2021-08-02,2021-08-02,2021-07-19,Pagube materiale,INTERNA,2026102572021-07-19,2021,0.0,4500.0,4500.0
350601,446910,15998852,deschis,2019-08-07,2019-08-07,2019-06-09,"Vatamari corporale/deces, inclusive pentru pre...",INTERNA,159988522019-06-09,2019,0.0,38000.0,38000.0
386505,485511,17913788,inchis,2019-11-01,2019-11-01,2019-10-22,Pagube materiale,INTERNA,179137882019-10-22,2019,8377.0,8377.0,8377.0
504948,616702,17374125,inchis,2020-09-08,2020-09-08,2020-09-04,Pagube materiale,INTERNA,173741252020-09-04,2020,2894.0,2894.0,2894.0
506430,618367,20050681,inchis,2020-09-11,2020-09-11,2020-09-05,Pagube materiale,INTERNA,200506812020-09-05,2020,5436.0,5436.0,5436.0


In [44]:
# number of claim files whose total value is zero
claims_vals.query("total_val == 0").shape[0]

11931

There are several claim files with total values of zero. Those of them with a 'closed' status will be removed, whilst those 'open' will be imputed later with a proxy value.

In [51]:
# remove closed ('inchis') claim files whose total value is zero, then renumber the rows
zero_and_closed = claims_vals['total_val'].eq(0) & claims_vals['stare'].eq('inchis')
claims_vals = claims_vals.loc[~zero_and_closed].reset_index(drop = True)

In [54]:
# Persist the result: write a temporary local feather file, hand it to the S3
# connector, and always remove the local copy afterwards (even if the upload fails).
out_file = 'claims_vals.feather'
claims_vals.to_feather(out_file)
try:
    s3con.write(out_file)
finally:
    os.remove(out_file)