### Data manipulation example

We read in a SPLICE dataset and format it ready for inclusion in our NN model.
Where there is no transaction in a development period, that development period with have no record. Settled claims are also not included.

Steps:  

1. Read in the data
2. Create additional variables
3. Apply cutoff
4. Make sure there is one record for every development period
5. Create variables and define which ones you want to use in the model
6. Create train and test datasets

In [1]:
#pip install chainladder

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

import chainladder as cl

import torch
import torch.nn as nn
from torch.nn import functional as F

In [2]:
dirname="/home/nigel/git/MLR_working_party/01_data/"
filename="data_origframework_nofills_nosttl.csv"

pd.options.display.float_format = '{:,.2f}'.format

### 1. Read in the data from SPLICE  
Here we just use the paid data. Case estimate data is also available in the incurred_1.csv file but we do not use it here

In [3]:
transactions = pd.read_csv(
    f"https://raw.githubusercontent.com/agi-lab/SPLICE/main/datasets/complexity_1/payment_1.csv"
)
transactions

Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,payment_size,payment_inflated,payment_delay
0,1,1,1,1,0.73,232310.09,0.66,23.21,5.33,6,13226.34,13226.34,3.93
1,2,1,2,1,0.73,232310.09,0.66,23.21,10.09,11,15685.86,15685.86,4.76
2,3,1,3,1,0.73,232310.09,0.66,23.21,18.02,19,14643.28,14643.28,7.93
3,4,1,4,1,0.73,232310.09,0.66,23.21,22.82,23,170041.89,170041.89,4.79
4,5,1,5,1,0.73,232310.09,0.66,23.21,24.61,25,18712.71,18712.71,1.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19317,19318,3663,2,40,39.87,40653.72,2.29,10.67,46.40,47,2213.88,2213.88,1.84
19318,19319,3663,3,40,39.87,40653.72,2.29,10.67,48.45,49,2292.75,2292.75,2.06
19319,19320,3663,4,40,39.87,40653.72,2.29,10.67,50.24,51,2367.32,2367.32,1.78
19320,19321,3663,5,40,39.87,40653.72,2.29,10.67,51.76,52,28576.07,28576.07,1.52


### 2. Create additional variables

In [4]:
transactions["noti_period"] = np.ceil(transactions["occurrence_time"] + transactions["notidel"]).astype('int')
transactions["settle_period"] = np.ceil(transactions["occurrence_time"] + transactions["notidel"] + transactions["setldel"]).astype('int')

### 3. Apply cutoff

In [5]:
# Apply a maximum development period
maxdev=40

transactions["development_period"] = np.minimum(transactions["payment_period"] - transactions["occurrence_period"], maxdev)  

transactions

Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,payment_size,payment_inflated,payment_delay,noti_period,settle_period,development_period
0,1,1,1,1,0.73,232310.09,0.66,23.21,5.33,6,13226.34,13226.34,3.93,2,25,5
1,2,1,2,1,0.73,232310.09,0.66,23.21,10.09,11,15685.86,15685.86,4.76,2,25,10
2,3,1,3,1,0.73,232310.09,0.66,23.21,18.02,19,14643.28,14643.28,7.93,2,25,18
3,4,1,4,1,0.73,232310.09,0.66,23.21,22.82,23,170041.89,170041.89,4.79,2,25,22
4,5,1,5,1,0.73,232310.09,0.66,23.21,24.61,25,18712.71,18712.71,1.79,2,25,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19317,19318,3663,2,40,39.87,40653.72,2.29,10.67,46.40,47,2213.88,2213.88,1.84,43,53,7
19318,19319,3663,3,40,39.87,40653.72,2.29,10.67,48.45,49,2292.75,2292.75,2.06,43,53,9
19319,19320,3663,4,40,39.87,40653.72,2.29,10.67,50.24,51,2367.32,2367.32,1.78,43,53,11
19320,19321,3663,5,40,39.87,40653.72,2.29,10.67,51.76,52,28576.07,28576.07,1.52,43,53,12


Only include records for a claim that is after the notification period

In [14]:
#dat = dat.loc[dat.payment_period >= dat.noti_period].copy()
#dat

### 5. Create variables and define which ones you want to use in the model

In [6]:
dat=transactions

# Clean close to zero values
dat["payment_size"] = np.where(abs(dat.payment_size) < 1e-2, 0.0, dat.payment_size)

#    payment_period=lambda df: (df.occurrence_period + df.development_period),
dat["is_settled"]=((dat.occurrence_period + dat.development_period) >= dat.settle_period)

# Cumulative payments
dat["payment_size_cumulative"] = dat[["claim_no", "payment_size"]].groupby('claim_no').cumsum()
dat["log1_paid_cumulative"] = np.log1p(dat.payment_size_cumulative)

dat["pmt_no"] = dat.groupby("claim_no")["pmt_no"].cummax()

dat

Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,payment_size,payment_inflated,payment_delay,noti_period,settle_period,development_period,is_settled,payment_size_cumulative,log1_paid_cumulative
0,1,1,1,1,0.73,232310.09,0.66,23.21,5.33,6,13226.34,13226.34,3.93,2,25,5,False,13226.34,9.49
1,2,1,2,1,0.73,232310.09,0.66,23.21,10.09,11,15685.86,15685.86,4.76,2,25,10,False,28912.20,10.27
2,3,1,3,1,0.73,232310.09,0.66,23.21,18.02,19,14643.28,14643.28,7.93,2,25,18,False,43555.48,10.68
3,4,1,4,1,0.73,232310.09,0.66,23.21,22.82,23,170041.89,170041.89,4.79,2,25,22,False,213597.38,12.27
4,5,1,5,1,0.73,232310.09,0.66,23.21,24.61,25,18712.71,18712.71,1.79,2,25,24,True,232310.09,12.36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19317,19318,3663,2,40,39.87,40653.72,2.29,10.67,46.40,47,2213.88,2213.88,1.84,43,53,7,False,4832.11,8.48
19318,19319,3663,3,40,39.87,40653.72,2.29,10.67,48.45,49,2292.75,2292.75,2.06,43,53,9,False,7124.86,8.87
19319,19320,3663,4,40,39.87,40653.72,2.29,10.67,50.24,51,2367.32,2367.32,1.78,43,53,11,False,9492.18,9.16
19320,19321,3663,5,40,39.87,40653.72,2.29,10.67,51.76,52,28576.07,28576.07,1.52,43,53,12,False,38068.25,10.55


Show records just for an individual claim - here claim_no 1

In [7]:
dat[dat['claim_no'] == 1]

Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,payment_size,payment_inflated,payment_delay,noti_period,settle_period,development_period,is_settled,payment_size_cumulative,log1_paid_cumulative
0,1,1,1,1,0.73,232310.09,0.66,23.21,5.33,6,13226.34,13226.34,3.93,2,25,5,False,13226.34,9.49
1,2,1,2,1,0.73,232310.09,0.66,23.21,10.09,11,15685.86,15685.86,4.76,2,25,10,False,28912.2,10.27
2,3,1,3,1,0.73,232310.09,0.66,23.21,18.02,19,14643.28,14643.28,7.93,2,25,18,False,43555.48,10.68
3,4,1,4,1,0.73,232310.09,0.66,23.21,22.82,23,170041.89,170041.89,4.79,2,25,22,False,213597.38,12.27
4,5,1,5,1,0.73,232310.09,0.66,23.21,24.61,25,18712.71,18712.71,1.79,2,25,24,True,232310.09,12.36


Define which variables you want to use in the model

output_field and youtput are also created to define the y variate for use in the model

### Only include records for where the claim is not settled

In [8]:
dat_set = dat.loc[dat.is_settled == True]
#dat[dat['claim_no'] == 1]
dat_set

Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,payment_size,payment_inflated,payment_delay,noti_period,settle_period,development_period,is_settled,payment_size_cumulative,log1_paid_cumulative
4,5,1,5,1,0.73,232310.09,0.66,23.21,24.61,25,18712.71,18712.71,1.79,2,25,24,True,232310.09,12.36
9,10,2,5,1,0.33,237675.12,0.74,22.15,23.22,24,14508.98,14508.98,3.46,2,24,23,True,237675.12,12.38
15,16,3,6,1,0.52,83559.57,0.90,9.83,11.25,12,7135.71,7135.71,0.62,2,12,11,True,83559.57,11.33
20,21,4,5,1,0.74,227599.31,0.42,16.91,18.08,19,14522.22,14522.22,0.92,2,19,18,True,227599.31,12.34
24,25,5,4,1,0.62,119227.15,1.64,7.72,9.31,10,94467.44,94467.44,1.80,3,10,9,True,112184.28,11.63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19302,19303,3659,4,40,39.54,19709.37,0.06,6.67,46.28,47,1526.06,1526.06,1.04,40,47,7,True,19709.37,9.89
19306,19307,3660,4,40,39.43,37299.39,0.22,8.87,48.52,49,2974.15,2974.15,1.45,40,49,9,True,37299.39,10.53
19310,19311,3661,4,40,39.36,1104261.06,0.38,26.52,66.27,67,88058.25,88058.25,2.43,40,67,27,True,1104261.06,13.91
19315,19316,3662,5,40,39.79,38973.68,2.12,3.12,45.02,46,3436.12,3436.12,0.48,42,46,6,True,38973.68,10.57


In [9]:
dat = dat.loc[dat.is_settled == False]
#dat[dat['claim_no'] == 1]
dat

Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,payment_size,payment_inflated,payment_delay,noti_period,settle_period,development_period,is_settled,payment_size_cumulative,log1_paid_cumulative
0,1,1,1,1,0.73,232310.09,0.66,23.21,5.33,6,13226.34,13226.34,3.93,2,25,5,False,13226.34,9.49
1,2,1,2,1,0.73,232310.09,0.66,23.21,10.09,11,15685.86,15685.86,4.76,2,25,10,False,28912.20,10.27
2,3,1,3,1,0.73,232310.09,0.66,23.21,18.02,19,14643.28,14643.28,7.93,2,25,18,False,43555.48,10.68
3,4,1,4,1,0.73,232310.09,0.66,23.21,22.82,23,170041.89,170041.89,4.79,2,25,22,False,213597.38,12.27
5,6,2,1,1,0.33,237675.12,0.74,22.15,3.46,4,14014.05,14014.05,2.38,2,24,3,False,14014.05,9.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19316,19317,3663,1,40,39.87,40653.72,2.29,10.67,44.56,45,2618.23,2618.23,2.40,43,53,5,False,2618.23,7.87
19317,19318,3663,2,40,39.87,40653.72,2.29,10.67,46.40,47,2213.88,2213.88,1.84,43,53,7,False,4832.11,8.48
19318,19319,3663,3,40,39.87,40653.72,2.29,10.67,48.45,49,2292.75,2292.75,2.06,43,53,9,False,7124.86,8.87
19319,19320,3663,4,40,39.87,40653.72,2.29,10.67,50.24,51,2367.32,2367.32,1.78,43,53,11,False,9492.18,9.16


In [10]:
# Potential features for model later:
data_cols = [
    "claim_no",
    "occurrence_time", 
    "notidel", 
    "development_period", 
    "pmt_no",
    "log1_paid_cumulative",
]

list_of_features = data_cols
output_field = ["claim_size"]
youtput="claim_size"

dat.loc[:, data_cols + [youtput]]
#dat.loc[:, list_of_features + output_field]

Unnamed: 0,claim_no,occurrence_time,notidel,development_period,pmt_no,log1_paid_cumulative,claim_size
0,1,0.73,0.66,5,1,9.49,232310.09
1,1,0.73,0.66,10,2,10.27,232310.09
2,1,0.73,0.66,18,3,10.68,232310.09
3,1,0.73,0.66,22,4,12.27,232310.09
5,2,0.33,0.74,3,1,9.55,237675.12
...,...,...,...,...,...,...,...
19316,3663,39.87,2.29,5,1,7.87,40653.72
19317,3663,39.87,2.29,7,2,8.48,40653.72
19318,3663,39.87,2.29,9,3,8.87,40653.72
19319,3663,39.87,2.29,11,4,9.16,40653.72


## 6. Create train and test datasets.  
We provide three types of indicators here for reference, but in our NN example we only use train_ind  

* train_ind creates a split by claim - 'rectangular data'
* train_ind_time is what you would use if you wanted to split the datasets by calendar period
* cv_ind is what you might use if you were doing cross validation

In [11]:
cutoff=maxdev

dat["train_ind"] = (dat.claim_no % 10 >= 4)
dat["train_ind_time"] = (dat.payment_period <= cutoff)
dat["cv_ind"] = dat.payment_period % 5
dat

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,...,payment_delay,noti_period,settle_period,development_period,is_settled,payment_size_cumulative,log1_paid_cumulative,train_ind,train_ind_time,cv_ind
0,1,1,1,1,0.73,232310.09,0.66,23.21,5.33,6,...,3.93,2,25,5,False,13226.34,9.49,False,True,1
1,2,1,2,1,0.73,232310.09,0.66,23.21,10.09,11,...,4.76,2,25,10,False,28912.20,10.27,False,True,1
2,3,1,3,1,0.73,232310.09,0.66,23.21,18.02,19,...,7.93,2,25,18,False,43555.48,10.68,False,True,4
3,4,1,4,1,0.73,232310.09,0.66,23.21,22.82,23,...,4.79,2,25,22,False,213597.38,12.27,False,True,3
5,6,2,1,1,0.33,237675.12,0.74,22.15,3.46,4,...,2.38,2,24,3,False,14014.05,9.55,False,True,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19316,19317,3663,1,40,39.87,40653.72,2.29,10.67,44.56,45,...,2.40,43,53,5,False,2618.23,7.87,False,False,0
19317,19318,3663,2,40,39.87,40653.72,2.29,10.67,46.40,47,...,1.84,43,53,7,False,4832.11,8.48,False,False,2
19318,19319,3663,3,40,39.87,40653.72,2.29,10.67,48.45,49,...,2.06,43,53,9,False,7124.86,8.87,False,False,4
19319,19320,3663,4,40,39.87,40653.72,2.29,10.67,50.24,51,...,1.78,43,53,11,False,9492.18,9.16,False,False,1


In [12]:
X_train = (dat.loc[(dat.train_ind == 1), list_of_features])
y_train = (dat.loc[(dat.train_ind == 1), youtput])

X_test = (dat.loc[(dat.train_ind == 0), list_of_features])
y_test = (dat.loc[(dat.train_ind == 0), youtput])

X = (dat.loc[:, list_of_features])
y = (dat.loc[:, youtput])

In [14]:
dat.loc[(dat.train_ind == 1)]

Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,...,payment_delay,noti_period,settle_period,development_period,is_settled,payment_size_cumulative,log1_paid_cumulative,train_ind,train_ind_time,cv_ind
16,17,4,1,1,0.74,227599.31,0.42,16.91,2.69,3,...,1.53,2,19,2,False,9886.19,9.20,True,True,3
17,18,4,2,1,0.74,227599.31,0.42,16.91,7.48,8,...,4.79,2,19,7,False,22944.02,10.04,True,True,3
18,19,4,3,1,0.74,227599.31,0.42,16.91,12.25,13,...,4.76,2,19,12,False,34648.16,10.45,True,True,3
19,20,4,4,1,0.74,227599.31,0.42,16.91,17.16,18,...,4.91,2,19,17,False,213077.09,12.27,True,True,3
21,22,5,1,1,0.62,119227.15,1.64,7.72,4.30,5,...,2.04,3,10,4,False,5870.76,8.68,True,True,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19295,19296,3658,2,40,39.32,263001.49,1.51,10.10,46.37,47,...,3.41,41,51,7,False,21720.12,9.99,True,False,2
19296,19297,3658,3,40,39.32,263001.49,1.51,10.10,48.55,49,...,2.18,41,51,9,False,33159.09,10.41,True,False,4
19299,19300,3659,1,40,39.54,19709.37,0.06,6.67,42.20,43,...,2.59,40,47,3,False,2615.21,7.87,True,False,3
19300,19301,3659,2,40,39.54,19709.37,0.06,6.67,44.08,45,...,1.88,40,47,5,False,4888.79,8.49,True,False,0


In [15]:
dat.loc[(dat.train_ind == 0)]

Unnamed: 0.1,Unnamed: 0,claim_no,pmt_no,occurrence_period,occurrence_time,claim_size,notidel,setldel,payment_time,payment_period,...,payment_delay,noti_period,settle_period,development_period,is_settled,payment_size_cumulative,log1_paid_cumulative,train_ind,train_ind_time,cv_ind
0,1,1,1,1,0.73,232310.09,0.66,23.21,5.33,6,...,3.93,2,25,5,False,13226.34,9.49,False,True,1
1,2,1,2,1,0.73,232310.09,0.66,23.21,10.09,11,...,4.76,2,25,10,False,28912.20,10.27,False,True,1
2,3,1,3,1,0.73,232310.09,0.66,23.21,18.02,19,...,7.93,2,25,18,False,43555.48,10.68,False,True,4
3,4,1,4,1,0.73,232310.09,0.66,23.21,22.82,23,...,4.79,2,25,22,False,213597.38,12.27,False,True,3
5,6,2,1,1,0.33,237675.12,0.74,22.15,3.46,4,...,2.38,2,24,3,False,14014.05,9.55,False,True,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19316,19317,3663,1,40,39.87,40653.72,2.29,10.67,44.56,45,...,2.40,43,53,5,False,2618.23,7.87,False,False,0
19317,19318,3663,2,40,39.87,40653.72,2.29,10.67,46.40,47,...,1.84,43,53,7,False,4832.11,8.48,False,False,2
19318,19319,3663,3,40,39.87,40653.72,2.29,10.67,48.45,49,...,2.06,43,53,9,False,7124.86,8.87,False,False,4
19319,19320,3663,4,40,39.87,40653.72,2.29,10.67,50.24,51,...,1.78,43,53,11,False,9492.18,9.16,False,False,1


In [37]:
import datetime

def map_value_to_date(value: float) -> datetime.date:
    """
    Maps a value in the range [0, 40] to a date between 01/01/2000 and 31/12/2010.

    Args:
        value: The input value (float) to map, expected in the range [0, 40].

    Returns:
        A datetime.date object (only the date, no time).
    """

    # 1. Define the input range (Source)
    X_MIN = 0.0
    X_MAX = 40.0

    # 2. Define the output date range (Target)
    # Note: We still define the time bounds (00:00:00 to 23:59:59) for accuracy
    START_DATE = datetime.datetime(2000, 1, 1, 0, 0, 0)
    END_DATE = datetime.datetime(2010, 12, 31, 23, 59, 59)

    # 3. Convert target dates to Unix epoch times
    T_START = START_DATE.timestamp()
    T_END = END_DATE.timestamp()
    T_RANGE = T_END - T_START

    # 5. Linear Interpolation
    normalized_ratio = (value - X_MIN) / (X_MAX - X_MIN)
    mapped_epoch_time = T_START + (normalized_ratio * T_RANGE)

    # 6. Convert the resulting epoch time back to a datetime object
    mapped_datetime = datetime.datetime.fromtimestamp(mapped_epoch_time)
    
    # 7. 💥 AMENDMENT: Return only the date part 💥
    return mapped_datetime.date()



In [38]:
dat['occurrence_date'] = dat['occurrence_time'].apply(map_value_to_date)
dat['notification_date'] = (dat['occurrence_time']+ dat['notidel']).apply(map_value_to_date)
dat['payment_date'] = dat['payment_time'].apply(map_value_to_date)
dat['settlement_date'] = (dat['occurrence_time']+ dat['notidel']+ dat['setldel']).apply(map_value_to_date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Save file to disk

In [41]:
cl.Triangle(dat.loc[(dat.train_ind == 1)], origin='occurrence_date', development='payment_date', columns=['payment_size'], cumulative = False).grain('OYDY')

#cl.load_sample('quarterly').grain('OYDY')

  pd.Series(range(len(self.origin)), index=self.origin)


Unnamed: 0,1,13,25,37,49,61,73,85,97,109,121,133,145,157,169,181,193,205,217,229,241
2000,,258535.0,1679966.0,3379762.0,3371165.0,4039610.0,3762410.0,3397567.0,1541008.0,520104.0,,,,,,,,,,,
2001,,231545.0,1500161.0,2766985.0,5267991.0,3968971.0,4361166.0,5559051.0,5155673.0,2154953.0,1827002.0,,,,,,,,,,
2002,,199973.0,1783055.0,2805680.0,4605242.0,8342743.0,4123801.0,4710990.0,3219327.0,722157.0,2349908.0,,,,,,,,,,
2003,,255178.0,1583159.0,3001309.0,3408810.0,6209381.0,3438935.0,4133384.0,2009005.0,233617.0,542878.0,,,,,,,,,,
2004,,321554.0,1811090.0,4036237.0,3914044.0,5631342.0,4311520.0,1142252.0,1237667.0,2142271.0,,,,,,,,,,,
2005,,340416.0,1934799.0,3409590.0,5345013.0,4028026.0,5184898.0,4111696.0,4376084.0,1373824.0,,,,,,,,,,,
2006,,362459.0,1272543.0,2589444.0,3159741.0,4864188.0,4545184.0,3090956.0,849786.0,,,,,,,,,,,,
2007,,331939.0,1668113.0,3291943.0,5045788.0,8428769.0,4405005.0,4416596.0,2285446.0,3131650.0,,,,,,,,,,,
2008,,260093.0,1486760.0,2475399.0,3913153.0,6590303.0,3666226.0,2953399.0,1757921.0,792030.0,868065.0,,,,,,,,,,
2009,,281146.0,1547891.0,2358648.0,4536720.0,6912204.0,4437029.0,3949124.0,4122086.0,1643708.0,1367191.0,,,,,,,,,,


In [20]:
dat.to_csv(
    dirname + filename
)

In [18]:
filename

'data_origframework_nofills_nosttl.csv'