# Data Summarization

## Import Dependenices and Custom Modules

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from scripts.data_loader import DataLoader
from scripts.data_analysis import DataAnalysis
from scripts.data_visualize import DataVisualize

In [2]:
# Loads a dataset from a text file and saves it as a CSV file.
# dataLoader = DataLoader()
# df = dataLoader.load_and_save_as_csv('../data/raw/MachineLearningRating_v3.txt')

df = pd.read_csv('../data/raw/MachineLearningRating_v3.csv')

dataAnalysis = DataAnalysis(df)
dataVisualize = DataVisualize(df)

  df = pd.read_csv('../data/raw/MachineLearningRating_v3.csv')


In [3]:
# List the columns of the DataFrame
print("Columns in the DataFrame:")
print(df.columns)

Columns in the DataFrame:
Index(['UnderwrittenCoverID', 'PolicyID', 'TransactionMonth',
       'IsVATRegistered', 'Citizenship', 'LegalType', 'Title', 'Language',
       'Bank', 'AccountType', 'MaritalStatus', 'Gender', 'Country', 'Province',
       'PostalCode', 'MainCrestaZone', 'SubCrestaZone', 'ItemType', 'mmcode',
       'VehicleType', 'RegistrationYear', 'make', 'Model', 'Cylinders',
       'cubiccapacity', 'kilowatts', 'bodytype', 'NumberOfDoors',
       'VehicleIntroDate', 'CustomValueEstimate', 'AlarmImmobiliser',
       'TrackingDevice', 'CapitalOutstanding', 'NewVehicle', 'WrittenOff',
       'Rebuilt', 'Converted', 'CrossBorder', 'NumberOfVehiclesInFleet',
       'SumInsured', 'TermFrequency', 'CalculatedPremiumPerTerm',
       'ExcessSelected', 'CoverCategory', 'CoverType', 'CoverGroup', 'Section',
       'Product', 'StatutoryClass', 'StatutoryRiskType', 'TotalPremium',
       'TotalClaims'],
      dtype='object')


In [4]:
# List the shape of the DataFrame
print("\nShape of the DataFrame:")
print(df.shape)


Shape of the DataFrame:
(1000098, 52)


In [5]:
# Check duplicate values
print("Check for duplicate values:")
print(df.duplicated().sum())

Check for duplicate values:
0


- The number of rows is 10000098 and the number of columns is 52 with no duplicate value

## Data Understanding

### Insurance Policy

In [6]:
# List the columns overview for insurance policies
insurance_policy_columns = [ 'UnderwrittenCoverID', 'PolicyID']

dataAnalysis.columns_overview(insurance_policy_columns)

The first 5 rows
   UnderwrittenCoverID  PolicyID
0               145249     12827
1               145249     12827
2               145249     12827
3               145255     12827
4               145255     12827
----------------------------------
The last 5 rows
         UnderwrittenCoverID  PolicyID
1000093                31520       389
1000094                31520       389
1000095                31520       389
1000096                31519       389
1000097                31519       389
----------------------------------
The date type of the columns:
UnderwrittenCoverID    int64
PolicyID               int64
dtype: object
----------------------------------
The count of unique values in the columns:
UnderwrittenCoverID    116532
PolicyID                 7000
dtype: int64
----------------------------------
Check for missing values:
UnderwrittenCoverID    0
PolicyID               0
dtype: int64


The **Insurance Policy** columns are UnderwrittenCoverID and PolicyID.
- **UnderwrittenCoverID**: Unique identifier for a specific insurance coverage with **116532** unique values.
- **PolicyID**: Identifier for an entire insurance policy with **7000** unique values.
- The columns are **integer**.
- **No missing values** in both columns.
- PolicyID is many-to-one: each policy covers multiple UnderwrittenCoverIDs.

### Transaction Date

In [7]:
# List the column overview for transaction date
dataAnalysis.columns_overview('TransactionMonth')

The first 5 rows
0    2015-03-01 00:00:00
1    2015-05-01 00:00:00
2    2015-07-01 00:00:00
3    2015-05-01 00:00:00
4    2015-07-01 00:00:00
Name: TransactionMonth, dtype: object
----------------------------------
The last 5 rows
1000093    2015-04-01 00:00:00
1000094    2015-06-01 00:00:00
1000095    2015-08-01 00:00:00
1000096    2014-07-01 00:00:00
1000097    2015-02-01 00:00:00
Name: TransactionMonth, dtype: object
----------------------------------
The date type of the columns:
object
----------------------------------
The count of unique values in the columns:
23
----------------------------------
Check for missing values:
0


In [8]:
# Convert the 'TransactionMonth' column to datetime format
dataAnalysis.convert_to_datetime_format('TransactionMonth')

In [9]:
# List of unique Months in the dataset
print("\n23 Months in the dataset:")
dataAnalysis.show_unique_values('TransactionMonth')


23 Months in the dataset:


[Timestamp('2013-10-01 00:00:00'),
 Timestamp('2013-11-01 00:00:00'),
 Timestamp('2013-12-01 00:00:00'),
 Timestamp('2014-01-01 00:00:00'),
 Timestamp('2014-02-01 00:00:00'),
 Timestamp('2014-03-01 00:00:00'),
 Timestamp('2014-04-01 00:00:00'),
 Timestamp('2014-05-01 00:00:00'),
 Timestamp('2014-06-01 00:00:00'),
 Timestamp('2014-07-01 00:00:00'),
 Timestamp('2014-08-01 00:00:00'),
 Timestamp('2014-09-01 00:00:00'),
 Timestamp('2014-10-01 00:00:00'),
 Timestamp('2014-11-01 00:00:00'),
 Timestamp('2014-12-01 00:00:00'),
 Timestamp('2015-01-01 00:00:00'),
 Timestamp('2015-02-01 00:00:00'),
 Timestamp('2015-03-01 00:00:00'),
 Timestamp('2015-04-01 00:00:00'),
 Timestamp('2015-05-01 00:00:00'),
 Timestamp('2015-06-01 00:00:00'),
 Timestamp('2015-07-01 00:00:00'),
 Timestamp('2015-08-01 00:00:00')]

The **Transaction Date** column is TransactionMonth.
- **TransactionMonth**: Month the transaction occurred with **23** unique values.
- Intialy the column was **object**, now changed to **datetime** format.
- **No missing values** in the column.
- The date is from October 2013 to August 2015.

### Columns about the client

In [10]:
# List the columns overview for client information
client_columns = [ 'IsVATRegistered', 'Citizenship', 'LegalType', 'Title', 'Language',
       'Bank', 'AccountType', 'MaritalStatus', 'Gender']

dataAnalysis.columns_overview(client_columns)

The first 5 rows
   IsVATRegistered Citizenship          LegalType Title Language  \
0             True              Close Corporation    Mr  English   
1             True              Close Corporation    Mr  English   
2             True              Close Corporation    Mr  English   
3             True              Close Corporation    Mr  English   
4             True              Close Corporation    Mr  English   

                  Bank      AccountType  MaritalStatus         Gender  
0  First National Bank  Current account  Not specified  Not specified  
1  First National Bank  Current account  Not specified  Not specified  
2  First National Bank  Current account  Not specified  Not specified  
3  First National Bank  Current account  Not specified  Not specified  
4  First National Bank  Current account  Not specified  Not specified  
----------------------------------
The last 5 rows
         IsVATRegistered Citizenship   LegalType Title Language       Bank  \
1000093      

In [11]:
# List the unique values in the client information columns
print("\nUnique values in client information columns:")
dataAnalysis.show_unique_values(client_columns)


Unique values in client information columns:


{'IsVATRegistered': array([ True, False]),
 'Citizenship': array(['  ', 'AF', 'ZA', 'ZW'], dtype=object),
 'LegalType': array(['Close Corporation', 'Individual', 'Partnership',
        'Private company', 'Public company', 'Sole proprieter'],
       dtype=object),
 'Title': array(['Mr', 'Dr', 'Miss', 'Mrs', 'Ms'], dtype=object),
 'Language': array(['English'], dtype=object),
 'Bank': array(['First National Bank', 'Standard Bank', nan, 'ABSA Bank',
        'Capitec Bank', 'Nedbank', 'FirstRand Bank', 'Investec Bank',
        'Ithala Bank', 'Old Mutual', 'Mercantile Lisbon Bank',
        'RMB Private Bank'], dtype=object),
 'AccountType': array(['Current account', 'Savings account', nan, 'Transmission account'],
       dtype=object),
 'MaritalStatus': array(['Not specified', 'Married', 'Single', nan], dtype=object),
 'Gender': array(['Not specified', 'Male', 'Female', nan], dtype=object)}

The **Client Information** columns are IsVATRegistered, Citizenship, LegalType, Title, Language,
       Bank, AccountType, MaritalStatus and Gender.
- **IsVATRegistered**: indicates business registration for tax occurred with **2** unique values.
    - **Values:** True, False
- **Citizenship**: **4** unique values.
    - **Values:** '  ', 'AF', 'ZA', 'ZW'
- **LegalType**: **6** unique values.
    - **Values:** 'Close Corporation', 'Individual', 'Partnership',
        'Private company', 'Public company', 'Sole proprieter'
- **Title**: **5** unique values.
    - **Values:** 'Mr', 'Dr', 'Miss', 'Mrs', 'Ms'
- **Language**: **1** unique values.
    - All of the **Value** of the column is English.
- **Bank**: **11** unique values.
- **AccountType**: **3** unique values.
    - **Values:** 'Current account', 'Savings account', 'Transmission account'
- **MaritalStatus**: **3** unique values.
    - **Values:** 'Not specified', 'Married', 'Single'
- **Gender**: **3** unique values.
    - **Values:** 'Not specified', 'Male', 'Female'
- IsVATRegistered is **boolean** data type but the remaining columns are **object** datatype
- There were **missing value** in columns: Bank, AccountType, MaritalStatus and Gender.

### Client Location

In [12]:
# List the columns overview for client location information
client_location_columns = [ 'Country', 'Province', 'PostalCode', 'MainCrestaZone', 'SubCrestaZone']

dataAnalysis.columns_overview(client_location_columns)

The first 5 rows
        Country Province  PostalCode MainCrestaZone SubCrestaZone
0  South Africa  Gauteng        1459      Rand East     Rand East
1  South Africa  Gauteng        1459      Rand East     Rand East
2  South Africa  Gauteng        1459      Rand East     Rand East
3  South Africa  Gauteng        1459      Rand East     Rand East
4  South Africa  Gauteng        1459      Rand East     Rand East
----------------------------------
The last 5 rows
              Country      Province  PostalCode  \
1000093  South Africa  Western Cape        7493   
1000094  South Africa  Western Cape        7493   
1000095  South Africa  Western Cape        7493   
1000096  South Africa  Western Cape        7493   
1000097  South Africa  Western Cape        7493   

                           MainCrestaZone    SubCrestaZone  
1000093  Karoo 1 (Northeast of Cape Town)  Northeast of CT  
1000094  Karoo 1 (Northeast of Cape Town)  Northeast of CT  
1000095  Karoo 1 (Northeast of Cape Town)  Nor

In [13]:
# List the unique values in the client information columns
client_location_columns_without_PostalCode = [col for col in client_location_columns if col != 'PostalCode']
print("\nUnique values in client location information columns:")
dataAnalysis.show_unique_values(client_location_columns_without_PostalCode)


Unique values in client location information columns:


{'Country': array(['South Africa'], dtype=object),
 'Province': array(['Gauteng', 'KwaZulu-Natal', 'Mpumalanga', 'Eastern Cape',
        'Western Cape', 'Limpopo', 'North West', 'Free State',
        'Northern Cape'], dtype=object),
 'MainCrestaZone': array(['Rand East', 'Transvaal (all except Pretoria)', 'Johannesburg',
        'Natal (Durban)', 'Tembu 2, Cape Mid 2, Cape Mid West, Tembu 1',
        'Transvaal (Pretoria)', 'Natal', 'Cape Province (Cape Town)',
        'Langkloof, Coast 2, Coast 1',
        'Cape Province (East and North of Cape Town)', 'Oranje Free State',
        'East London', 'Port Elizabeth', 'Ciskei, Cape Mid 1',
        'Cape Province', 'Karoo 1 (Northeast of Cape Town)'], dtype=object),
 'SubCrestaZone': array(['Rand East', 'Transvaal South', 'Johannesburg', 'Durban',
        'Transvaal South East', 'Cape Mid West', 'Pretoria', 'Rand West',
        'North Coast', 'Cape Town', 'Transvaal North', 'Langkloof',
        'Transvaal North West', 'Transvaal East', 'Eas

The **Client Location** columns are 'Country', 'Province', 'PostalCode', 'MainCrestaZone' and 'SubCrestaZone'.
- **Country**: South Africa with **1** unique value.
- **Provice**: **9** unique values.
- The columns are **object** except **PostalCode** which is integer.
- **No missing values** in all columns.

### Car Insured

In [14]:
# List the columns overview for  car insured information
car_insured_columns1 = [ 'ItemType', 'mmcode', 'VehicleType', 'RegistrationYear', 'make', 'Model', 'Cylinders',
       'cubiccapacity', 'kilowatts', 'bodytype', 'NumberOfDoors', 'VehicleIntroDate']

dataAnalysis.columns_overview(car_insured_columns1)

The first 5 rows
           ItemType      mmcode        VehicleType  RegistrationYear  \
0  Mobility - Motor  44069150.0  Passenger Vehicle              2004   
1  Mobility - Motor  44069150.0  Passenger Vehicle              2004   
2  Mobility - Motor  44069150.0  Passenger Vehicle              2004   
3  Mobility - Motor  44069150.0  Passenger Vehicle              2004   
4  Mobility - Motor  44069150.0  Passenger Vehicle              2004   

            make  Model  Cylinders  cubiccapacity  kilowatts bodytype  \
0  MERCEDES-BENZ  E 240        6.0         2597.0      130.0      S/D   
1  MERCEDES-BENZ  E 240        6.0         2597.0      130.0      S/D   
2  MERCEDES-BENZ  E 240        6.0         2597.0      130.0      S/D   
3  MERCEDES-BENZ  E 240        6.0         2597.0      130.0      S/D   
4  MERCEDES-BENZ  E 240        6.0         2597.0      130.0      S/D   

   NumberOfDoors VehicleIntroDate  
0            4.0           6/2002  
1            4.0           6/2002  
2  

In [15]:
# List the unique values in the car insured information columns
some_car_insured_columns1 = [ 'ItemType', 'VehicleType', 'RegistrationYear', 
        'make', 'Cylinders', 'bodytype', 'NumberOfDoors']
print("\nUnique values in car insured information columns:")
dataAnalysis.show_unique_values(some_car_insured_columns1)


Unique values in car insured information columns:


{'ItemType': array(['Mobility - Motor'], dtype=object),
 'VehicleType': array(['Passenger Vehicle', 'Medium Commercial', 'Heavy Commercial',
        'Light Commercial', 'Bus', nan], dtype=object),
 'RegistrationYear': array([2004, 2006, 2009, 2011, 2007, 2014, 2010, 2013, 2008, 2015, 2012,
        2005, 1998, 1995, 2000, 2003, 1999, 2001, 1997, 1994, 2002, 1996,
        1992, 1987, 1988]),
 'make': array(['MERCEDES-BENZ', 'VOLKSWAGEN', 'RENAULT', 'FORD', 'BMW', 'AUDI',
        'VOLVO', 'PROTON', 'TOYOTA', 'NISSAN/DATSUN                      ',
        'CMC', 'MERCEDES-BENZ                      ', 'C.A.M', 'POLARSUN',
        'NISSAN', 'IVECO', 'FIAT', 'TOYOTA                             ',
        'JINBEI', 'HYUNDAI', 'GOLDEN JOURNEY', 'KIA', 'MITSUBISHI', 'TATA',
        'MAZDA', 'CITROEN                            ', 'FOTON', 'B.A.W',
        'PEUGEOT', 'JINBEI                             ',
        'SUZUKI                             ',
        'CHERY                              ',

In [16]:
# Convert the 'mmcode' column to int format
dataAnalysis.convert_to_int('mmcode')

In [17]:
# Convert the 'RegistrationYear' column to datetime format
dataAnalysis.convert_year_to_datetime_format('RegistrationYear')

In [18]:
# Convert the 'Cylinders' column to int format
dataAnalysis.convert_to_int('Cylinders')

# Convert the 'NumberOfDoors' column to int format
dataAnalysis.convert_to_int('NumberOfDoors')

In [19]:
# Convert the 'VehicleIntroDate' column to datetime format
dataAnalysis.convert_to_datetime_format('VehicleIntroDate')

Half **Car Issued** columns are ''ItemType', 'mmcode',
       'VehicleType', 'RegistrationYear', 'make', 'Model', 'Cylinders',
       'cubiccapacity', 'kilowatts', 'bodytype', 'NumberOfDoors', and 'VehicleIntroDate'.
- **ItemType**: Mobility - Motor with **1** unique value.
- **mmcode**: convert to int data type.
- **VehicleType**: **5** unique values.
- **RegistrationYear**: **25** unique values convert to datetime data type.
- **Cylinders**: **7** unique values convert to int data type.
- **NumberOfDoors**: **6** unique values convert to int data type.
- **VehicleIntroDate**: convert to datetime data type.
- The columns were **object**, **int** and **float** where some columns are changed.
- 10 columns have **552 missing values**.

In [20]:
# List the columns overview for  car insured information
car_insured_columns2 = [ 'CustomValueEstimate', 'AlarmImmobiliser',
       'TrackingDevice', 'CapitalOutstanding', 'NewVehicle', 'WrittenOff',
       'Rebuilt', 'Converted', 'CrossBorder', 'NumberOfVehiclesInFleet']

dataAnalysis.columns_overview(car_insured_columns2)

The first 5 rows
   CustomValueEstimate AlarmImmobiliser TrackingDevice CapitalOutstanding  \
0             119300.0              Yes             No             119300   
1             119300.0              Yes             No             119300   
2             119300.0              Yes             No             119300   
3             119300.0              Yes             No             119300   
4             119300.0              Yes             No             119300   

           NewVehicle WrittenOff Rebuilt Converted CrossBorder  \
0  More than 6 months        NaN     NaN       NaN         NaN   
1  More than 6 months        NaN     NaN       NaN         NaN   
2  More than 6 months        NaN     NaN       NaN         NaN   
3  More than 6 months        NaN     NaN       NaN         NaN   
4  More than 6 months        NaN     NaN       NaN         NaN   

   NumberOfVehiclesInFleet  
0                      NaN  
1                      NaN  
2                      NaN  
3      

In [21]:
# List the unique values in the car insured information columns
some_car_insured_columns2 = [ 'AlarmImmobiliser','TrackingDevice', 'NewVehicle', 'WrittenOff',
       'Rebuilt', 'Converted', 'CrossBorder']

print("\nUnique values in car insured information columns:")
dataAnalysis.show_unique_values(some_car_insured_columns2)


Unique values in car insured information columns:


{'AlarmImmobiliser': array(['Yes', 'No'], dtype=object),
 'TrackingDevice': array(['No', 'Yes'], dtype=object),
 'NewVehicle': array(['More than 6 months', nan, 'Less than 6 months'], dtype=object),
 'WrittenOff': array([nan, 'No', 'Yes'], dtype=object),
 'Rebuilt': array([nan, 'No', 'Yes'], dtype=object),
 'Converted': array([nan, 'No', 'Yes'], dtype=object),
 'CrossBorder': array([nan, 'No'], dtype=object)}

Half **Cat Issued** columns are CustomValueEstimate', 'AlarmImmobiliser',
       'TrackingDevice', 'CapitalOutstanding', 'NewVehicle', 'WrittenOff',
       'Rebuilt', 'Converted', 'CrossBorder', and 'NumberOfVehiclesInFleet.
- The columns are **object** except two columns **CustomValueEstimate** and **NumberOfVehiclesInFleet** which are float.
- Most columns have **missing values** 
- Most columns have Yes or No value

### Plan

In [22]:
# List the columns overview for plan information
plan = [ 'SumInsured', 'TermFrequency', 'CalculatedPremiumPerTerm',
       'ExcessSelected', 'CoverCategory', 'CoverType', 'CoverGroup', 'Section',
       'Product', 'StatutoryClass', 'StatutoryRiskType']

dataAnalysis.columns_overview(plan)

The first 5 rows
   SumInsured TermFrequency  CalculatedPremiumPerTerm  \
0        0.01       Monthly                   25.0000   
1        0.01       Monthly                   25.0000   
2        0.01       Monthly                   25.0000   
3   119300.00       Monthly                  584.6468   
4   119300.00       Monthly                  584.6468   

                     ExcessSelected CoverCategory   CoverType  \
0             Mobility - Windscreen    Windscreen  Windscreen   
1             Mobility - Windscreen    Windscreen  Windscreen   
2             Mobility - Windscreen    Windscreen  Windscreen   
3  Mobility - Metered Taxis - R2000    Own damage  Own Damage   
4  Mobility - Metered Taxis - R2000    Own damage  Own Damage   

             CoverGroup              Section                          Product  \
0  Comprehensive - Taxi  Motor Comprehensive  Mobility Metered Taxis: Monthly   
1  Comprehensive - Taxi  Motor Comprehensive  Mobility Metered Taxis: Monthly   
2  Com

In [23]:
# List the unique values in the plan information columns
plans = [ 'TermFrequency', 'StatutoryClass', 'StatutoryRiskType']

print("\nUnique values in plan information columns:")
dataAnalysis.show_unique_values(plans)


Unique values in plan information columns:


{'TermFrequency': array(['Monthly', 'Annual'], dtype=object),
 'StatutoryClass': array(['Commercial'], dtype=object),
 'StatutoryRiskType': array(['IFRS Constant'], dtype=object)}

The **Plan** columns are 'SumInsured', 'TermFrequency', 'CalculatedPremiumPerTerm',
       'ExcessSelected', 'CoverCategory', 'CoverType', 'CoverGroup', 'Section',
       'Product', 'StatutoryClass', and 'StatutoryRiskType'.
- **TermFrequency**: **2** unique value which are 'Monthly', and 'Annual'.
- **StatutoryClass**: **1** unique values: Commercial.
- **StatutoryRiskType**: **1** unique values: IFRS Constant.
- The columns are **object** except two columns **SumInsured** and **CalculatedPremiumPerTerm** which are float.
- **No missing values** in all columns.

### Payment and Claim

In [24]:
# List the columns overview for payment_and_claim information
payment_and_claim = [ 'TotalPremium', 'TotalClaims']

dataAnalysis.columns_overview(payment_and_claim)

The first 5 rows
   TotalPremium  TotalClaims
0     21.929825          0.0
1     21.929825          0.0
2      0.000000          0.0
3    512.848070          0.0
4      0.000000          0.0
----------------------------------
The last 5 rows
         TotalPremium  TotalClaims
1000093    347.235175          0.0
1000094    347.235175          0.0
1000095    347.235175          0.0
1000096      2.315000          0.0
1000097      2.315000          0.0
----------------------------------
The date type of the columns:
TotalPremium    float64
TotalClaims     float64
dtype: object
----------------------------------
The count of unique values in the columns:
TotalPremium    38959
TotalClaims      1615
dtype: int64
----------------------------------
Check for missing values:
TotalPremium    0
TotalClaims     0
dtype: int64


The **Payment and Claim** columns are 'TotalPremium', and 'TotalClaims'.
- The columns are **float**.
- **No missing values** in all columns.

In [25]:
# Save processed data to csv
csvfile = '../data/cleaned/ProcessedMachineLearningRating_v3.csv'

df.to_csv(csvfile, index=False)
print(f"Data saved as {csvfile}")

Data saved as ../data/cleaned/ProcessedMachineLearningRating_v3.csv


In [26]:
# Data types overview
dtype_report = df.dtypes

# Categorical variables check
categorical_cols = df.select_dtypes(include=['object', 'category']).columns
for col in categorical_cols:
    print(f"{col}: {df[col].nunique()} unique values")

# Date variables check
date_cols = df.select_dtypes(include=['datetime64']).columns
for col in date_cols:
    print(f"{col} range: {df[col].min()} to {df[col].max()}")

Citizenship: 4 unique values
LegalType: 6 unique values
Title: 5 unique values
Language: 1 unique values
Bank: 11 unique values
AccountType: 3 unique values
MaritalStatus: 3 unique values
Gender: 3 unique values
Country: 1 unique values
Province: 9 unique values
MainCrestaZone: 16 unique values
SubCrestaZone: 45 unique values
ItemType: 1 unique values
VehicleType: 5 unique values
make: 46 unique values
Model: 411 unique values
bodytype: 13 unique values
AlarmImmobiliser: 2 unique values
TrackingDevice: 2 unique values
CapitalOutstanding: 1011 unique values
NewVehicle: 2 unique values
WrittenOff: 2 unique values
Rebuilt: 2 unique values
Converted: 2 unique values
CrossBorder: 1 unique values
TermFrequency: 2 unique values
ExcessSelected: 13 unique values
CoverCategory: 28 unique values
CoverType: 22 unique values
CoverGroup: 14 unique values
Section: 5 unique values
Product: 4 unique values
StatutoryClass: 1 unique values
StatutoryRiskType: 1 unique values
TransactionMonth range: 2013-1