In [1]:
from italian_csv_type_prediction.dataframe_generators import SimpleDatasetGenerator
from italian_csv_type_prediction.models import TypePredictor
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
from multiprocessing import Pool, cpu_count
from sklearn.metrics import accuracy_score, balanced_accuracy_score

In [2]:
def dataset_generation(number:int):
    return SimpleDatasetGenerator().build(number, verbose=False)

def _dataset_generation(args):
    return dataset_generation(*args)

def parallel_dataset_generation(number:int):
    processes = min(cpu_count()*5, number)
    with Pool(cpu_count()) as p:
        Xs, ys = list(zip(*tqdm(
            p.imap(_dataset_generation, (
                (number//processes, )
                for _ in range(processes)
            )),
            total=processes,
            leave=False
        )))
    return np.vstack(Xs), np.concatenate(ys)

In [3]:
x_train, y_train = parallel_dataset_generation(100)
x_test, y_test = parallel_dataset_generation(100)

HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))



HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))



In [4]:
model = TypePredictor()

model.fit(x_train, y_train)

In [5]:
y_pred = model._model.predict(x_test)

In [6]:
accuracy_score(y_test, y_pred), balanced_accuracy_score(y_test, y_pred)

(0.8597298117789921, 0.8134981943454278)

In [7]:
from random_csv_generator import random_csv

df = random_csv(50)

In [8]:
from italian_csv_type_prediction.simple_types import NameType

In [9]:
NameType().validate("Ruwan Samantha Garusinghe", fiscal_code="RMNRNS61P04A882Z")

True

In [10]:
df

Unnamed: 0,region,province,surname,name,sex,birth_municipality,birth_province,birth_region,birth_cap,birth_province_code,birthdate,address,house_number,cap,municipality,province_code,codice_fiscale,total_debit,payed_debit
0,Lazio,Frosinone,Melis,Maria Caterina,F,Soddi',Oristano,Sardegna,9080,OR,1926-07-15,Piazza Guglielmo Marconi,2,3030,Santopadre,FR,MLSMCT26L55I778F,"Eu 11.323,00","Eu 3.460,00"
1,Prov. Auton. Bolzano,Bolzano,Basso,Sandro Giuseppe,M,Treviso,Treviso,Veneto,31100,TV,1923-07-08,Piazza Gerold,1,39020,Parcines,BZ,BSSSDR23L08L407K,"Eu 80.505,00","Eu 15.589,00"
2,Lombardia,Milano,Carta,Mara,F,Budduso',Olbia Tempio,Sardegna,7020,OT,1952-06-17,Corso Europa,42,20060,Pessano Con Bornago,MI,CRTMRA52H57B246F,"Eu 56.832,00","Eu 34.604,00"
3,Puglia,Bari,Zanetti,Klea,F,Fiesse,Brescia,Lombardia,25020,BS,1921-08-19,Via Giuseppe Pastore,43,70010,Sammichele Di Bari,BA,ZNTKLE21M59D576A,"Eu 65.962,00","Eu 14.580,00"
4,Sicilia,Catania,Bianchi,Piera Luigia,F,Castiglion Fiorentino,Arezzo,Toscana,52043,AR,1998-07-13,Viale Mario Milazzo,161,95041,Caltagirone,CT,BNCPLG98L53C319P,"Eu 7.215,00","Eu 4.308,00"
5,Lazio,Roma,De Rosa,M'hamed,M,Sorrento,Napoli,Campania,80067,,1968-05-12,Viale Europa,78,144,Roma,RM,DRSMMD68E12I862P,"Eu 66.946,00","Eu 62.300,00"
6,Lombardia,Bergamo,Serra,Steven Wilfried M,M,Nulvi,Sassari,Sardegna,7032,SS,1932-01-19,Via Roma,8/a - 8/b,24030,Terno D'isola,BG,SRRSVN32A19F977E,"Eu 19.428,00","Eu 2.840,00"
7,Emilia Romagna,Ravenna,Perrone,Giovanna,F,Lequile,Lecce,Puglia,73010,LE,1920-05-18,Via Fiume Abbandonato,124,48100,Ravenna,RA,PRRGNN20E58E538O,"Eu 11.087,00","Eu 7.399,00"
8,Piemonte,Novara,Siri,Mario,M,Onzo,Savona,Liguria,17037,SV,1989-05-16,Strada Statale Ticinese 32,17/19Ticinese 17/19,28040,Varallo Pombia,NO,SRIMRA89E16G076U,"Eu 29.873,00","Eu 16.896,00"
9,Lazio,Roma,Conti,Sila,F,Montaione,Firenze,Toscana,50050,FI,1931-09-21,Via Archeologia,58,133,Roma,RM,CNTSLI31P61F398P,"Eu 10.459,00","Eu 389,00"


In [11]:
model.predict_dataframe(df)

Unnamed: 0,region,province,surname,name,sex,birth_municipality,birth_province,birth_region,birth_cap,birth_province_code,birthdate,address,house_number,cap,municipality,province_code,codice_fiscale,total_debit,payed_debit
0,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,Currency,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
1,Name,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,Currency,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
2,Region,Municipality,Name,Name,BiologicalSex,Municipality,Name,Region,CAP,ProvinceCode,Date,Address,Currency,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
3,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,Currency,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
4,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,Currency,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
5,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,,Date,Address,Currency,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
6,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,Name,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
7,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,Currency,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
8,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,Name,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
9,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,Currency,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency


In [10]:
mask = y_test != y_pred

true_labels = model._embedder._encoder.inverse_transform(y_pred[mask])
predicted_labels = model._embedder._encoder.inverse_transform(y_test[mask])

for true, pred in zip(true_labels, predicted_labels):
    print(true, pred)

Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
CodiceFiscale IVA
CodiceFiscal

Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String


Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA

CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String


Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname Document
Surname Document
Surname 

Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name

Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale I

Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA


Boolean NaN
Boolean NaN
Boolean NaN
Boolean NaN
Boolean NaN
Boolean NaN
Boolean NaN
Boolean NaN
Boolean NaN
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFisca

Surname Document
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Surname Address
Sur

Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname CodiceFiscale
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname CodiceFiscale
Surname Document

Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname Name
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname String
Surname Document
Surname Document
Surname Document
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname Document
Surname CodiceFiscale
Surname Cod