In [1]:
from italian_csv_type_prediction.dataframe_generators import SimpleDatasetGenerator
from italian_csv_type_prediction.models import TypePredictor
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
from multiprocessing import Pool, cpu_count
from sklearn.metrics import accuracy_score, balanced_accuracy_score

In [2]:
def dataset_generation(number:int):
    return SimpleDatasetGenerator().build(number, verbose=False)

def _dataset_generation(args):
    return dataset_generation(*args)

def parallel_dataset_generation(number:int):
    processes = min(cpu_count()*5, number)
    with Pool(cpu_count()) as p:
        Xs, ys = list(zip(*tqdm(
            p.imap(_dataset_generation, (
                (number//processes, )
                for _ in range(processes)
            )),
            total=processes,
            leave=False
        )))
    return np.vstack(Xs), np.concatenate(ys)

In [3]:
x_train, y_train = parallel_dataset_generation(100)
x_test, y_test = parallel_dataset_generation(100)

HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))



HBox(children=(FloatProgress(value=0.0, max=60.0), HTML(value='')))



In [4]:
model = TypePredictor()

model.fit(x_train, y_train)

In [5]:
y_pred = model._model.predict(x_test)

In [6]:
accuracy_score(y_test, y_pred), balanced_accuracy_score(y_test, y_pred)

(0.8542660081743869, 0.8069061640365619)

In [7]:
from random_csv_generator import random_csv

df = random_csv(10)

In [9]:
df

Unnamed: 0,region,province,surname,name,sex,birth_municipality,birth_province,birth_region,birth_cap,birth_province_code,birthdate,address,house_number,cap,municipality,province_code,codice_fiscale,total_debit,payed_debit
0,Toscana,Siena,Acquadro,Marisa,F,Masserano,Biella,Piemonte,13866,BI,1944-04-13,Via T. Gazzei,57,53030,Radicondoli,SI,CQDMRS44D53F042J,"Eu 32.240,00","Eu 16.967,00"
1,Campania,Napoli,Fabbri,Paola,F,Borgo Tossignano,Bologna,Emilia Romagna,40021,BO,1971-08-12,Piazza Margherita,33,80040,San Gennaro Vesuviano,,FBBPLA71M52B044Q,"Eu 57.746,00","Eu 12.215,00"
2,Piemonte,Torino,Rosati,Daniel,M,Ascrea,Rieti,Lazio,2020,RI,1947-07-27,Via Val Della Torre,135,10091,Alpignano,TO,RSTDNL47L27A464H,"Eu 12.944,00","Eu 3.491,00"
3,Veneto,Padova,Santilli,Andrea,M,Pescocostanzo,L'Aquila,Abruzzo,67033,AQ,1998-03-01,Via Roma,2,35010,Villafranca Padovana,PD,SNTNDR98C01G493L,"Eu 73.615,00","Eu 32.832,00"
4,Calabria,Cosenza,Mauro,Diego,M,Forgaria Nel Friuli,Udine,Friuli Venezia Giulia,33030,UD,1947-08-27,Via Giovanni Xiii,5,87010,Mottafollone,CS,MRADGI47M27D700G,"Eu 72.091,00","Eu 46.992,00"
5,Sicilia,Ragusa,Furlan,Aniello,M,San Dorligo Della Valle-dolina,Trieste,Friuli Venezia Giulia,34018,TS,1974-08-17,Piazza Lenin,2,97018,Scicli,RG,FRLNLL74M17D324K,"Eu 11.433,00","Eu 2.089,00"
6,Lombardia,Como,Bortoluzzi,Fabrizio Ernesto,M,Belluno,Belluno,Veneto,32100,BL,1969-08-12,Via Marco Da Campione,6,22060,Campione D'italia,CO,BRTFRZ69M12A757A,"Eu 97.909,00","Eu 46.174,00"
7,Marche,Ascoli Piceno,Di Girolamo,Nilda,F,San Felice Circeo,Latina,Lazio,4017,LT,1987-02-08,Via Schiavi,31,63041,Acquasanta Terme,AP,DGRNLD87B48H836E,"Eu 75.919,00","Eu 40.972,00"
8,Emilia Romagna,Piacenza,Dutto,Rina,F,Martiniana Po,Cuneo,Piemonte,12030,CN,1971-03-08,Via San Rocco,16,29029,Rivergaro,PC,DTTRNI71C48E988B,"Eu 85.380,00","Eu 67.399,00"
9,Lazio,Roma,Caruso,Debora,F,Partinico,Palermo,Sicilia,90047,PA,1980-08-14,Via Rocca Priora,60,179,Roma,RM,CRSDBR80M54G348L,"Eu 69.991,00","Eu 45.636,00"


In [8]:
model.predict_dataframe(df)

Unnamed: 0,region,province,surname,name,sex,birth_municipality,birth_province,birth_region,birth_cap,birth_province_code,birthdate,address,house_number,cap,municipality,province_code,codice_fiscale,total_debit,payed_debit
0,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
1,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,,CodiceFiscale,Currency,Currency
2,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
3,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
4,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
5,Region,Municipality,Name,Name,BiologicalSex,Name,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
6,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
7,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
8,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency
9,Region,Municipality,Name,Name,BiologicalSex,Municipality,Municipality,Region,CAP,ProvinceCode,Date,Address,CAP,CAP,Municipality,ProvinceCode,CodiceFiscale,Currency,Currency


In [12]:
mask = y_test != y_pred

true_labels = model._embedder._encoder.inverse_transform(y_pred[mask])
predicted_labels = model._embedder._encoder.inverse_transform(y_test[mask])

for true, pred in zip(true_labels, predicted_labels):
    print(true, pred)

String CodiceFiscale
String Document
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String Document
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String Document
String Document
String Document
String Document
String CodiceFiscale
String Document
String Document
String CodiceFiscale
String Document
String CodiceFiscale
String Document
String CodiceFiscale
String CodiceFiscale
String Document
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String Document
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String Document
String CodiceFiscale
String CodiceFiscale
String Document
String Document
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
Stri

String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Address
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
String Name
S

String CodiceFiscale
String Document
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String Document
String Document
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String Document
String CodiceFiscale
String CodiceFiscale
String Document
String Document
String Document
String CodiceFiscale
String CodiceFiscale
String Document
String Document
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
String CodiceFiscale
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IVA
CodiceFiscale IV