In [4]:
# Read CSV file
import pandas as pd

customers = pd.read_csv("lista clienti_ridotta.csv", dtype=str, header=7)
customers

Unnamed: 0,ragione sociale,Partita IVA,email,Città,Indirizzo,CAP,telefono
0,ROMA CAPITALE !!S.P.Q.R.,1057861005,chemerijkb@merriam-webster.com,ROMA,VIA DEL CAMPIDOGLIO 1,57954 CEDEX,+33 (701) 723-2420
1,COMUNE DI MILANO,1199250158,sbennetc@list-manage.com,MILANO,PIAZZA DELLA SCALA 2,91499,+1 (213) 355-7217
2,FERRERO SPA CON UNICO AZIONISTA,934460049,spoteburyd@ovh.net,ALBA,PIAZZALE PIETRO FERRERO,,+62 (532) 104-6624
3,FERRARI S.P.A.,159560366,athurbye@cisco.com,MODENA,VIA EMILIA EST 1163,,+62 (566) 645-6024
4,ENEL SPA,15844561009,fjeenesf@ted.com,ROMA,VIALE REGINA MARGHERITA 137,522068,+57 (732) 174-3419
5,FIAT SPA,469580013,rmedlarg@ed.gov,TORINO,700 Texas Junction,95652 CEDEX,+33 (846) 213-0769
6,AUTOMOBILI LAMBORGHINI S.P.A.,591801204,pitzkovitchh@ning.com,BO,VIA MODENA 12,197758,+7 (886) 527-1083


In [None]:
# Import html page and scrape data
from bs4 import BeautifulSoup as bs

with open("./companies.html", "r") as f:
    soup = bs(f.read(), "html.parser")

companies_html_elements = soup.body.ul.find_all("li")
companies = []
for company in companies_html_elements:
    companies.append(company.span.text)

['ROMA CAPITALE !!S.P.Q.R.',
 'COMUNE DI MILANO',
 'FERRERO SPA CON UNICO AZIONISTA',
 'FERRARI S.P.A.',
 'GRUPPO IVA ENEL',
 'FIAT SPA',
 'AUTOMOBILI LAMBORGHINI S.P.A.',
 'Teklist',
 'Innojam',
 'Tagchat']

In [None]:
# Enrich dataset
customers["Valid"] = False
for index, company in customers.iterrows():
    if company["ragione sociale"] in companies:
        print(f'company {company["ragione sociale"]} found')
        customers.at[index, "Valid"] = True
    else:
        print(f'company {company["ragione sociale"]} NOT found')
        customers.at[index, "Valid"] = False

company ROMA CAPITALE !!S.P.Q.R. found
company COMUNE DI MILANO found
company FERRERO SPA CON UNICO AZIONISTA found
company FERRARI S.P.A. found
company ENEL SPA NOT found
company FIAT SPA found
company AUTOMOBILI LAMBORGHINI S.P.A. found


Unnamed: 0,ragione sociale,Partita IVA,email,Città,Indirizzo,CAP,telefono,Valid
0,ROMA CAPITALE !!S.P.Q.R.,1057861005,chemerijkb@merriam-webster.com,ROMA,VIA DEL CAMPIDOGLIO 1,57954 CEDEX,+33 (701) 723-2420,True
1,COMUNE DI MILANO,1199250158,sbennetc@list-manage.com,MILANO,PIAZZA DELLA SCALA 2,91499,+1 (213) 355-7217,True
2,FERRERO SPA CON UNICO AZIONISTA,934460049,spoteburyd@ovh.net,ALBA,PIAZZALE PIETRO FERRERO,,+62 (532) 104-6624,True
3,FERRARI S.P.A.,159560366,athurbye@cisco.com,MODENA,VIA EMILIA EST 1163,,+62 (566) 645-6024,True
4,ENEL SPA,15844561009,fjeenesf@ted.com,ROMA,VIALE REGINA MARGHERITA 137,522068,+57 (732) 174-3419,False
5,FIAT SPA,469580013,rmedlarg@ed.gov,TORINO,700 Texas Junction,95652 CEDEX,+33 (846) 213-0769,True
6,AUTOMOBILI LAMBORGHINI S.P.A.,591801204,pitzkovitchh@ning.com,BO,VIA MODENA 12,197758,+7 (886) 527-1083,True


In [12]:
# Call VEIS to get company info
import requests
def find_eu_company_info(vat):
    result = requests.get("https://ec.europa.eu/taxation_customs/vies/rest-api/ms/IT/vat/" + vat)
    result_json = result.json()

    if result_json["isValid"] == False:
        print(f'Invalid VAT found: {vat}')
        return (None, None)

    return (result_json["name"], result_json["address"])

In [13]:
# Loop over companies and enrich with VEIS data
customers["Valid VAT"] = False
for index, company in customers.iterrows():
    try:
        (name, address) = find_eu_company_info(company["Partita IVA"])
        print(f'found company {company["ragione sociale"]} with name {name}')
        if name is not None:
            customers.at[index, "Valid VAT"] = True
    except Exception as e:
        print(f'Error while retrieving data for company {e}')

customers

found company ROMA CAPITALE !!S.P.Q.R. with name ROMA CAPITALE !!S.P.Q.R.
found company COMUNE DI MILANO with name COMUNE DI MILANO
found company FERRERO SPA CON UNICO AZIONISTA with name FERRERO SPA CON UNICO AZIONISTA
found company FERRARI S.P.A. with name FERRARI S.P.A.
found company ENEL SPA with name GRUPPO IVA ENEL
Invalid VAT found: 00469580013
found company FIAT SPA with name None
found company AUTOMOBILI LAMBORGHINI S.P.A. with name AUTOMOBILI LAMBORGHINI S.P.A.


Unnamed: 0,ragione sociale,Partita IVA,email,Città,Indirizzo,CAP,telefono,Valid,Valid VAT
0,ROMA CAPITALE !!S.P.Q.R.,1057861005,chemerijkb@merriam-webster.com,ROMA,VIA DEL CAMPIDOGLIO 1,57954 CEDEX,+33 (701) 723-2420,True,True
1,COMUNE DI MILANO,1199250158,sbennetc@list-manage.com,MILANO,PIAZZA DELLA SCALA 2,91499,+1 (213) 355-7217,True,True
2,FERRERO SPA CON UNICO AZIONISTA,934460049,spoteburyd@ovh.net,ALBA,PIAZZALE PIETRO FERRERO,,+62 (532) 104-6624,True,True
3,FERRARI S.P.A.,159560366,athurbye@cisco.com,MODENA,VIA EMILIA EST 1163,,+62 (566) 645-6024,True,True
4,ENEL SPA,15844561009,fjeenesf@ted.com,ROMA,VIALE REGINA MARGHERITA 137,522068,+57 (732) 174-3419,False,True
5,FIAT SPA,469580013,rmedlarg@ed.gov,TORINO,700 Texas Junction,95652 CEDEX,+33 (846) 213-0769,True,False
6,AUTOMOBILI LAMBORGHINI S.P.A.,591801204,pitzkovitchh@ning.com,BO,VIA MODENA 12,197758,+7 (886) 527-1083,True,True


In [14]:
# Save to disk
customers.to_csv("result.csv", index=False)