In [None]:
import getpass
import io
import json
import os
import re
import time
import unicodedata
import xml.etree.ElementTree as et

import pandas as pd
import pdfkit
import phonetics
import requests
from abydos.distance import sim
from abydos.phonetic import FONEM, Phonet, Dolby, RussellIndex, Phonem, PHONIC
from IPython.core.display import HTML
from pandas import json_normalize
from PyPDF2 import PdfMerger
from suds.client import Client


In [None]:
# Connect to the Swissreg WSDL service using the required authentication.
# The credentials are read from the SWISSREG_USERNAME / SWISSREG_PASSWORD
# environment variables (with an interactive prompt as a fallback) so that no
# secret is stored in the notebook itself.
url = 'https://www.swissreg.ch/services11/SwissregWebService?wsdl'
client = Client(
    url,
    username=os.environ.get('SWISSREG_USERNAME') or input('Swissreg username: '),
    password=os.environ.get('SWISSREG_PASSWORD') or getpass.getpass('Swissreg password: '),
)

In [None]:
# Ask the user which trademark to search for. The answer is kept in the
# module-level `query` variable, which the search cells further down reuse.
query = input("Quelle marque souhaitez-vous chercher ? ")

Quelle marque souhaitez-vous chercher ? vaya


In [None]:
#remove accents function
def remove_accents(input_str):
    """Return ``input_str`` without diacritics.

    The text is decomposed with Unicode NFKD normalisation and every combining
    character produced by that decomposition is discarded.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    kept_chars = filter(lambda ch: not unicodedata.combining(ch), decomposed)
    return u"".join(kept_chars)

In [None]:
# TODO: match queries without vowels
# Character class of the vowels (plain and a few accented ones) to strip.
vowels = r'[AEIOUÀÄÈÉÊÖÜ]'

def anti_vowel(s):
    """Return ``s`` with every character matched by ``vowels`` removed.

    Matching is case-insensitive, so lower-case vowels are removed as well.
    """
    vowel_pattern = re.compile(vowels, re.IGNORECASE)
    return vowel_pattern.sub('', s)

In [None]:
#string rotation
def vowel_rotate(string):
    """Return five successive vowel rotations of ``string``, comma-separated.

    Each pass replaces every vowel by the next one in the cycle
    a -> e -> i -> o -> u -> a (upper case stays upper case) and the result of
    each pass is collected. A string without vowels is therefore repeated
    unchanged five times.
    """
    source_vowels = 'aeiouAEIOU'
    shifted_vowels = 'eiouaEIOUA'

    def shift(match):
        # Look the matched vowel up in the source alphabet and return the
        # character at the same position in the shifted alphabet.
        return shifted_vowels[source_vowels.find(match.group(0))]

    rotations = []
    current = string
    for _ in range(5):
        current = re.sub('(?i)[aeiou]', shift, current)
        rotations.append(current)
    return ",".join(rotations)


In [None]:
# Phonetic encoders from abydos, instantiated once so the search cell can
# reuse them.
# NOTE(review): the search cell in this notebook only uses db, pe_gm and
# pe_fr; pe_de and pe_ph are not referenced in the visible code.
pe_fr = FONEM()
pe_de = Phonet()
db = Dolby()
pe_gm = Phonem()
pe_ph = PHONIC()

In [None]:
# Search the "CH-TM" register by trademark text. The adjacent string literals
# concatenate to 'tmText=' followed by: the vowel-rotated variants, the raw
# query, the query without vowels, and its Dolby, Phonem and FONEM encodings.
# NOTE(review): there is no separator between vowel_rotate(query) and query,
# so the last rotated variant and the raw query are glued into one token —
# confirm against the service documentation whether this is intended.
Ip_Right = client.service.searchIpRight("CH-TM", 'tmText' '=' f'{vowel_rotate(query)}{query} {anti_vowel(query)} {db.encode_alpha(query)} {pe_gm.encode(query)} {pe_fr.encode(query)} ')

In [None]:
# Save the raw search result to ip_right.csv in the current working directory.
# NOTE(review): the value is written as-is with f.write, which assumes
# Ip_Right is a string — TODO confirm the return type of searchIpRight.
with open('ip_right.csv', 'w') as f:
    f.write(Ip_Right)

In [None]:
# Use the search result as input to request the XML for the matching rights
# (original note: generates both trademark & bname xml).
# NOTE(review): `response` is not written to disk anywhere in the visible
# notebook, while the cleaning cells read a response.xml file — confirm how
# that file is produced.
response = client.service.getIpRightXML("CH-TM", Ip_Right)

# **Marque Visuelle**

In [None]:
def retrieve_marque_data(transac):
    """Extract one figurative-mark row from a ``marinfo`` XML element.

    Parameters
    ----------
    transac : xml.etree.ElementTree.Element
        The ``marinfo`` element to read.

    Returns
    -------
    list
        ``[basappn, basregn, marpicn, classes, titulaire_name,
        titulaire_addrl, titulaire_plainco, titulaire_nat]`` where ``classes``
        is a list of ints read from ``gsgr/intregg/nicclai`` and the
        ``titulaire_*`` values come from the ``regadr`` entry whose
        ``addrrole`` is 1. Any element that is missing yields ``None`` (or an
        empty ``classes`` list) instead of raising.

    Notes
    -----
    A function with the same name is defined again later in this notebook for
    word marks; whichever definition was executed last is the one in effect.
    """

    def child_text(parent, tag):
        """Return the text of the first ``tag`` child of ``parent``, or None."""
        if parent is None:
            return None
        child = parent.find(tag)
        return child.text if child is not None else None

    basappn = child_text(transac, "basappn")
    basregn = child_text(transac, "basregn")
    marpicn = child_text(transac, "marpicn")

    # Classes
    classes = []
    gsgr = transac.find("gsgr")
    if gsgr is not None:
        for intregg in gsgr.findall("intregg"):
            nicclai = child_text(intregg, "nicclai")
            if nicclai is not None:
                classes.append(int(nicclai))

    # TODO check format of mandataire/titulaire
    titulaire_name = None
    titulaire_addrl = None
    titulaire_plainco = None
    titulaire_nat = None

    # Addresses: only role 1 (titulaire) is kept; if several such entries
    # exist the last one wins.
    for regadr in transac.findall("regadr"):
        addrrole = child_text(regadr, "addrrole")
        if addrrole is None or int(addrrole) != 1:
            continue
        nameadd = regadr.find("nameadd")
        titulaire_name = child_text(nameadd, "namel")
        titulaire_addrl = child_text(nameadd, "addrl")
        titulaire_plainco = child_text(nameadd, "plainco")
        titulaire_nat = child_text(nameadd, "nat")

    # TODO verify what oppositiongr and oppositionstate mean. They were looked
    # up here but never used, and the lookup raised AttributeError whenever
    # <oppositiongr> was absent, so it is no longer performed.

    row = [basappn, basregn, marpicn,
           classes,
           titulaire_name,
           titulaire_addrl,
           titulaire_plainco,
           titulaire_nat]

    return row

In [None]:
def parse_xml(xml_file, encoding="utf-8"):
    """Collect one row per figurative mark found in ``xml_file``.

    Walks ``transac`` children of the document root, then their ``marinfo``
    children, and keeps those that contain a ``marpicn`` element. Each kept
    element is converted with ``retrieve_marque_data``.

    ``encoding`` is accepted but not used by the body.
    """
    root = et.parse(xml_file).getroot()

    # Marks carrying a picture reference (marpicn), in document order.
    return [
        retrieve_marque_data(marinfo)
        for transac_node in root.findall("transac")
        for marinfo in transac_node.findall("marinfo")
        if marinfo.find("marpicn") is not None
    ]

# **Visual Data Cleaning and Preparation**

**Merge, Rename and drop unused Columns**

In [None]:
#display trademark logo in dataframe and convert to html
def to_img_tag(path):
    """Wrap ``path`` in an 80-pixel-wide HTML ``<img>`` tag."""
    pieces = ('<img src="', path, '" width="80"  >')
    return ''.join(pieces)

In [None]:
def clean_marque_visuelle(xml_file_path):
    """Build the figurative-mark table from an XML file and export it to PDF.

    The XML is parsed with ``parse_xml``, the holder fields are merged into a
    single ``titulaire`` column, the columns are renamed, the logo column is
    rendered as ``<img>`` tags, and the table is written to
    ``marque_logo.html`` and converted to ``marque_logo.pdf`` (both in the
    current working directory).

    Parameters
    ----------
    xml_file_path : str
        Path of the XML file to read.

    Returns
    -------
    The value returned by ``pdfkit.from_file``.
    """
    # The HTML is converted from the very file written below; previously the
    # conversion read a hard-coded absolute path that only matched when the
    # working directory happened to be that folder.
    html_path = "marque_logo.html"
    pdf_path = "marque_logo.pdf"

    with open(xml_file_path, 'r') as xml_file:
        rows = parse_xml(xml_file)

    columns = ['basappn', 'basregn', 'marpicn',
               'classes',
               'titulaire_name',
               'titulaire_addrl',
               'titulaire_plainco',
               'titulaire_nat']

    df = pd.DataFrame(rows, columns=columns)
    df['titulaire'] = df.titulaire_name.fillna('') + ' ' + df.titulaire_addrl.fillna('') + ' ' + df.titulaire_plainco.fillna('') + ' ' + df.titulaire_nat.fillna('')
    df = df.drop(['titulaire_name', 'titulaire_addrl', 'titulaire_plainco',
                  'titulaire_nat'], axis=1)
    #old,new
    df = df.rename(columns={'classes': 'Classes de Nice', 'basappn': 'No de la demande', 'basregn': 'No de la marque', 'basannd': 'Depot', 'marpicn': 'Logo'})
    df['titulaire'] = df['titulaire'].apply(remove_accents).str.replace(r'\n', '', regex=True)
    df['Classes de Nice'] = df['Classes de Nice'].apply(lambda x: ','.join(map(str, x)))

    # escape=False keeps the <img> tags produced by to_img_tag as real markup.
    df_html = df.to_html(escape=False, formatters=dict(Logo=to_img_tag)).replace(
        '<table border="1" class="dataframe">',
        '<table border="1" style="border-collapse: '
        'collapse;text-align: center;font-family:Calibri;font-size:10px;" '
        'class="dataframe">').replace(
        '<tr style="text-align: right;">',
        '<tr style="text-align: center;table-layout: fixed; width: auto; index:False";>')

    #save html result
    with open(html_path, "w") as f:
        f.write(df_html)

    try:
        config = pdfkit.configuration(wkhtmltopdf='C:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltopdf.exe')
        return pdfkit.from_file(html_path, pdf_path, configuration=config)
    except OSError:
        # Fall back to pdfkit's default wkhtmltopdf lookup. The message is
        # printed before returning; it used to sit after the return statement
        # and was never shown.
        print("wkhtmltopdf not usable at the configured location; retrying with the default lookup")
        return pdfkit.from_file(html_path, pdf_path, verbose=True)
        
        
    

In [None]:
# Build the figurative-mark report from the saved XML file.
# NOTE(review): the input path is an absolute, machine-specific location.
clean_marque_visuelle("C:/Users/eakli/response.xml")

True

# **Marque Verbale**

In [None]:
def retrieve_marque_data(transac):
    """Extract one word-mark row from a ``marinfo`` XML element.

    Parameters
    ----------
    transac : xml.etree.ElementTree.Element
        The ``marinfo`` element to read.

    Returns
    -------
    list
        ``[basappn, basregn, markve, classes, titulaire_name,
        titulaire_addrl, titulaire_plainco, titulaire_nat]`` where ``classes``
        is a list of ints read from ``gsgr/intregg/nicclai`` and the
        ``titulaire_*`` values come from the ``regadr`` entry whose
        ``addrrole`` is 1. Any element that is missing yields ``None`` (or an
        empty ``classes`` list) instead of raising.

    Notes
    -----
    This definition replaces the earlier function of the same name (the one
    reading ``marpicn``) once this cell has been executed.
    """

    def child_text(parent, tag):
        """Return the text of the first ``tag`` child of ``parent``, or None."""
        if parent is None:
            return None
        child = parent.find(tag)
        return child.text if child is not None else None

    basappn = child_text(transac, "basappn")
    basregn = child_text(transac, "basregn")
    markve = child_text(transac, "markve")

    # Classes
    classes = []
    gsgr = transac.find("gsgr")
    if gsgr is not None:
        for intregg in gsgr.findall("intregg"):
            nicclai = child_text(intregg, "nicclai")
            if nicclai is not None:
                classes.append(int(nicclai))

    # TODO check format of mandataire/titulaire
    titulaire_name = None
    titulaire_addrl = None
    titulaire_plainco = None
    titulaire_nat = None

    # Addresses: only role 1 (titulaire) is kept; if several such entries
    # exist the last one wins.
    for regadr in transac.findall("regadr"):
        addrrole = child_text(regadr, "addrrole")
        if addrrole is None or int(addrrole) != 1:
            continue
        nameadd = regadr.find("nameadd")
        titulaire_name = child_text(nameadd, "namel")
        titulaire_addrl = child_text(nameadd, "addrl")
        titulaire_plainco = child_text(nameadd, "plainco")
        titulaire_nat = child_text(nameadd, "nat")

    # TODO verify what oppositiongr and oppositionstate mean. They were looked
    # up here but never used, and the lookup raised AttributeError whenever
    # <oppositiongr> was absent, so it is no longer performed.

    row = [basappn, basregn, markve,
           classes,
           titulaire_name,
           titulaire_addrl,
           titulaire_plainco,
           titulaire_nat]

    return row

In [None]:
def parse_xml(xml_file):
    """Collect one row per word mark found in ``xml_file``.

    Walks ``transac`` children of the document root, then their ``marinfo``
    children, and keeps those that contain a ``markve`` element. Each kept
    element is converted with ``retrieve_marque_data``.
    """
    root = et.parse(xml_file).getroot()

    # Marks carrying a verbal element (markve), in document order.
    return [
        retrieve_marque_data(marinfo)
        for transac_node in root.findall("transac")
        for marinfo in transac_node.findall("marinfo")
        if marinfo.find("markve") is not None
    ]

In [None]:
def clean_marque_verbal(xml_file_path):
    """Build the word-mark table from an XML file and export it to PDF.

    The XML is parsed with ``parse_xml``, the holder fields are merged into a
    single ``titulaire`` column, the columns are renamed, and the table is
    written to ``marque_verbal.html`` and converted to ``marque_verbal.pdf``
    (both in the current working directory).

    Parameters
    ----------
    xml_file_path : str
        Path of the XML file to read.

    Returns
    -------
    The value returned by ``pdfkit.from_file``.
    """
    # The HTML is converted from the very file written below; previously the
    # conversion read a hard-coded absolute path that only matched when the
    # working directory happened to be that folder.
    html_path = "marque_verbal.html"
    pdf_path = "marque_verbal.pdf"

    with open(xml_file_path, 'r') as xml_file:
        rows = parse_xml(xml_file)

    columns = ['basappn', 'basregn', 'markve',
               'classes',
               'titulaire_name',
               'titulaire_addrl',
               'titulaire_plainco',
               'titulaire_nat']

    df = pd.DataFrame(rows, columns=columns)
    df['titulaire'] = df.titulaire_name.fillna('') + ' ' + df.titulaire_addrl.fillna('') + ' ' + df.titulaire_plainco.fillna('') + ' ' + df.titulaire_nat.fillna('')
    df = df.drop(['titulaire_name', 'titulaire_addrl', 'titulaire_plainco',
                  'titulaire_nat'], axis=1)
    #old,new
    df = df.rename(columns={'classes': 'Classes de Nice', 'basappn': 'No de la demande', 'basregn': 'No de la marque', 'basannd': 'Depot', 'markve': 'marque'})
    df['titulaire'] = df['titulaire'].apply(remove_accents).str.replace(r'\n', '', regex=True)
    df['Classes de Nice'] = df['Classes de Nice'].apply(lambda x: ','.join(map(str, x)))

    df_verbal_html = df.to_html().replace(
        '<table border="1" class="dataframe">',
        '<table border="1" style="border-collapse: '
        'collapse;text-align: center;font-family:Calibri;font-size:10px;" '
        'class="dataframe">').replace(
        '<tr style="text-align: right;">',
        '<tr style="text-align: center;table-layout: fixed; width: auto; index:False";>')

    #save html result
    with open(html_path, "w") as f:
        f.write(df_verbal_html)

    try:
        config = pdfkit.configuration(wkhtmltopdf='C:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltopdf.exe')
        return pdfkit.from_file(html_path, pdf_path, configuration=config)
    except OSError:
        # Fall back to pdfkit's default wkhtmltopdf lookup. The message is
        # printed before returning; it used to sit after the return statement
        # and was never shown.
        print("wkhtmltopdf not usable at the configured location; retrying with the default lookup")
        return pdfkit.from_file(html_path, pdf_path, verbose=True)
        
        
    

In [None]:
# Build the word-mark report from the saved XML file.
# NOTE(review): the input path is an absolute, machine-specific location.
clean_marque_verbal("C:/Users/eakli/response.xml")

True

# Raison de commerce

In [None]:
#function to search for company name and export the result table to PDF
def commmerce_name_search(query):
    """Search the Zefix company register for ``query`` and export the hits to PDF.

    Sends a POST request to the Zefix ``company/search`` endpoint for active
    companies whose name matches ``query``, flattens the JSON answer into a
    table, keeps and renames the name / UID / seat / legal-form columns,
    writes the table to ``marque_commerce.html`` and converts it to
    ``marque_commerce.pdf`` (both in the current working directory).

    Parameters
    ----------
    query : str
        Company name to search for.

    Returns
    -------
    The value returned by ``pdfkit.from_file``.

    Raises
    ------
    RuntimeError
        If the service does not answer with HTTP status 200.
    """
    # Credentials are read from the environment (with an interactive prompt as
    # a fallback) so that no secret is stored in the notebook.
    username = os.environ.get("ZEFIX_USERNAME") or input("Zefix username: ")
    password = os.environ.get("ZEFIX_PASSWORD") or getpass.getpass("Zefix password: ")
    url = 'https://www.zefix.admin.ch/ZefixPublicREST/api/v1/company/search'

    # Build the payload as a dict: interpolating the query into a JSON string
    # broke as soon as the query contained a quote or a backslash.
    payload = {"activeOnly": True, "name": query}

    #define headers
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json"
    }

    #post request using above params
    response = requests.post(url,
                             headers=headers,
                             auth=(username, password),
                             json=payload)

    # Fail with an explicit message; previously a non-200 answer fell through
    # to a NameError on the undefined HTML variable.
    if response.status_code != 200:
        raise RuntimeError(
            f"Zefix company search failed with HTTP status {response.status_code}")

    df = json_normalize(response.json())

    #drop unwanted columns
    df = df.drop(['ehraid', 'chid', 'legalSeatId',
                  'registryOfCommerceId', 'status', 'sogcDate', 'deletionDate',
                  'legalForm.id', 'legalForm.uid', 'legalForm.name.de',
                  'legalForm.name.fr', 'legalForm.name.it', 'legalForm.name.en',
                  'legalForm.shortName.de',
                  'legalForm.shortName.it', 'legalForm.shortName.en'], axis=1)

    #rename cols with new names (copied then dropped so the new columns end up last)
    df[['Raison de commerce', 'IDE', 'Siege', 'Forme']] = df[['name', 'uid', 'legalSeat', 'legalForm.shortName.fr']]

    #drop old names
    df = df.drop(["name", "uid", "legalSeat", "legalForm.shortName.fr"], axis=1)

    df['Siege'] = df['Siege'].apply(remove_accents)
    df['Raison de commerce'] = df['Raison de commerce'].apply(remove_accents)
    df['Forme'] = df['Forme'].apply(remove_accents)

    df_commerce_html = df.to_html().replace(
        '<table border="1" class="dataframe">',
        '<table border="1" style="border-collapse: '
        'collapse;text-align: center;font-family:Calibri;font-size:20px;" '
        'class="dataframe;">').replace(
        '<tr style="text-align: right;">',
        '<tr style="text-align: center;table-layout: fixed; width: 100%; index:False";>')

    # The HTML is converted from the very file written here; previously the
    # conversion read a hard-coded absolute path.
    html_path = "marque_commerce.html"
    pdf_path = "marque_commerce.pdf"

    #save html result
    with open(html_path, "w") as f:
        f.write(df_commerce_html)

    try:
        config = pdfkit.configuration(wkhtmltopdf='C:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltopdf.exe')
        return pdfkit.from_file(html_path, pdf_path, configuration=config)
    except OSError:
        # Fall back to pdfkit's default wkhtmltopdf lookup.
        return pdfkit.from_file(html_path, pdf_path, verbose=True)

In [None]:
# Run the company-name search for the trademark entered above.
commmerce_name_search(query)

True

# Domain Availability

In [None]:
# Imported under an explicit alias: the former star import rebound the name
# `Client`, which the Swissreg connection cell needs to be the suds client.
from domainavailability import Client as DomainAvailabilityClient

def domain_name_checker(query):
    """Check availability of ``<query>.ch`` and ``<query>.swiss`` and export to PDF.

    Each domain is queried through the Domain Availability client in XML
    format, the two answers are stacked into one table, the
    ``domainAvailability`` column is translated ("AVAILABLE" becomes
    'Disponible', anything else 'Pas Disponible'), and the table is written to
    ``domain_Name.html`` and converted to ``domain_Name.pdf``.

    Parameters
    ----------
    query : str
        Domain label to check (without top-level domain).

    Returns
    -------
    The value returned by ``pdfkit.from_file``.
    """
    # The API key is read from the environment (with an interactive prompt as
    # a fallback) so that it is not stored in the notebook.
    api_key = (os.environ.get('DOMAIN_AVAILABILITY_API_KEY')
               or getpass.getpass('Domain Availability API key: '))
    client = DomainAvailabilityClient(api_key)

    frames = []
    for tld in ('.ch', '.swiss'):
        raw_result = client.raw_data(
            f'{query}' + tld,
            mode=DomainAvailabilityClient.DNS_AND_WHOIS_MODE,
            credits_type=DomainAvailabilityClient.WHOIS_CREDITS,
            output_format=DomainAvailabilityClient.XML_FORMAT)
        # pandas deprecates passing literal XML to read_xml; wrap it in a buffer.
        buffer = io.BytesIO(raw_result) if isinstance(raw_result, bytes) else io.StringIO(raw_result)
        frames.append(pd.read_xml(buffer))

    df = pd.concat(frames, ignore_index=True)

    # Translate in a single pass. The former loop replaced values in place
    # while iterating over the same column, through a chained `inplace` call.
    df['domainAvailability'] = df['domainAvailability'].map(
        lambda status: 'Disponible' if status == "AVAILABLE" else 'Pas Disponible')

    df_domain_html = df.to_html().replace(
        '<table border="1" class="dataframe">',
        '<table border="1" style="border-collapse: '
        'collapse;text-align: center;font-family:Calibri;font-size:20px;" '
        'class="dataframe;">').replace(
        '<tr style="text-align: right;">',
        '<tr style="text-align: center;table-layout: fixed; width: 100%; index:False";>')

    #save html result
    with open("domain_Name.html", "w") as f:
        f.write(df_domain_html)
    config = pdfkit.configuration(wkhtmltopdf='C:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltopdf.exe')
    return pdfkit.from_file('domain_Name.html', 'domain_Name.pdf', configuration=config)

       

In [None]:
# Check the .ch and .swiss domains for the trademark entered above.
domain_name_checker(query)

True

In [None]:
from PyPDF2 import PdfMerger

# Merge the four generated reports, in this order, into LegaNota.pdf.
# The inputs are read from the current working directory, which is where the
# report functions write them; the former absolute C:/Users/... paths only
# worked when the notebook was run from that folder.
merger = PdfMerger()

for pdf in ["marque_verbal.pdf", "marque_logo.pdf", "marque_commerce.pdf", "domain_Name.pdf"]:
    merger.append(pdf)

merger.write("LegaNota.pdf")
merger.close()