# Contacts organizing and exporting

* 1. read and clean data
* 2. check if df['Name'] has 'Mbo', 'Brm', 'Val', 'Vale' and create a new col df['City']
* 3. filter dataframe df['City'] to get only those contacts;
* 4. create a new dataframe from those contacts with only wanted columns
* 5. clean wanted data
* 6. export to excel file

In [1]:
import pandas as pd
# pip3 install XlsxWriter -- to work with excel files with pandas

### 1. Read data and pre-clean data

In [2]:
df = pd.read_csv('new_contacts.csv', engine='python')

In [3]:
# set the row 0 as the header names for the columns
df.columns = df.iloc[0]

In [4]:
# keep all rows, except 1st and 2nd row
df = df[2:]

### 2. check if df['Name'] has 'Mbo', 'Brm', 'Val', 'Vale' and create a new col df['City']

* if in full name, it starts with keywords of cities
* return a full city name and addede to a new columns for filtering

In [5]:
def check_name_index(row):
    name = row['Name']
    if type(name) is str:
        splitted = name.split(" ")
        if splitted[0].lower() == 'brm':
            return 'Barquisimeto'
        if splitted[0].lower() == 'mbo':
            return 'Maracaibo'
        if splitted[0].lower() == 'val':
            return 'Valencia'
        if splitted[0].lower() == 'vale':
            return 'Valera'

df['Ciudad'] = df.apply(lambda row: check_name_index(row), axis='columns')

### 3. filter dataframe df['City'] to get only those contacts;

In [6]:
# filter only contacts who started with city keywords in full name and create new dataframe
condition = (df['Ciudad'].notnull())
contactsdf = df[condition]

### 4. create a new dataframe from those contacts with only wanted columns

In [7]:
# 4.1 filter only wanted colums in dataframe
contact_cols = ['Name','Given Name','Additional Name','Family Name','Birthday','Ciudad','Phone 1 - Value']
contactsdf = contactsdf.loc[:, contactsdf.columns.isin(contact_cols)]

### TODO create new columns for new wanted data to the dataframe


In [8]:
# 4.2 rename columns
new_names = {
    'Name':'Nombre Completo',
    'Given Name':'Primer Nombre',
    'Additional Name':'Segundo Nombre',
    'Family Name':'Apellido',
    'Birthday':'Nacimiento',
    'Ciudad':'Ciudad',
    'Phone 1 - Value':'Telefono',
}
contactsdf.rename(columns = new_names, inplace = True)
# contactsdf.head(10)

In [9]:
# 4.3 add new columns
#------------------------------------------------------
#add more than one columns
# new_cols = {
#     'Direccion' : lambda contactsdf: 5*5,
#     'Correo' : lambda contactsdf: '',
#     'Correo2' : lambda contactsdf: '',
#     'Telefono2' : lambda contactsdf: '',
# }
# contactsdf.assign(**new_cols, inplace = True)
# #------
# contactsdf.assign(Area=lambda contactsdf: contactsdf.Apellido, inplace=True)
#-------------------------------------------------------

contactsdf['Cedula'] = ''
contactsdf['Direccion'] = ''
contactsdf['Correo'] = ''
contactsdf['Correo2'] = ''
contactsdf['Telefono'] = ''
contactsdf['Telefono2'] = ''
# contactsdf.drop(['Direccion', 'Correo', 'Correo2', 'Telefono2'], inplace = True, axis = 1)
contactsdf.head()

Unnamed: 0,Nombre Completo,Primer Nombre,Segundo Nombre,Apellido,Nacimiento,Telefono,Ciudad,Cedula,Direccion,Correo,Correo2,Telefono2
69,Brm Agustin D'Ongia,Brm,Agustin,D'Ongia,,,Barquisimeto,,,,,
70,Brm Alberto Pérez,Brm,Alberto,Pérez,,,Barquisimeto,,,,,
71,Brm Alejadra Maury Eventos,Brm Alejadra,Maury,Eventos,,,Barquisimeto,,,,,
72,Brm Alejandro Vela,Brm,Alejandro,Vela,,,Barquisimeto,,,,,
73,Brm Alexander,Brm,,Alexander,,,Barquisimeto,,,,,


In [10]:
# 4.4 order all columns
cols_order = [
    'Cedula',
    'Ciudad',
    'Telefono',
    'Correo',
    'Nombre Completo',
    'Direccion',
    'Primer Nombre',
    'Segundo Nombre',
    'Apellido',
    'Telefono2',
    'Correo2',
]

contactsdf = contactsdf[cols_order]
contactsdf.head(300)

Unnamed: 0,Cedula,Ciudad,Telefono,Correo,Nombre Completo,Direccion,Primer Nombre,Segundo Nombre,Apellido,Telefono2,Correo2
69,,Barquisimeto,,,Brm Agustin D'Ongia,,Brm,Agustin,D'Ongia,,
70,,Barquisimeto,,,Brm Alberto Pérez,,Brm,Alberto,Pérez,,
71,,Barquisimeto,,,Brm Alejadra Maury Eventos,,Brm Alejadra,Maury,Eventos,,
72,,Barquisimeto,,,Brm Alejandro Vela,,Brm,Alejandro,Vela,,
73,,Barquisimeto,,,Brm Alexander,,Brm,,Alexander,,
74,,Barquisimeto,,,Brm Alexander Fiacco,,Brm,Alexander,Fiacco,,
75,,Barquisimeto,,,Brm Ana Patricia,,Brm,Ana,Patricia,,
76,,Barquisimeto,,,Brm Ana Riera,,Brm,Ana,Riera,,
77,,Barquisimeto,,,Brm Angela Lapenta,,Brm,Angela,Lapenta,,
78,,Barquisimeto,,,Brm Antoinette Bujana 04145083271,,Brm Antoinette,Bujana,04145083271,,


### 5. clean wanted data

#### 5.1 cleaning contact number

In [11]:
def remove_lead_trail_whitespaces(num):
    return num.strip()

def remove_right_parenthesis(num):
    return num.replace(')','')
 
def remove_left_parenthesis(num):
    return num.replace('(','')
 
def remove_dashes(num):
    return num.replace('-','')

def remove_inside_whitespaces(num):
    return num.replace(' ','')

def add_country_code(num):
    '''add the venezuelan country code if "0" found at the begginning of contact number'''
    if num.startswith('0'):
        return num.replace('0','+58', 1) # replaces the "0" for country code only once
    return num

In [16]:
def clean_contact_number(row):
    num = str(row['Telefono'])
    num = remove_lead_trail_whitespaces(num)
    num = remove_right_parenthesis(num)
    num = remove_left_parenthesis(num)
    num = remove_dashes(num)
    num = remove_inside_whitespaces(num)
    num = add_country_code(num)
    return num

contactsdf['Telefono'] = contactsdf.apply(lambda row: clean_contact_number(row), axis='columns')
contactsdf.head()

TypeError: string indices must be integers

### 6. export to excel file

In [13]:
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('vitis.xlsx', engine='xlsxwriter')

# Convert the dataframe to an XlsxWriter Excel object.
contactsdf.to_excel(writer, sheet_name='contacts')

# Close the Pandas Excel writer and output the Excel file.
writer.save()