# Contacts organizing and exporting

* 1. read and clean data
* 2. check whether df['Name'] starts with 'Mbo', 'Brm', 'Val' or 'Vale' and create a new column df['Ciudad']
* 3. filter the dataframe on df['Ciudad'] to keep only those contacts
* 4. create a new dataframe from those contacts with only wanted columns
* 5. export to excel file

In [12]:
import pandas as pd
# pip3 install XlsxWriter -- to work with excel files with pandas

### 1. Read data and clean data

In [2]:
# Load the raw contacts export, explicitly requesting the pure-Python parser engine.
df = pd.read_csv(filepath_or_buffer='new_contacts.csv', engine='python')

In [3]:
# Use the values of the first data row (position 0) as the column labels.
df.columns = df.iloc[0, :]

In [4]:
# Keep every row except the first two (positions 0 and 1).
# .copy() makes the result an independent frame, so adding a column to it
# later does not raise pandas' SettingWithCopyWarning.
df = df[2:].copy()

### 2. Check whether df['Name'] starts with 'Mbo', 'Brm', 'Val' or 'Vale' and create a new column df['Ciudad']

* if the full name starts with one of the city keywords,
* return the full city name and add it to a new column used for filtering

In [15]:
# Lower-cased first word of a contact name -> full city name.
_CITY_BY_PREFIX = {
    'brm': 'Barquisimeto',
    'mbo': 'Maracaibo',
    'val': 'Valencia',
    'vale': 'Valera',
}


def check_name_index(row):
    """Return the city encoded in the first word of ``row['Name']``.

    Args:
        row: Mapping-like object (for example a DataFrame row) that has a
            ``'Name'`` entry.

    Returns:
        The full city name when the first whitespace-separated word of the
        name is one of the known city keywords (compared case-insensitively),
        otherwise ``None``. Non-string names (such as NaN for a missing
        value) and blank names also yield ``None``.
    """
    name = row['Name']
    if not isinstance(name, str):
        return None
    # split() without an argument ignores leading and repeated whitespace, so
    # a name such as ' Brm Ana' is still recognised; blank names give [].
    words = name.split()
    if not words:
        return None
    return _CITY_BY_PREFIX.get(words[0].lower())

# Run check_name_index on every row and store the result in the 'Ciudad' column.
df['Ciudad'] = df.apply(check_name_index, axis=1)

### 3. Filter the dataframe on df['Ciudad'] to keep only those contacts

In [16]:
# Keep only the contacts whose 'Ciudad' value is not null, i.e. whose full
# name started with one of the city keywords.
condition = df['Ciudad'].notna()
contactsdf = df.loc[condition]

### 4. create a new dataframe from those contacts with only wanted columns

In [17]:
# Keep only the wanted columns. The boolean mask preserves the dataframe's own
# column order and simply skips any listed name that is not present.
contact_cols = [
    'Cedula',
    'Direccion',
    'Name',
    'Given Name',
    'Additional Name',
    'Family Name',
    'Nacimiento',
    'Ciudad',
    'Phone 1 - Value',
]
contactsdf = contactsdf.loc[:, contactsdf.columns.isin(contact_cols)]

### 5. export to excel file

In [19]:
# Write the contacts dataframe to the 'contacts' sheet of vitis.xlsx, using
# XlsxWriter as the engine.
# The context manager saves and closes the workbook on exit, even if the
# export raises. ExcelWriter.save() was deprecated in pandas 1.5 and removed
# in pandas 2.0, so it must not be called directly.
with pd.ExcelWriter('vitis.xlsx', engine='xlsxwriter') as writer:
    contactsdf.to_excel(writer, sheet_name='contacts')

## Cleaning contact numbers

In [15]:
def replace_right_parenth(num):
    """Return ``num`` with every ')' character removed."""
    return num.translate(str.maketrans('', '', ')'))
 
def replace_left_parenth(num):
    """Return ``num`` with every '(' character removed."""
    return num.translate(str.maketrans('', '', '('))
 
def replace_dash(num):
    """Return ``num`` with every '-' character removed."""
    return num.translate(str.maketrans('', '', '-'))

def replace_space(num):
    """Return ``num`` with every ' ' (space) character removed."""
    return num.translate(str.maketrans('', '', ' '))

def add_country_code(num, country_code='+58'):
    """Return ``num`` with its leading dialling prefix in international form.

    Args:
        num: Phone number string, expected to have separators already removed.
        country_code: Prefix that replaces a single leading '0' (the national
            trunk prefix). Defaults to '+58'.

    Returns:
        * ``'+' + rest`` when ``num`` starts with the international call
          prefix '00' (the country code is already part of the number);
        * ``country_code + rest`` when ``num`` starts with a single '0';
        * ``num`` unchanged otherwise.
    """
    # '00' must be tested first: treating it as a trunk '0' would yield a
    # malformed number such as '+58058...'.
    if num.startswith('00'):
        return '+' + num[2:]
    if num.startswith('0'):
        return country_code + num[1:]
    return num

In [16]:
def clean_contact_number(nums):
    """Return a new list holding the cleaned form of each number in ``nums``.

    Each number has its spaces, parentheses and dashes removed and is then
    passed through ``add_country_code``. The input is not modified.
    """
    # Applied in order; add_country_code comes last so that it inspects the
    # number after the separators have been removed.
    steps = (
        replace_space,
        replace_right_parenth,
        replace_left_parenth,
        replace_dash,
        add_country_code,
    )
    cleaned = []
    for raw in nums:
        value = raw
        for step in steps:
            value = step(value)
        cleaned.append(value)
    return cleaned

In [17]:
# Sample numbers in mixed formats, used to try out clean_contact_number.
nums = [
    '0414 626-2025',
    '0 261-7925620',
    '+58 414-6327320',
    '+1 (786) 757-6206',
]
clean_contact_number(nums)

['+584146262025', '+582617925620', '+584146327320', '+17867576206']