# Cleaning Customer List - With User input that adds to the DataFrame!

## First things first import my libraries

In [1]:
import pandas as pd
from datetime import datetime

## Convert my CSV file into a DataFrame

In [2]:
# Load the pipe-delimited customer list into a DataFrame.
df = pd.read_csv('customer_list_updated.csv', sep='|')

# Trim stray whitespace around the header labels.
stripped_headers = df.columns.str.strip()
df.columns = stripped_headers

# Column names as they appear in the raw file (note the hyphenated
# 'sms-opt-out'). This list is reassigned later in the notebook, after the
# rename, before anything reads it.
columns = [
    'cust_id',
    'date',
    'time',
    'name',
    'email',
    'phone',
    'sms-opt-out',
]

In [3]:
# Preview the first ten rows of the freshly loaded frame.
df.head(n=10)

Unnamed: 0,cust_id,date,time,name,email,phone,sms-opt-out
0,1,2023-03-15,08:45:12,Rachel,rachel@centralperk.coffee,212-555-1001,N
1,2,2023-05-22,12:30:45,R&! Geller,rossg@centralperk.coffee,212-555-1002,N
2,3,2023-07-09,18:15:27,Monica Geller,chefmonica@centralperk.coffee,212-555-1003,N
3,4,2023-09-01,21:05:33,Chandler Bing,chandlerb@centralperk.coffee,212-555-1004,Y
4,5,2023-11-18,14:22:10,Joey,howyoudoing@centralperk.coffee,212-555-1005,N
5,6,2024-01-05,10:55:49,P&! Buffay,smellycat@centralperk.coffee,212-555-1006,N
6,7,2024-02-14,16:40:05,Gunther,gunther@centralperk.coffee,212-555-1007,N
7,8,2023-04-20,09:15:30,Janice,ohmygod@centralperk.coffee,212-555-1008,N
8,9,2023-06-30,13:50:55,Mike H.^,mike@centralperk.coffee,212-555-1009,N
9,10,2023-08-25,17:25:10,Emily,emily@centralperk.coffee,212-555-1010,N


## Now that we have a DataFrame to work with, LET'S GET TO THE FUN PART!

In [4]:
# Replace the hyphenated header with a snake_case name.
header_fixes = {'sms-opt-out': 'sms_opt_out'}
df = df.rename(columns=header_fixes)

In [5]:
# Display the current column labels to confirm the rename took effect.
df.columns

Index(['cust_id', 'date', 'time', 'name', 'email', 'phone', 'sms_opt_out'], dtype='object')

In [6]:
# Flag rows that repeat an earlier row across every column, then print them
# (an empty frame means no exact duplicates).
duplicate_mask = df.duplicated()
print(df[duplicate_mask])

Empty DataFrame
Columns: [cust_id, date, time, name, email, phone, sms_opt_out]
Index: []


## User Input data for new Customers!

**Convert the date column to a date format**

In [7]:
# Parse the date strings; values that cannot be parsed become NaT instead of
# raising. Then keep only the calendar-date part of each timestamp.
parsed_dates = pd.to_datetime(df['date'], errors='coerce')
df['date'] = parsed_dates.dt.date

**Determine the next cust_id**

In [8]:
# The next id is one past the largest existing id. With an empty (or
# all-missing) cust_id column, .max() returns NaN and the new id would be NaN
# too, so numbering starts at 1 in that case.
existing_ids = df['cust_id'].dropna()
next_cust_id = 1 if existing_ids.empty else int(existing_ids.max()) + 1

**Dictionary for new customer data.**

In [9]:
# Start the record for the new customer with its freshly assigned id; the
# remaining fields are added in the following cells.
new_data = dict(cust_id=next_cust_id)

**Collect the current time and date**

In [10]:
# Capture a single timestamp so the date and time fields agree.
current_datetime = datetime.now()

# Store the date as a datetime.date so it has the same type as the existing
# df['date'] values (produced with .dt.date); it is still written to CSV as
# YYYY-MM-DD. The time stays a HH:MM:SS string.
new_data['date'] = current_datetime.date()
new_data['time'] = current_datetime.strftime('%H:%M:%S')

**Define Columns and Collect User Input**

In [11]:
# Column order of the customer table. The first three entries are filled in
# automatically; the rest are collected from the user.
columns = [
    'cust_id',
    'date',
    'time',
    'name',
    'email',
    'phone',
    'sms_opt_out',
]

**Loop Through Specific Columns and Prompt for User Input**

In [12]:
# Prompt for every field after the auto-filled cust_id/date/time columns.
for col in columns[3:]:
    # Trim the answer before storing it: previously only the blank check used
    # the stripped text, so surrounding whitespace was saved with the value.
    value = input(f"Enter {col} (or leave blank for None): ").strip()
    # A blank answer is recorded as the "None" placeholder string.
    new_data[col] = value or "None"

Enter name (or leave blank for None):  Anthony
Enter email (or leave blank for None):  example@yahoo.com
Enter phone (or leave blank for None):  7732261000
Enter sms_opt_out (or leave blank for None):  Y


**Create a new row as a DataFrame and concatenate it to the existing DataFrame**

In [13]:
# Wrap the single record in a one-row frame so it can be concatenated.
new_row = pd.DataFrame(data=[new_data])

# Stack the new row under the existing rows and renumber the index from 0.
frames_to_stack = [df, new_row]
df = pd.concat(frames_to_stack, ignore_index=True)

**Save the Updated DataFrame to a CSV File**

In [14]:
# Write the list (now including the new customer) back to the pipe-delimited
# source file, leaving the row index out of the output.
df.to_csv('customer_list_updated.csv', sep='|', index=False)
print("New Data Entered ^_^")

New Data Entered ^_^


## Time to Clean up the brand new DataFrame with the added Row

**Handle Missing sms_opt_out Values**

In [15]:
# Rows with no recorded SMS preference are marked "Y" (opted out).
df['sms_opt_out'] = df['sms_opt_out'].fillna("Y")

**Fill any remaining missing values in all columns with the string "None"**

In [16]:
# Replace every remaining missing value in the listed columns with the
# literal string "None".
filled_columns = df[columns].fillna(value="None")
df[columns] = filled_columns

**Clean Phone Numbers**

In [17]:
def _last_ten_digits(raw):
    """Return the last ten digit characters of ``str(raw)`` (fewer if it has fewer)."""
    digits_only = ''.join(ch for ch in str(raw) if ch.isdigit())
    return digits_only[-10:]


# Strip everything except digits and keep at most the trailing ten.
df['phone'] = df['phone'].apply(_last_ten_digits)

In [19]:
def _format_phone(digits):
    """Format a ten-character string as XXX-XXX-XXXX; anything else becomes "None_Number"."""
    if len(digits) != 10:
        return "None_Number"
    return f"{digits[:3]}-{digits[3:6]}-{digits[6:]}"


# Present every phone value in the dashed 3-3-4 layout.
df['phone'] = df['phone'].apply(_format_phone)

**Convert email to lowercase and strip any whitespace**

In [20]:
# Lowercase the addresses, then trim surrounding whitespace.
# Note: the "None" placeholder written by the earlier fillna step is
# lowercased here as well and becomes "none".
lowercased_email = df['email'].str.lower()
df['email'] = lowercased_email.str.strip()

**Remove characters that are not letters, hyphens, periods, apostrophes, or spaces, then title-case and trim the name**

In [21]:
# Drop every character that is not an ASCII letter, hyphen, period,
# apostrophe, or space.
letters_only = df['name'].str.replace(r"[^a-zA-Z-.' ]", '', regex=True)

# Capitalise each word, then trim any whitespace left at the edges.
title_cased = letters_only.str.title()
df['name'] = title_cased.str.strip()

**Removes duplicates based on name and email**

In [22]:
# Deduplicate on name + email, as the heading for this step states. The
# previous key was ['cust_id', 'email']; because each added row receives a new
# cust_id, a customer entered twice would not be caught by that key.
repeated = df.duplicated(subset=['name', 'email'], keep='first')

# A missing email (NaN or the "None" placeholder, in any casing) does not
# identify a customer, so those rows are always kept rather than collapsed
# into one another.
email_text = df['email'].astype(str).str.lower()
missing_email = df['email'].isna() | email_text.eq("none")

df = df.loc[~repeated | missing_email]

In [23]:
# Export the cleaned table as a comma-separated file without the row index.
df.to_csv(path_or_buf="cleaned_customer_data.csv", index=False)