In [2]:
import pandas as pd

# Read the deliveries export into the DataFrame 'del_df'.
# The file uses ';' as its field delimiter, so the separator is passed
# explicitly instead of relying on read_csv's default of ','.
deliveries_path = './deliveries.csv'
del_df = pd.read_csv(deliveries_path, sep=';')

# Preview the first 5 rows to sanity-check the load.
del_df.head()

Unnamed: 0,id,campaign_id,campaign_name,newsletter_id,newsletter_name,template_id,template_name,transactional_message_id,transactional_message_name,subject,...,unsubscribed_RFC3339,suppressed,suppressed_RFC3339,failed,failed_RFC3339,drafted,drafted_RFC3339,topic_unsubscribed,topic_unsubscribed_RFC3339,email
0,RPCtBwUAAYw2N2cGLMlm-0X9RlZ8ZQ==,,,,,242,Low data,4.0,Low data,Running out of data?,...,,,,,,,,,,reggidori@allin-partners.com
1,RPCtBwUAAYw2JZ5ZnHbPWa9Mgfeamw==,,,,,244,Video uploaded,6.0,Video uploaded,Video successfully uploaded!,...,,,,,,,,,,hamdi@videoblogg.com
2,dgTwrQcAAMfGCMbGCAGMNcXPsI8buTfq_C49eIw=,28.0,Signup onboarding v3,,,223,Share live stream features,,,Take your streams to the next level with Dacast,...,,,,,,,,,,maheshmahi@gamil.con
3,dgTwrQcAALjJCLfJCAGMNcXPpfPDnufKi-jyyz0=,28.0,Signup onboarding v3,,,205,Share live stream features,,,Take your streams to the next level with Dacast,...,,,,,,,,,,anshul@realimapct.tv
4,dgTwrQcAAJnJCJjJCAGMNcXPpe2eqZyz7iMBYxs=,28.0,Signup onboarding v3,,,222,Onboarding Stream,,,Start streaming with Dacast today,...,,,,,,,,,,johncookj11@netzero.com


In [3]:
# Adds a boolean column 'non existent' that is True when 'failure_message'
# contains the text 'does not exist' or the code '5.1.1' (both were found to
# indicate that the email could not be delivered because the address does not
# exist), and False otherwise.
#
# Implementation notes:
# - The column is cast to str first so the .str accessor can be used even if
#   the column holds missing values or non-string entries.
# - regex=False makes both needles plain, case-sensitive substrings, so the
#   dots in '5.1.1' are matched literally.
# - na=False maps any value that is still missing after the cast to False.
failure_text = del_df['failure_message'].astype(str)
del_df['non existent'] = (
    failure_text.str.contains('does not exist', regex=False, na=False)
    | failure_text.str.contains('5.1.1', regex=False, na=False)
)

# Displays the first 5 rows of the DataFrame with the new 'non existent' column.
del_df.head()

Unnamed: 0,id,campaign_id,campaign_name,newsletter_id,newsletter_name,template_id,template_name,transactional_message_id,transactional_message_name,subject,...,suppressed,suppressed_RFC3339,failed,failed_RFC3339,drafted,drafted_RFC3339,topic_unsubscribed,topic_unsubscribed_RFC3339,email,non existent
0,RPCtBwUAAYw2N2cGLMlm-0X9RlZ8ZQ==,,,,,242,Low data,4.0,Low data,Running out of data?,...,,,,,,,,,reggidori@allin-partners.com,False
1,RPCtBwUAAYw2JZ5ZnHbPWa9Mgfeamw==,,,,,244,Video uploaded,6.0,Video uploaded,Video successfully uploaded!,...,,,,,,,,,hamdi@videoblogg.com,True
2,dgTwrQcAAMfGCMbGCAGMNcXPsI8buTfq_C49eIw=,28.0,Signup onboarding v3,,,223,Share live stream features,,,Take your streams to the next level with Dacast,...,,,,,,,,,maheshmahi@gamil.con,False
3,dgTwrQcAALjJCLfJCAGMNcXPpfPDnufKi-jyyz0=,28.0,Signup onboarding v3,,,205,Share live stream features,,,Take your streams to the next level with Dacast,...,,,,,,,,,anshul@realimapct.tv,False
4,dgTwrQcAAJnJCJjJCAGMNcXPpe2eqZyz7iMBYxs=,28.0,Signup onboarding v3,,,222,Onboarding Stream,,,Start streaming with Dacast today,...,,,,,,,,,johncookj11@netzero.com,False


In [5]:
# Builds 'non_exist_df', a Series of the unique emails flagged as non-existent:
#   1. keep only the rows of 'del_df' whose 'non existent' flag is True and
#      select their 'email' column (the flag column is already boolean, so it
#      is used directly as the row mask);
#   2. drop repeated addresses, since some emails appear more than once;
#   3. reset the index to a simple 0..n-1 numerical sequence.
# Each step returns a new object instead of mutating with inplace=True, so the
# only effect of the cell is (re)binding 'non_exist_df'.
non_exist_df = (
    del_df.loc[del_df['non existent'], 'email']
    .drop_duplicates()
    .reset_index(drop=True)
)

# Displays the first 5 entries of the resulting Series with the unique non-existent emails.
non_exist_df.head()

Unnamed: 0,email
0,hamdi@videoblogg.com
1,wooodbenradel2018@gmail.com
2,keith456@stcharles.k12.la.us
3,chloeherman33@gmail.com
4,vatistasantinis1@gmail.com


In [6]:
# Write the unique non-existent emails held in 'non_exist_df' to a CSV file.
# index=False keeps the Series index out of the written file.
output_path = './non_existent_emails.csv'
non_exist_df.to_csv(output_path, index=False)