In [1]:
# This notebook makes the data safe for public publishing.
# Aim: remove email addresses and full names, replacing them with IDs and abbreviations.

In [2]:
import pandas as pd
import numpy as np
import os
import random
import string


# Lift pandas' display truncation: a displayed frame shows every row and every column.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [3]:
# Show the contents of the current working directory; the CSV files read in the
# next cell are opened by relative path, so they are expected to be listed here.
os.listdir()

['.git',
 '.gitignore',
 '.ipynb_checkpoints',
 '0.0_Container for Events.ipynb',
 '2025_Events_Planning.csv',
 'Asana import and Clean.ipynb',
 'asanaData.csv',
 'asanaData_single.csv',
 'asana_dates_clean.csv',
 'Children_Master.csv',
 'connection1',
 'connectionTest',
 'Engagment_counts.csv',
 'Events',
 'Family Futures EventTabling Feedback.csv',
 'Image20250630140836.png',
 'Local_asanaData.csv',
 'Local_asanaData_single.csv',
 'Local_Emails_Names.csv',
 'Local_SurveyCompletions.csv',
 'Local_surveyMonkey.csv',
 'Private_Local to Public.ipynb',
 'README.md',
 'Reports',
 'Survey Completion.ipynb',
 'SurveyCompletions.csv',
 'SurveyMonkey import and Clean.ipynb',
 'surveyMonkey.csv',
 'Tableau Events',
 'test',
 'Thumbs.db']

In [4]:
# Read every 'Local_*' CSV this notebook works on, in one pass.
# The order of the file names below matches the order of the variables they fill.
(
    Emails_Names_raw,
    asanaData_single_cleaned,
    asanaData_cleaned,
    surveyMonkey_cleaned,
    SurveyCompletions_report,
) = [
    pd.read_csv(csv_name)
    for csv_name in (
        'Local_Emails_Names.csv',
        'Local_asanaData_single.csv',
        'Local_asanaData.csv',
        'Local_surveyMonkey.csv',
        'Local_SurveyCompletions.csv',
    )
]

In [5]:
# Preview the first five rows of the name / e-mail lookup table.
# NOTE(review): the rendered output of this cell shows real names and e-mail
# addresses - clear the output before sharing or committing the notebook.
Emails_Names_raw.head()

Unnamed: 0,Display name,Department,Username,Email
0,Accounting and HR,Email,AccountingandHR,AccountingandHR@familyfutures.org
1,Adison Fryman,Staff,afryman,afryman@familyfutures.org
2,Administrator Template,Special Purpose,,@familyfutures.org
3,Adobe Software 1,,AdobeSoftware1,FALSE
4,Adobe Software 2,,AdobeSoftware2,FALSE


In [6]:
# Build the two de-identified keys used by the rest of the notebook: ID and CodeName.

# ID = first letter of Department (upper-cased) + 1-based row number, zero-padded to 5 digits.
# A missing Department produces a NaN ID; those rows are dropped at the end of this cell.
Emails_Names_raw["ID"] = (
    Emails_Names_raw["Department"].str[0].str.upper()
    + (Emails_Names_raw.index + 1).astype(str).str.zfill(5)
)

# CodeName = first 3 letters of the first word + first 3 letters of the remainder of the name.
# reindex() guarantees that column 1 exists even when no display name contains a space
# (it is then all-NaN instead of raising a KeyError).
name_split = (
    Emails_Names_raw["Display name"]
    .str.split(" ", n=1, expand=True)
    .reindex(columns=[0, 1])
)
first_name = name_split[0].str[:3]   # first 3 letters of first name
last_name = name_split[1].str[:3]    # first 3 letters of last name
Emails_Names_raw["CodeName"] = first_name + last_name

# Keep only rows that received both keys. copy() makes the result an independent frame,
# so the column assignments in this cell stay safe when the cell is re-run.
Emails_Names_raw = Emails_Names_raw.dropna(subset=["ID", "CodeName"]).copy()

# Two different people can abbreviate to the same CodeName, which would merge their records
# in every public file. Number the distinct display names inside each CodeName
# (0 = first seen) and append "2", "3", ... to the later ones. Repeated rows of the same
# display name keep the same CodeName, and unique CodeNames are left unchanged.
name_rank = Emails_Names_raw.groupby("CodeName")["Display name"].transform(
    lambda names: pd.factorize(names)[0]
)
collision_suffix = (name_rank + 1).astype(str).where(name_rank > 0, "")
Emails_Names_raw["CodeName"] = Emails_Names_raw["CodeName"] + collision_suffix

# Safety net: every CodeName must now stand for exactly one display name.
assert (
    Emails_Names_raw.groupby("CodeName")["Display name"].nunique().le(1).all()
), "CodeName collision: two different display names share one CodeName"

In [7]:
# Spot-check the generated ID / CodeName columns on a bounded sample. Display truncation
# is switched off in this notebook, so a bare frame would print every row.
# NOTE(review): the output still shows real names and e-mail addresses - clear it before
# sharing or committing the notebook.
Emails_Names_raw.head(10)

Unnamed: 0,Display name,Department,Username,Email,ID,CodeName
0,Accounting and HR,Email,AccountingandHR,AccountingandHR@familyfutures.org,E00001,Accand
1,Adison Fryman,Staff,afryman,afryman@familyfutures.org,S00002,AdiFry
2,Administrator Template,Special Purpose,,@familyfutures.org,S00003,AdmTem
10,Adrianna Lynn,External Healthy Families,,@familyfutures.org,E00011,AdrLyn
16,Alissa Rodriguez,External Healthy Families,,@familyfutures.org,E00017,AliRod
17,Allison Vriesenga,External Healthy Families,,@familyfutures.org,E00018,AllVri
18,Alyce Hernandez,External Healthy Families,ahernandez,ahernandez@familyfutures.org,E00019,AlyHer
19,Alysse Calabio,Intern,acalabio,acalabio@familyfutures.org,I00020,AlyCal
20,Amanda Vins,Staff,avins,avins@familyfutures.org,S00021,AmaVin
21,Amelia Stanley,External Healthy Families,,@familyfutures.org,E00022,AmeSta


In [8]:
# Section 2
#   Remove private information from each dataset and write the public files

In [9]:
# 1: asanaData_single_cleaned
# Column to adjust: 'Assignee' holds a full name and is replaced by the person's CodeName.

# Lookup from full display name to CodeName. Every CodeName also maps to itself, so
# re-running this cell leaves already-converted values alone instead of turning them
# into NaN and overwriting the public CSV with an empty Assignee column.
name_to_code = {code: code for code in Emails_Names_raw["CodeName"]}
name_to_code.update(zip(Emails_Names_raw["Display name"], Emails_Names_raw["CodeName"]))

# Assignees that are missing from the lookup become NaN, so an unmatched full name is
# never written to the public file.
asanaData_single_cleaned["Assignee"] = asanaData_single_cleaned["Assignee"].map(name_to_code)

# Write the public version of the file.
asanaData_single_cleaned.to_csv('asanaData_single.csv')

In [13]:
# 2: asanaData_cleaned

# Columns whose cells name people (a single person or a comma-separated list).
Columns_to_Remove_names = {   'Staff Attending',
 'FAB Attending',
 'Intern(s) Attending',
 'Volunteers Attending',
 "Assignee"
       }

# Written in place of any person who cannot be matched to the lookup table, so that an
# unmatched full name or e-mail address is never copied into the public file. This mirrors
# the other cells, where Series.map() turns unmatched names into NaN.
UNMATCHED_PERSON = "Unknown"

# Lookup from every known identifier of a person to their CodeName.
# 1) Code names (and the placeholder) map to themselves, so re-running the cell is a no-op.
name_to_code = {code: code for code in Emails_Names_raw["CodeName"]}
name_to_code[UNMATCHED_PERSON] = UNMATCHED_PERSON
# 2) E-mail addresses. Values shared by several rows (placeholders such as
#    "@familyfutures.org" or "FALSE") are skipped: they do not identify one person and
#    would otherwise resolve to whichever row happens to come last.
unique_emails = (
    Emails_Names_raw
    .dropna(subset=["Email"])
    .drop_duplicates(subset="Email", keep=False)
)
name_to_code.update(zip(unique_emails["Email"], unique_emails["CodeName"]))
# 3) Full display names ('Assignee' holds a full name).
name_to_code.update(zip(Emails_Names_raw["Display name"], Emails_Names_raw["CodeName"]))


def map_names_to_codes(cell):
    """Replace every person named in one cell by their CodeName.

    Parameters
    ----------
    cell : list, str, or any other scalar
        A list of names (the form this function itself produces), a comma-separated
        string of names / e-mail addresses, or a value with no names in it
        (NaN, None, "" or a non-text scalar).

    Returns
    -------
    list of str
        One entry per non-empty name: its CodeName, or UNMATCHED_PERSON when the name
        is not in the lookup.
    Any value that is neither a list nor a non-empty string is returned unchanged.
    """
    if isinstance(cell, list):
        people = cell
    elif isinstance(cell, str) and cell != "":
        people = cell.split(",")
    else:
        # NaN, None, "" and non-text scalars carry no names - leave them untouched.
        return cell

    cleaned = (str(person).strip() for person in people)
    # Empty fragments (e.g. from a trailing comma) are not people and are skipped.
    return [name_to_code.get(person, UNMATCHED_PERSON) for person in cleaned if person]


# Apply to all columns in the set that exist in the dataframe
for col in Columns_to_Remove_names:
    if col in asanaData_cleaned.columns:
        asanaData_cleaned[col] = asanaData_cleaned[col].apply(map_names_to_codes)

# Write the public version of the file.
asanaData_cleaned.to_csv('asanaData.csv')


In [50]:
# 3: surveyMonkey_cleaned

# Identifying columns that are removed outright.
Columns_to_Remove2 = {'First Name','Last Name', 'Email'}
surveyMonkey_cleaned = surveyMonkey_cleaned.drop(columns=Columns_to_Remove2)

# Lookup: full display name -> CodeName
name_to_code = Emails_Names_raw.set_index("Display name")["CodeName"].to_dict()

# Swap each 'Person Name' for its CodeName; names missing from the lookup become NaN.
person_codes = surveyMonkey_cleaned["Person Name"].map(name_to_code)
surveyMonkey_cleaned["Person Name"] = person_codes

# Rebuild an 'Email' column from the CodeName. Rows without a CodeName get NaN here too.
surveyMonkey_cleaned['Email'] = person_codes + '@FamilyFutures.org'

# Write the public version of the file.
surveyMonkey_cleaned.to_csv('surveyMonkey.csv')


In [51]:
# 4: SurveyCompletions_report

# Columns that are removed outright before publishing.
Columns_to_Remove3 = {'First Name','Last Name', 'Attending Email','Email','Email SM','Unnamed: 0','Username'}
SurveyCompletions_report = SurveyCompletions_report.drop(columns=Columns_to_Remove3)

# Lookup: full display name -> CodeName
name_to_code = Emails_Names_raw.set_index("Display name")["CodeName"].to_dict()

# Swap each 'Display name' for its CodeName; names missing from the lookup become NaN.
display_names = SurveyCompletions_report["Display name"]
SurveyCompletions_report["Display name"] = display_names.map(name_to_code)

# Write the public version of the file.
SurveyCompletions_report.to_csv('SurveyCompletions.csv')