Use Cases:
1. Compare how many members you had in one month vs another (i.e March 2022 vs March 2023)
2. Get a list of all members that were members in both months
3. Get a list of who was a member in the past but no longer a member now

1. Load previous month datasets
2. Load current months datasets
3. Locate where names are the same
4. Spit out the names that are on previous month but not on current month
5. Tell me how many of which membership is they were on in the previous month

In [2]:
## Load Pandas
import pandas as pd

In [3]:
## Import datasets
previous_df = pd.read_csv('march2023.csv')
current_df = pd.read_csv('january2024.csv')
current_df

Unnamed: 0,Studio Code,Purchase Date,Invoice ID,Client ID,First Name,Last Name,Email,Phone #,Therapy Category,Business Category,Item,Sales Rep,Autopay Status,Tax Amount,Amount,Discount,Gross adjusted Revenue,Quantity,Credit Used
0,FL005,1/1/2024,31700346,211522,Christine,Norwitch,cswitch@bellsouth.net,3.053237e+09,Memberships,Membership,Core Membership,,Success,0,,0.0,279.0,1,0
1,FL005,1/1/2024,31700347,599282,Deborah,Marks,synergy928@aol.com,7.868533e+09,Memberships,Membership,Level Up Membership,,Success,0,,0.0,199.0,1,0
2,FL005,1/1/2024,31700348,669466,Vane,V,mv4@gmail.com,7.862710e+09,Memberships,Membership,Discover Membership,,Success,0,,0.0,119.0,1,0
3,FL005,1/1/2024,31700349,706036,Paula,Samper,paulasamper040612@gmail.com,7.204437e+09,Memberships,Membership,Discover Membership,,Success,0,,0.0,107.1,1,0
4,FL005,1/1/2024,31700350,714362,Joel,Castillo,cjoel777@me.com,3.057937e+09,Memberships,Membership,Discover Membership,,Success,0,,0.0,119.0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337,FL005,1/31/2024,31904047,617498,Rafael,Arte,rarte26@outlook.com,7.865893e+09,Memberships,Membership,Discover Membership,,Success,0,,0.0,119.0,1,0
338,FL005,1/31/2024,31904048,781589,Mariangelli,Alvarez,mariangelli25@gmail.com,7.863545e+09,Memberships,Membership,Level Up Membership,,Success,0,,0.0,179.1,1,0
339,FL005,1/31/2024,31904049,30596936,Bionny,Ayala,bionnyayala@aol.com,3.052995e+09,Memberships,Membership,Discover Membership,,Success,0,,0.0,119.0,1,0
340,FL005,1/31/2024,31904050,957039,Liz,Sanderson,elizabeth.h.sanderson@gmail.com,6.462078e+09,Memberships,Membership,Level Up Membership,,Success,0,,0.0,199.0,1,0


In [4]:
## Clean data
previous_df = previous_df.drop(columns=['Studio Code', 'Invoice ID', 'Client ID', 'Therapy Category', 'Business Category', 'Sales Rep', 'Autopay Status', 'Tax Amount', 'Amount', 'Discount', 'Gross adjusted Revenue', 'Quantity', 'Credit Used'])
current_df = current_df.drop(columns=['Studio Code', 'Invoice ID', 'Client ID', 'Therapy Category', 'Business Category', 'Sales Rep', 'Autopay Status', 'Tax Amount', 'Amount', 'Discount', 'Gross adjusted Revenue', 'Quantity', 'Credit Used'])
previous_df

Unnamed: 0,Purchase Date,First Name,Last Name,Email,Phone #,Item
0,3/1/2023,Diana,Izeddin,gizeddin@yahoo.com,7863908851,Wellness Membership
1,3/1/2023,Marsha,Goolcharan,actormarsha@gmail.com,3239831445,Discover Membership
2,3/1/2023,Marcos,Molina,Mmolina25@yahoo.com,7863031151,Restore Membership
3,3/1/2023,Amanda,Fuentes,my.amanda917@gmail.com,3053056025,Restore Membership
4,3/1/2023,Brean,Beckford,breanromeo@aol.com,3057104492,Restore Membership
...,...,...,...,...,...,...
449,3/31/2023,Heather,Metoyer,hmmetoyer@gmail.com,8138925252,Discover Membership
450,3/31/2023,Yunitza,Gonzalez,gyunitza@yahoo.com,17863188973,Discover Membership
451,3/31/2023,Melissa,Cancio,canciomelissa@yahoo.com,7864575107,Discover Membership
452,3/31/2023,Rafael,Arte,rarte26@outlook.com,7865893488,Discover Membership


In [5]:
## Combine Names
current_df['Name'] = current_df['First Name'] + ' ' + current_df['Last Name']
current_df = current_df.drop(columns=['First Name', 'Last Name'])
current_df = current_df[['Name', 'Email', 'Phone #', 'Item', 'Purchase Date']]

previous_df['Name'] = previous_df['First Name'] + ' ' + previous_df['Last Name']
previous_df = previous_df.drop(columns=['First Name', 'Last Name'])
previous_df = previous_df[['Name', 'Email', 'Phone #', 'Item', 'Purchase Date']]

## Printing
previous_df

Unnamed: 0,Name,Email,Phone #,Item,Purchase Date
0,Diana Izeddin,gizeddin@yahoo.com,7863908851,Wellness Membership,3/1/2023
1,Marsha Goolcharan,actormarsha@gmail.com,3239831445,Discover Membership,3/1/2023
2,Marcos Molina,Mmolina25@yahoo.com,7863031151,Restore Membership,3/1/2023
3,Amanda Fuentes,my.amanda917@gmail.com,3053056025,Restore Membership,3/1/2023
4,Brean Beckford,breanromeo@aol.com,3057104492,Restore Membership,3/1/2023
...,...,...,...,...,...
449,Heather Metoyer,hmmetoyer@gmail.com,8138925252,Discover Membership,3/31/2023
450,Yunitza Gonzalez,gyunitza@yahoo.com,17863188973,Discover Membership,3/31/2023
451,Melissa Cancio,canciomelissa@yahoo.com,7864575107,Discover Membership,3/31/2023
452,Rafael Arte,rarte26@outlook.com,7865893488,Discover Membership,3/31/2023


In [6]:
## Identify the ammount of members of each category in previous dataset
previous_discover_count = len(previous_df.loc[previous_df['Item'] == 'Discover Membership'])
previous_levelup_count = len(previous_df.loc[previous_df['Item'] == 'Level Up Membership'])
previous_elevate_count = len(previous_df.loc[previous_df['Item'] == 'Elevate Membership'])
previous_core_count = len(previous_df.loc[previous_df['Item'] == 'Core Membership'])
previous_restore_count = len(previous_df.loc[previous_df['Item'] == 'Restore Membership'])
previous_restorecouples_count = len(previous_df.loc[previous_df['Item'] == 'Restore Membership - Couples'])
previous_wellness_count = len(previous_df.loc[previous_df['Item'] == 'Wellness Membership'])
previous_wellnesscouples_count = len(previous_df.loc[previous_df['Item'] == 'Wellness Membership - Couples'])
previous_daily_count = len(previous_df.loc[previous_df['Item'] == 'Daily Membership'])

previous_membership_count = len(previous_df)

print(f'Number of Discover Memberships: {previous_discover_count}')
print(f'Number of Level Up Memberships: {previous_levelup_count}')
print(f'Number of Elevate Memberships: {previous_elevate_count}')
print(f'Number of Core Memberships: {previous_core_count}')
print(f'Number of Restore Memberships: {previous_restore_count}')
print(f'Number of Restore Couples Memberships: {previous_restorecouples_count}')
print(f'Number of Wellness Memberships: {previous_wellness_count}')
print(f'Number of Wellness Couples Memberships: {previous_wellnesscouples_count}')
print(f'Number of Daily Memberships: {previous_daily_count}')

print(f'Total number of all memberships: {previous_membership_count}')

Number of Discover Memberships: 240
Number of Level Up Memberships: 84
Number of Elevate Memberships: 0
Number of Core Memberships: 23
Number of Restore Memberships: 55
Number of Restore Couples Memberships: 19
Number of Wellness Memberships: 17
Number of Wellness Couples Memberships: 6
Number of Daily Memberships: 10
Total number of all memberships: 454


In [18]:
month = previous_df['Purchase Date'].head(1).str.split('/').str[0].values[0]
if month == '1':
    month = 'January'
elif month == '2':
    month = 'February'
elif month == '3':
    month = 'March'
elif month == '4':
    month = 'April'
elif month == '5':
    month = 'May'
elif month == '6':
    month = 'June'
elif month == '7':
    month = 'July'
elif month == '8':
    month = 'August'
elif month == '9':
    month = 'September'
elif month == '10':
    month = 'October'
elif month == '11':
    month = 'November'
elif month == '12':
    month = 'December'

month

'March'

In [6]:
## Identify the ammount of members of each category in current dataset
current_discover_count = len(current_df.loc[current_df['Item'] == 'Discover Membership'])
current_levelup_count = len(current_df.loc[current_df['Item'] == 'Level Up Membership'])
current_elevate_count = len(current_df.loc[current_df['Item'] == 'Elevate Membership'])
current_core_count = len(current_df.loc[current_df['Item'] == 'Core Membership'])
current_restore_count = len(current_df.loc[current_df['Item'] == 'Restore Membership'])
current_restorecouples_count = len(current_df.loc[current_df['Item'] == 'Restore Membership - Couples'])
current_wellness_count = len(current_df.loc[current_df['Item'] == 'Wellness Membership'])
current_wellnesscouples_count = len(current_df.loc[current_df['Item'] == 'Wellness Membership - Couples'])
current_daily_count = len(current_df.loc[current_df['Item'] == 'Daily Membership'])

current_membership_count = len(current_df)

print(f'Number of Discover Memberships: {current_discover_count}')
print(f'Number of Level Up Memberships: {current_levelup_count}')
print(f'Number of Elevate Memberships: {current_elevate_count}')
print(f'Number of Core Memberships: {current_core_count}')
print(f'Number of Restore Memberships: {current_restore_count}')
print(f'Number of Restore Couples Memberships: {current_restorecouples_count}')
print(f'Number of Wellness Memberships: {current_wellness_count}')
print(f'Number of Wellness Couples Memberships: {current_wellnesscouples_count}')
print(f'Number of Daily Memberships: {current_daily_count}')

print(f'Total number of all memberships: {current_membership_count}')

Number of Discover Memberships: 173
Number of Level Up Memberships: 103
Number of Elevate Memberships: 11
Number of Core Memberships: 12
Number of Restore Memberships: 22
Number of Restore Couples Memberships: 4
Number of Wellness Memberships: 4
Number of Wellness Couples Memberships: 1
Number of Daily Memberships: 3
Total number of all memberships: 342


In [7]:
merged_df = pd.merge(previous_df, current_df, on='Name', suffixes=('_prev', '_current'), how='outer', indicator=True)

# Filter out rows that are only in the previous dataframe
lost_members = merged_df[merged_df['_merge'] == 'left_only'][['Name', 'Email_prev', 'Phone #_prev', 'Item_prev']].values.tolist()

print(lost_members)

[['Diana Izeddin', 'gizeddin@yahoo.com', 7863908851.0, 'Wellness Membership'], ['Marsha Goolcharan', 'actormarsha@gmail.com', 3239831445.0, 'Discover Membership'], ['Brean Beckford', 'breanromeo@aol.com', 3057104492.0, 'Restore Membership'], ['Shannon Mathew', 'nonnahskm93@gmail.com', 5864532770.0, 'Core Membership'], ['Kelcie Laroche', 'klaroche13@gmail.com', 5183370829.0, 'Level Up Membership'], ['Paula Sanchez', 'psanchezderio@gmail.com', 7864522950.0, 'Level Up Membership'], ['Gaby Sota', 'mgsz18@hotmail.com', 3058154896.0, 'Discover Membership'], ['Helmut Gaensel', 'hgaensel@gmail.com', 7862900511.0, 'Restore Membership'], ['Julio Affon', 'affonvaljulito@gmail.com', 7868684935.0, 'Discover Membership'], ['Manav Sharma', 'manav007@gmail.com', 9177830180.0, 'Restore Membership - Couples'], ['Christian Cruz', 'cc98kt03@rocketmail.com', 7864862850.0, 'Discover Membership'], ['Marcello Cello Nicoletti', 'Marcellonicoletti@gmail.com', 17865563195.0, 'Level Up Membership'], ['Valentina S

In [8]:
## Grab Names on previous df and put them in a list
overlap_members = []

for index, row1 in previous_df.iterrows():
    for index, row2 in current_df.iterrows():
        name1 = row1['Name']
        name2 = row2['Name']
        if name1 != name2:
            continue
        else:
            my_list = [row1['Name'], row1['Email'], row1['Phone #'], row1['Item']]
            overlap_members.append(my_list)
            continue
    continue

overlap_members

[['Marcos Molina', 'Mmolina25@yahoo.com', 7863031151, 'Restore Membership'],
 ['Amanda Fuentes',
  'my.amanda917@gmail.com',
  3053056025,
  'Restore Membership'],
 ['Christian Leiva', 'crl07c@gmail.com', 3059796192, 'Discover Membership'],
 ['Deborah Marks', 'synergy928@aol.com', 7868532700, 'Level Up Membership'],
 ['Fernando Cosme',
  'djflipmiami@gmail.com',
  7864441717,
  'Discover Membership'],
 ['Jose Zeron', 'zeronmetal@hotmail.com', 7862514419, 'Daily Membership'],
 ['Thomas Pratt',
  'prattthomas69@gmail.com',
  3056081967,
  'Discover Membership'],
 ['Ethel Rodas Rodas',
  'erodas@aeendoscopy.com',
  3053026227,
  'Discover Membership'],
 ['Ariel Rodriguez',
  'arodelighting@gmail.com',
  3052441803,
  'Restore Membership'],
 ['Andre Rojas',
  'rojassevilla01@hotmail.com',
  17863952289,
  'Level Up Membership'],
 ['Tiffany Moe', 'tiramesu71@yahoo.com', 3057100096, 'Discover Membership'],
 ['Makeba Kelly', 'mkelly6870@gmail.com', 9545523985, 'Discover Membership'],
 ['Madis

In [10]:
## Creating dataframes
lost_members_df = pd.DataFrame(lost_members, columns=['Name', 'Email', 'Phone #', 'Membership'])
overlap_members_df = pd.DataFrame(overlap_members, columns=['Name', 'Email', 'Phone #', 'Membership'])
overlap_members_df

Unnamed: 0,Name,Email,Phone #,Membership
0,Marcos Molina,Mmolina25@yahoo.com,7863031151,Restore Membership
1,Amanda Fuentes,my.amanda917@gmail.com,3053056025,Restore Membership
2,Christian Leiva,crl07c@gmail.com,3059796192,Discover Membership
3,Deborah Marks,synergy928@aol.com,7868532700,Level Up Membership
4,Fernando Cosme,djflipmiami@gmail.com,7864441717,Discover Membership
...,...,...,...,...
177,John Okunski,jokunski@gmail.com,3057105106,Level Up Membership
178,Michael Dazzo,Daz1201@aol.com,7865128331,Restore Membership
179,Melissa Cancio,canciomelissa@yahoo.com,7864575107,Discover Membership
180,Rafael Arte,rarte26@outlook.com,7865893488,Discover Membership


In [11]:
lost_members_df

Unnamed: 0,Name,Email,Phone #,Membership
0,Diana Izeddin,gizeddin@yahoo.com,7.863909e+09,Wellness Membership
1,Marsha Goolcharan,actormarsha@gmail.com,3.239831e+09,Discover Membership
2,Brean Beckford,breanromeo@aol.com,3.057104e+09,Restore Membership
3,Shannon Mathew,nonnahskm93@gmail.com,5.864533e+09,Core Membership
4,Kelcie Laroche,klaroche13@gmail.com,5.183371e+09,Level Up Membership
...,...,...,...,...
267,Vaushaun Harvin,bopsbabygirl@aol.com,7.863667e+09,Discover Membership
268,Donovan Manuel,donovanmanuel27@gmail.com,1.770742e+10,Level Up Membership
269,Cristina Sullivan,cristinasullivan@me.com,7.862295e+09,Level Up Membership
270,Heather Metoyer,hmmetoyer@gmail.com,8.138925e+09,Discover Membership
