## READ in Files from GitHub

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import io

In [2]:
# Gift data: raw CSV hosted in the APRA-DataViz-Challenge GitHub repository.
gifturl = "https://raw.githubusercontent.com/NicoleWittlin/APRA-DataViz-Challenge/master/giving_data_table.csv"

# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops here on an HTTP error instead of letting pandas
# try to parse an error page as CSV.
response = requests.get(gifturl, timeout=60)
response.raise_for_status()
download = response.content  # raw bytes, kept under the original variable name
giftdata = pd.read_csv(io.StringIO(download.decode("utf-8")))

In [3]:
# Biographical data: raw CSV hosted in the APRA-DataViz-Challenge GitHub repository.
biourl = "https://raw.githubusercontent.com/NicoleWittlin/APRA-DataViz-Challenge/master/bio_data_table.csv"

# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops here on an HTTP error instead of letting pandas
# try to parse an error page as CSV.
response = requests.get(biourl, timeout=60)
response.raise_for_status()
download = response.content  # raw bytes, kept under the original variable name
biodata = pd.read_csv(io.StringIO(download.decode("utf-8")))

In [4]:
# Engagement data: raw CSV hosted in the APRA-DataViz-Challenge GitHub repository.
engageurl = "https://raw.githubusercontent.com/NicoleWittlin/APRA-DataViz-Challenge/master/engagement_data_table.csv"

# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops here on an HTTP error instead of letting pandas
# try to parse an error page as CSV.
response = requests.get(engageurl, timeout=60)
response.raise_for_status()
download = response.content  # raw bytes, kept under the original variable name
engagedata = pd.read_csv(io.StringIO(download.decode("utf-8")))

## Spot Check Data

In [5]:
# Spot-check the first rows of the gift table.
giftdata.head()

Unnamed: 0,household ID,id,gift id,credit Type,gift amt,gift date
0,9662153,8494401,2916764,Soft-Credit,385,10/21/2016
1,2484641,5186919,2916801,Hard-Credit,401,10/21/2016
2,2484641,5929757,2916801,Soft-Credit,401,10/21/2016
3,6982155,7005004,2916813,Hard-Credit,420,10/21/2016
4,6982155,3789171,2916813,Soft-Credit,420,10/21/2016


In [6]:
# List the gift table's column labels (they contain spaces and mixed case, e.g. 'household ID').
giftdata.columns

Index(['household ID', 'id', 'gift id', 'credit Type', 'gift amt',
       'gift date'],
      dtype='object')

In [7]:
# Show dtypes and non-null counts for the gift table.
giftdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 378001 entries, 0 to 378000
Data columns (total 6 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   household ID  378001 non-null  int64 
 1   id            378001 non-null  int64 
 2   gift id       378001 non-null  int64 
 3   credit Type   378001 non-null  object
 4   gift amt      378001 non-null  int64 
 5   gift date     378001 non-null  object
dtypes: int64(4), object(2)
memory usage: 17.3+ MB


In [8]:
# Spot-check the first rows of the biographical table.
biodata.head()

Unnamed: 0,id,name,household_id,country,city,birthday,deceased,zip,state,lat,lon,capacity,capacity_source,race
0,1581317,"Patterson, Aeneas",4310723,United States,Agawam,1/1/1900,N,1001.0,MA,42.06,-72.61,>$1k,screening,Non-Hispanic white
1,9952781,"Page, Casie",9248960,United States,Agawam,10/20/1958,N,1001.0,MA,42.06,-72.61,$75k - $100k,screening,Non-Hispanic white
2,6170220,"Vasquez, James",9248960,United States,Agawam,3/10/1958,N,1001.0,MA,42.06,-72.61,$75k - $100k,screening,Non-Hispanic white
3,2012013,"Garcia, Dezmenn",4731003,United States,Amherst,7/20/1940,Y,1002.0,MA,42.37,-72.52,$75k - $100k,,Non-Hispanic white
4,1021063,"Riefstahl, Christopher",6094904,United States,Amherst,7/24/1982,N,1002.0,MA,42.37,-72.52,$75k - $100k,institutional,Non-Hispanic white


In [9]:
# Show dtypes and non-null counts for the biographical table.
biodata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 14 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   id               100000 non-null  int64  
 1   name             100000 non-null  object 
 2   household_id     100000 non-null  int64  
 3   country          100000 non-null  object 
 4   city             100000 non-null  object 
 5   birthday         90000 non-null   object 
 6   deceased         90000 non-null   object 
 7   zip              90000 non-null   float64
 8   state            90000 non-null   object 
 9   lat              90000 non-null   float64
 10  lon              90000 non-null   float64
 11  capacity         90000 non-null   object 
 12  capacity_source  90000 non-null   object 
 13  race             100000 non-null  object 
dtypes: float64(3), int64(2), object(9)
memory usage: 10.7+ MB


In [10]:
# Spot-check the first rows of the engagement table.
engagedata.head()

Unnamed: 0,id,last_contact,numer_of_contacts,gift_officer,event,volunteer,time_on_site,interests
0,1581317,,0.0,,Y,,432.0,"skiing,golf,reading,hunting/fishing"
1,2012013,,8.0,,Y,,,
2,1021063,,0.0,,N,,,hunting/fishing
3,2725629,,,,N,,119.0,
4,1880411,,0.0,,N,,,


In [11]:
# Show dtypes and non-null counts for the engagement table.
engagedata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   id                 100000 non-null  int64  
 1   last_contact       63634 non-null   object 
 2   numer_of_contacts  83801 non-null   float64
 3   gift_officer       3589 non-null    object 
 4   event              90002 non-null   object 
 5   volunteer          8969 non-null    float64
 6   time_on_site       20002 non-null   float64
 7   interests          63801 non-null   object 
dtypes: float64(3), int64(1), object(4)
memory usage: 6.1+ MB


## Data Clean Up

In [12]:
# Deceased flag: replace every blank (missing) value with 'N'.
# Only the 'deceased' column is listed in the mapping, so no other column is filled.
biodata = biodata.fillna({'deceased': 'N'})

In [13]:
# Re-check non-null counts for the biographical table after filling 'deceased'.
biodata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 14 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   id               100000 non-null  int64  
 1   name             100000 non-null  object 
 2   household_id     100000 non-null  int64  
 3   country          100000 non-null  object 
 4   city             100000 non-null  object 
 5   birthday         90000 non-null   object 
 6   deceased         100000 non-null  object 
 7   zip              90000 non-null   float64
 8   state            90000 non-null   object 
 9   lat              90000 non-null   float64
 10  lon              90000 non-null   float64
 11  capacity         90000 non-null   object 
 12  capacity_source  90000 non-null   object 
 13  race             100000 non-null  object 
dtypes: float64(3), int64(2), object(9)
memory usage: 10.7+ MB


## MERGE Data Files: Bio + Engagement

In [14]:
# Attach the engagement columns to every biographical record, matching on "id".
# how="left" keeps each biographical row even when it has no engagement match.
bioengagemerge = biodata.merge(engagedata, how="left", on="id")

In [15]:
# Preview the first rows of the merged bio + engagement frame.
bioengagemerge.head()

Unnamed: 0,id,name,household_id,country,city,birthday,deceased,zip,state,lat,...,capacity,capacity_source,race,last_contact,numer_of_contacts,gift_officer,event,volunteer,time_on_site,interests
0,1581317,"Patterson, Aeneas",4310723,United States,Agawam,1/1/1900,N,1001.0,MA,42.06,...,>$1k,screening,Non-Hispanic white,,0.0,,Y,,432.0,"skiing,golf,reading,hunting/fishing"
1,9952781,"Page, Casie",9248960,United States,Agawam,10/20/1958,N,1001.0,MA,42.06,...,$75k - $100k,screening,Non-Hispanic white,6/16/2018,23.0,,N,,,"cars,wine,food/dining/cooking"
2,6170220,"Vasquez, James",9248960,United States,Agawam,3/10/1958,N,1001.0,MA,42.06,...,$75k - $100k,screening,Non-Hispanic white,2/3/2020,15.0,,N,,,"golf,reading"
3,2012013,"Garcia, Dezmenn",4731003,United States,Amherst,7/20/1940,Y,1002.0,MA,42.37,...,$75k - $100k,,Non-Hispanic white,,8.0,,Y,,,
4,1021063,"Riefstahl, Christopher",6094904,United States,Amherst,7/24/1982,N,1002.0,MA,42.37,...,$75k - $100k,institutional,Non-Hispanic white,,0.0,,N,,,hunting/fishing


In [16]:
# Preview the last rows of the merged bio + engagement frame.
bioengagemerge.tail()

Unnamed: 0,id,name,household_id,country,city,birthday,deceased,zip,state,lat,...,capacity,capacity_source,race,last_contact,numer_of_contacts,gift_officer,event,volunteer,time_on_site,interests
99995,6477965,"Mellon, Aaron",1921919,Russia,Saint Petersburg,3/27/1967,N,,,,...,$50k - $75K,screening,Non-Hispanic white,,11.0,,N,,,"sports,food/dining/cooking"
99996,4608102,"Garman, Mirza",1922047,Russia,Saint Petersburg,6/24/1982,N,,,,...,$5k - $10k,institutional,Non-Hispanic white,1/3/2019,,,Y,,,"fashion,sports,hunting/fishing,cars"
99997,4577636,"el-Atallah, Faadi",1922047,Russia,Saint Petersburg,11/22/1979,N,,,,...,$5k - $10k,screening,Asian,,6.0,,N,,,
99998,8971611,"Ponce, Jessica",1922075,Russia,Saint Petersburg,1/21/1966,N,,,,...,$250k - $500k,screening,Non-Hispanic white,4/8/2017,18.0,,Y,,,"boating/sailing,cars,sports"
99999,5138178,"Cochran, Abren",1922075,Russia,Saint Petersburg,12/20/1963,N,,,,...,$250k - $500k,institutional,Non-Hispanic white,2/19/2017,15.0,,N,,159.0,


In [17]:
# List the merged column labels: biographical columns first, then the engagement columns.
bioengagemerge.columns

Index(['id', 'name', 'household_id', 'country', 'city', 'birthday', 'deceased',
       'zip', 'state', 'lat', 'lon', 'capacity', 'capacity_source', 'race',
       'last_contact', 'numer_of_contacts', 'gift_officer', 'event',
       'volunteer', 'time_on_site', 'interests'],
      dtype='object')

In [18]:
# Show dtypes and non-null counts for the merged bio + engagement frame.
bioengagemerge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100000 entries, 0 to 99999
Data columns (total 21 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   id                 100000 non-null  int64  
 1   name               100000 non-null  object 
 2   household_id       100000 non-null  int64  
 3   country            100000 non-null  object 
 4   city               100000 non-null  object 
 5   birthday           90000 non-null   object 
 6   deceased           100000 non-null  object 
 7   zip                90000 non-null   float64
 8   state              90000 non-null   object 
 9   lat                90000 non-null   float64
 10  lon                90000 non-null   float64
 11  capacity           90000 non-null   object 
 12  capacity_source    90000 non-null   object 
 13  race               100000 non-null  object 
 14  last_contact       63634 non-null   object 
 15  numer_of_contacts  83801 non-null   float64
 16  gif

In [19]:
# Count living ('N') vs. deceased ('Y') constituents.
bioengagemerge.deceased.value_counts()

N    89902
Y    10098
Name: deceased, dtype: int64

In [20]:
# Count constituents per country.
bioengagemerge.country.value_counts()

United States    90000
China             5000
India              800
Indonesia          600
Bangladesh         600
Pakistan           600
Nigeria            600
Brazil             600
Mexico             600
Russia             600
Name: country, dtype: int64

In [21]:
# Count constituents per assigned gift officer (rows with no officer are excluded by value_counts' default).
bioengagemerge.gift_officer.value_counts()

el-Sawaya, Waseema    208
Yang, Andrew          197
al-Salim, Hamdaan     196
Walter, Dianna        191
Estrada, Jorge        189
Masters, Deisha       188
Kim, Felicia          184
al-Mourad, Haajid     183
al-Salem, Ma,Roof     181
Topper, Kyle          180
Tolbert, Yekalo       178
al-Jaffer, Awda       178
Bowyer, Ryan          178
Varelas, Michael      174
Porras, Marisa        173
Ha, John              172
White, Abigail        172
al-Hakeem, Haamid     169
Middleton, Lauren     154
Sanchez, Nohemi       144
Name: gift_officer, dtype: int64

In [22]:
# Tally each distinct interests string; comma-separated combinations are counted as-is, not split per interest.
bioengagemerge.interests.value_counts()

boating/sailing                                       978
skiing                                                961
wine                                                  947
reading                                               926
art                                                   924
                                                     ... 
sports,politics,reading,cars,hunting/fishing            1
travel,wine,skiing,fashion                              1
sports,politics,boating/sailing,fashion,travel          1
travel,cars,wine,golf,art                               1
sports,food/dining/cooking,reading,boating/sailing      1
Name: interests, Length: 24575, dtype: int64

In [23]:
# Summary statistics for the numeric columns of the merged bio + engagement frame.
bioengagemerge.describe()

Unnamed: 0,id,household_id,zip,lat,lon,numer_of_contacts,volunteer,time_on_site
count,100000.0,100000.0,90000.0,90000.0,90000.0,83801.0,8969.0,20002.0
mean,5504808.0,5520820.0,50228.713744,37.649065,-91.455561,15.15328,0.218642,335.316518
std,2593475.0,2587095.0,29650.363076,5.203071,16.390225,12.400124,0.413348,236.557722
min,1000065.0,1000399.0,1001.0,19.54,-159.52,0.0,0.0,1.0
25%,3264957.0,3291076.0,26847.0,33.92,-97.74,4.0,0.0,137.0
50%,5501357.0,5532263.0,47106.0,38.79,-86.51,13.0,0.0,293.0
75%,7746170.0,7746223.0,77515.0,41.33,-79.63,24.0,0.0,503.0
max,9999979.0,9999973.0,99901.0,64.9,-67.11,50.0,1.0,994.0


In [None]:
from pandas_profiling import ProfileReport

In [None]:
# Build a pandas-profiling report for the bio + engagement frame (minimal=True is passed to ProfileReport)
# and write it to 'BioEngageoutput.html', a path relative to the working directory.
profileBioEngage = ProfileReport(bioengagemerge, minimal=True)
profileBioEngage.to_file(output_file='BioEngageoutput.html')

## MERGE Data Files: Bio + Engagement + Giving Data

In [26]:
# Left-join the gift rows onto the bio + engagement frame, matching on "id".
# A constituent with several gift rows is repeated once per gift; one with no
# gift rows keeps a single row whose gift columns are empty.
allmerge = bioengagemerge.merge(giftdata, how="left", on="id")

In [27]:
# Preview the first rows of the bio + engagement + gift frame.
allmerge.head()

Unnamed: 0,id,name,household_id,country,city,birthday,deceased,zip,state,lat,...,gift_officer,event,volunteer,time_on_site,interests,household ID,gift id,credit Type,gift amt,gift date
0,1581317,"Patterson, Aeneas",4310723,United States,Agawam,1/1/1900,N,1001.0,MA,42.06,...,,Y,,432.0,"skiing,golf,reading,hunting/fishing",,,,,
1,9952781,"Page, Casie",9248960,United States,Agawam,10/20/1958,N,1001.0,MA,42.06,...,,N,,,"cars,wine,food/dining/cooking",9248960.0,4446713.0,Hard-Credit,1115.0,8/27/2017
2,9952781,"Page, Casie",9248960,United States,Agawam,10/20/1958,N,1001.0,MA,42.06,...,,N,,,"cars,wine,food/dining/cooking",9248960.0,5100812.0,Hard-Credit,833.0,1/6/2018
3,6170220,"Vasquez, James",9248960,United States,Agawam,3/10/1958,N,1001.0,MA,42.06,...,,N,,,"golf,reading",9248960.0,4446713.0,Soft-Credit,1115.0,8/27/2017
4,6170220,"Vasquez, James",9248960,United States,Agawam,3/10/1958,N,1001.0,MA,42.06,...,,N,,,"golf,reading",9248960.0,5100812.0,Soft-Credit,833.0,1/6/2018


In [28]:
# Preview the last rows of the bio + engagement + gift frame.
allmerge.tail()

Unnamed: 0,id,name,household_id,country,city,birthday,deceased,zip,state,lat,...,gift_officer,event,volunteer,time_on_site,interests,household ID,gift id,credit Type,gift amt,gift date
398666,5138178,"Cochran, Abren",1922075,Russia,Saint Petersburg,12/20/1963,N,,,,...,,N,,159.0,,1922075.0,5257709.0,Soft-Credit,2819.0,2/6/2018
398667,5138178,"Cochran, Abren",1922075,Russia,Saint Petersburg,12/20/1963,N,,,,...,,N,,159.0,,1922075.0,6587615.0,Soft-Credit,188.0,11/2/2018
398668,5138178,"Cochran, Abren",1922075,Russia,Saint Petersburg,12/20/1963,N,,,,...,,N,,159.0,,1922075.0,7495647.0,Soft-Credit,3015.0,5/4/2019
398669,5138178,"Cochran, Abren",1922075,Russia,Saint Petersburg,12/20/1963,N,,,,...,,N,,159.0,,1922075.0,7766541.0,Soft-Credit,701.0,6/29/2019
398670,5138178,"Cochran, Abren",1922075,Russia,Saint Petersburg,12/20/1963,N,,,,...,,N,,159.0,,1922075.0,9150276.0,Soft-Credit,8647.0,4/6/2020


In [29]:
# Show dtypes and non-null counts for the bio + engagement + gift frame.
allmerge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 398671 entries, 0 to 398670
Data columns (total 26 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   id                 398671 non-null  int64  
 1   name               398671 non-null  object 
 2   household_id       398671 non-null  int64  
 3   country            398671 non-null  object 
 4   city               398671 non-null  object 
 5   birthday           358922 non-null  object 
 6   deceased           398671 non-null  object 
 7   zip                358559 non-null  float64
 8   state              358559 non-null  object 
 9   lat                358559 non-null  float64
 10  lon                358559 non-null  float64
 11  capacity           358929 non-null  object 
 12  capacity_source    358848 non-null  object 
 13  race               398671 non-null  object 
 14  last_contact       287630 non-null  object 
 15  numer_of_contacts  352608 non-null  float64
 16  gi

In [30]:
# Summary statistics for the numeric columns. Constituents with several gifts occupy several rows,
# so the bio/engagement statistics here are per row, not per constituent.
allmerge.describe()

Unnamed: 0,id,household_id,zip,lat,lon,numer_of_contacts,volunteer,time_on_site,household ID,gift id,gift amt
count,398671.0,398671.0,358559.0,358559.0,358559.0,352608.0,39107.0,79956.0,378001.0,378001.0,378001.0
mean,5507947.0,5510363.0,50139.793788,37.654857,-91.398345,16.784225,0.15519,335.76791,5509499.0,6459635.0,23396.53
std,2595124.0,2587970.0,29647.842879,5.217111,16.37921,12.044591,0.36209,237.498558,2587945.0,2043465.0,134168.7
min,1000065.0,1000399.0,1001.0,19.54,-159.52,0.0,0.0,1.0,1000399.0,2916764.0,1.0
25%,3257124.0,3274852.0,26167.0,33.92,-97.74,7.0,0.0,136.0,3272270.0,4685898.0,459.0
50%,5506530.0,5505880.0,46947.0,38.8,-86.48,15.0,0.0,293.0,5503194.0,6463612.0,1053.0
75%,7754417.0,7736964.0,77474.0,41.34,-79.56,25.0,0.0,504.0,7736940.0,8232541.0,2560.0
max,9999979.0,9999973.0,99901.0,64.9,-67.11,50.0,1.0,994.0,9999973.0,9999963.0,9901063.0


In [None]:
# Build a pandas-profiling report for the fully merged frame (minimal=True is passed to ProfileReport)
# and write it to 'Alloutput.html', a path relative to the working directory.
profileAll = ProfileReport(allmerge, minimal=True)
profileAll.to_file(output_file='Alloutput.html')

In [None]:
# The $385 gift (gift id 2916764) shows up in the gift-table preview only as a Soft-Credit row
# for id 8494401, with no Hard-Credit partner row in that preview.
# Pull every merged row for that id to investigate.
allmerge.loc[allmerge['id'] == 8494401]

In [None]:
# Pull every merged row for constituent id 7252527.
# NOTE(review): the reason this id was singled out is not recorded here — confirm.
allmerge.loc[allmerge['id'] == 7252527]

## Managed Donors with no gift and no contact in 2020

In [31]:
# Managed donors: constituents that have a gift officer assigned (non-missing 'gift_officer').
manageddonors = bioengagemerge.loc[bioengagemerge['gift_officer'].notna()]

In [32]:
# Preview the first managed-donor rows.
manageddonors.head()

Unnamed: 0,id,name,household_id,country,city,birthday,deceased,zip,state,lat,...,capacity,capacity_source,race,last_contact,numer_of_contacts,gift_officer,event,volunteer,time_on_site,interests
45,7291661,"Hewitt, Rachel",2347894,United States,East longmeadow,7/5/1982,N,1028.0,MA,42.06,...,$250k - $500k,screening,Non-Hispanic white,3/27/2019,,"el-Sawaya, Waseema",N,,,"sports,reading,politics"
81,7028385,"Le, Isaac",4636351,United States,Ludlow,11/18/1954,N,1056.0,MA,42.16,...,$2.5k - $5k,screening,Hispanic or Latino,2/16/2019,13.0,"Varelas, Michael",Y,,,"food/dining/cooking,boating/sailing,reading"
97,4918844,"Rojas, Julio",5008876,United States,Palmer,10/11/1953,N,1069.0,MA,42.16,...,$250k - $500k,institutional,Asian,5/2/2019,17.0,"Walter, Dianna",N,,,art
150,6622340,"Miller, Austin",2951478,United States,Springfield,1/11/1983,N,1103.0,MA,42.11,...,$50k - $75K,screening,Non-Hispanic white,7/12/2019,18.0,"Middleton, Lauren",,,,"art,sports,skiing,golf"
181,2648419,"Davey, Andrea",1933096,United States,Springfield,,N,1118.0,MA,42.11,...,$10k - $25k,screening,Hispanic or Latino,1/9/2019,9.0,"Bowyer, Ryan",,,,"sports,hunting/fishing,travel,wine,reading"


In [33]:
# Preview the last managed-donor rows.
manageddonors.tail()

Unnamed: 0,id,name,household_id,country,city,birthday,deceased,zip,state,lat,...,capacity,capacity_source,race,last_contact,numer_of_contacts,gift_officer,event,volunteer,time_on_site,interests
99925,3627531,"Winters, Andrew",1915412,Russia,Saint Petersburg,11/11/1970,N,,,,...,$25k - $50k,institutional,Native Americans or Alska Natives,11/30/2015,13.0,"Yang, Andrew",Y,,46.0,"skiing,golf,wine,travel,food/dining/cooking"
99934,3999267,"el-Haider, Abdur Raqeeb",1915995,Russia,Saint Petersburg,1/24/1945,N,,,,...,$100k - $250k,,Two or more races,6/22/2019,13.0,"Porras, Marisa",Y,,,"fashion,art"
99946,9208662,"Thao, Tommy",1917341,Russia,Saint Petersburg,7/24/1953,N,,,,...,$75k - $100k,screening,Two or more races,1/17/2019,1.0,"Middleton, Lauren",,,,wine
99948,1338739,"Gosney, Neha",1917486,Russia,Saint Petersburg,10/17/1962,N,,,,...,$500k - $750k,,Non-Hispanic white,,,"el-Sawaya, Waseema",Y,,,"food/dining/cooking,travel,sports,skiing,golf"
99974,1527338,"Hull, Deonta",1920027,Russia,Saint Petersburg,4/30/1950,Y,,,,...,$250k - $500k,screening,Non-Hispanic white,3/1/2019,2.0,"al-Salim, Hamdaan",N,,594.0,"travel,cars,golf,skiing"


In [34]:
# Show dtypes and non-null counts for the managed-donor subset.
manageddonors.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3589 entries, 45 to 99974
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 3589 non-null   int64  
 1   name               3589 non-null   object 
 2   household_id       3589 non-null   int64  
 3   country            3589 non-null   object 
 4   city               3589 non-null   object 
 5   birthday           3246 non-null   object 
 6   deceased           3589 non-null   object 
 7   zip                3243 non-null   float64
 8   state              3243 non-null   object 
 9   lat                3243 non-null   float64
 10  lon                3243 non-null   float64
 11  capacity           3248 non-null   object 
 12  capacity_source    3234 non-null   object 
 13  race               3589 non-null   object 
 14  last_contact       2527 non-null   object 
 15  numer_of_contacts  3116 non-null   float64
 16  gift_officer       358

In [35]:
# Count living ('N') vs. deceased ('Y') among managed donors.
manageddonors.deceased.value_counts()

N    3216
Y     373
Name: deceased, dtype: int64

In [36]:
# Managed donors flagged as deceased ('Y').
deceased = manageddonors.loc[manageddonors['deceased'].eq('Y')]

In [None]:
# Preview the deceased managed donors.
deceased.head()

In [None]:
# Export the deceased managed donors to 'deceased.csv' in the working directory.
# The DataFrame index is written as the first column (pandas' default).
deceased.to_csv('deceased.csv')

In [37]:
# Managed donors not flagged as deceased ('N').
activemanage = manageddonors.loc[manageddonors['deceased'].eq('N')]

In [38]:
# Preview the first living managed-donor rows.
activemanage.head()

Unnamed: 0,id,name,household_id,country,city,birthday,deceased,zip,state,lat,...,capacity,capacity_source,race,last_contact,numer_of_contacts,gift_officer,event,volunteer,time_on_site,interests
45,7291661,"Hewitt, Rachel",2347894,United States,East longmeadow,7/5/1982,N,1028.0,MA,42.06,...,$250k - $500k,screening,Non-Hispanic white,3/27/2019,,"el-Sawaya, Waseema",N,,,"sports,reading,politics"
81,7028385,"Le, Isaac",4636351,United States,Ludlow,11/18/1954,N,1056.0,MA,42.16,...,$2.5k - $5k,screening,Hispanic or Latino,2/16/2019,13.0,"Varelas, Michael",Y,,,"food/dining/cooking,boating/sailing,reading"
97,4918844,"Rojas, Julio",5008876,United States,Palmer,10/11/1953,N,1069.0,MA,42.16,...,$250k - $500k,institutional,Asian,5/2/2019,17.0,"Walter, Dianna",N,,,art
150,6622340,"Miller, Austin",2951478,United States,Springfield,1/11/1983,N,1103.0,MA,42.11,...,$50k - $75K,screening,Non-Hispanic white,7/12/2019,18.0,"Middleton, Lauren",,,,"art,sports,skiing,golf"
181,2648419,"Davey, Andrea",1933096,United States,Springfield,,N,1118.0,MA,42.11,...,$10k - $25k,screening,Hispanic or Latino,1/9/2019,9.0,"Bowyer, Ryan",,,,"sports,hunting/fishing,travel,wine,reading"


In [39]:
# Show dtypes and non-null counts for the living managed-donor subset.
activemanage.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3216 entries, 45 to 99948
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 3216 non-null   int64  
 1   name               3216 non-null   object 
 2   household_id       3216 non-null   int64  
 3   country            3216 non-null   object 
 4   city               3216 non-null   object 
 5   birthday           2900 non-null   object 
 6   deceased           3216 non-null   object 
 7   zip                2911 non-null   float64
 8   state              2911 non-null   object 
 9   lat                2911 non-null   float64
 10  lon                2911 non-null   float64
 11  capacity           2906 non-null   object 
 12  capacity_source    2897 non-null   object 
 13  race               3216 non-null   object 
 14  last_contact       2262 non-null   object 
 15  numer_of_contacts  2784 non-null   float64
 16  gift_officer       321

In [40]:
# Sanity check: only 'N' should remain in 'deceased' after the filter.
activemanage.deceased.value_counts()

N    3216
Name: deceased, dtype: int64

In [41]:
# Count living managed donors per gift officer.
activemanage.gift_officer.value_counts()

el-Sawaya, Waseema    185
al-Salim, Hamdaan     177
Yang, Andrew          172
Walter, Dianna        170
Estrada, Jorge        170
Masters, Deisha       167
Tolbert, Yekalo       163
Kim, Felicia          163
Varelas, Michael      162
al-Mourad, Haajid     161
al-Salem, Ma,Roof     161
Topper, Kyle          161
Ha, John              159
Porras, Marisa        159
Bowyer, Ryan          156
al-Hakeem, Haamid     154
al-Jaffer, Awda       153
White, Abigail        150
Middleton, Lauren     142
Sanchez, Nohemi       131
Name: gift_officer, dtype: int64

In [42]:
# Tally last-contact values; 'last_contact' is still a string column here, so counts are per literal date string.
activemanage.last_contact.value_counts()

10/15/2018    7
11/4/2016     7
10/25/2017    7
5/20/2019     6
10/16/2018    6
             ..
6/24/2020     1
4/30/2016     1
6/17/2017     1
5/30/2020     1
2/12/2016     1
Name: last_contact, Length: 1233, dtype: int64

In [43]:
# Export the living managed donors to 'activemanage.csv' in the working directory.
# The DataFrame index is written as the first column (pandas' default).
activemanage.to_csv('activemanage.csv')

In [44]:
# Left-join the gift rows onto the living managed donors, matching on "id".
# Donors with several gift rows are repeated once per gift; donors with none keep one row.
activemanagegift = activemanage.merge(giftdata, how="left", on="id")

In [45]:
# Export the living managed donors joined with their gifts to 'activemanagegift.csv' in the working directory.
# The DataFrame index is written as the first column (pandas' default).
activemanagegift.to_csv('activemanagegift.csv')