# Create final dataset

In [2]:
import pandas as pd

In [3]:
# Load the payments table, which includes an `amount_usd` column.
# `date` comes in as plain strings (object dtype) and is parsed in the next section.
# NOTE(review): the 'Unnamed: 0' column is presumably a row index saved by an earlier
# export — confirm; it is dropped before the final CSV is written.
df_payements = pd.read_csv('payments_amount_usd.csv')
df_payements.dtypes

Unnamed: 0             int64
id                    object
donor_id              object
payment_platform      object
portfolio             object
amount               float64
currency              object
date                  object
counterfactuality    float64
pledge_id             object
multiplier           float64
amount_usd           float64
year                   int64
dtype: object

## Add month

In [4]:
# Parse the raw date strings, then derive a monthly period (e.g. 2024-02) from them.
# Keyword arguments of assign are evaluated in order, so `month` sees the parsed `date`.
df_payements = df_payements.assign(
    date=lambda frame: pd.to_datetime(frame['date']),
    month=lambda frame: frame['date'].dt.to_period('M'),
)

## Merge with pledges

In [5]:
# Load the pledges table. The *_at columns already come back as datetime64 here
# (see the dtypes below); pandas' read_json converts date-like column labels by default.
df_pledges = pd.read_json('pledges.json')
df_pledges.dtypes

donor_id                       object
pledge_id                      object
donor_chapter                  object
chapter_type                   object
pledge_status                  object
pledge_created_at      datetime64[ns]
pledge_starts_at       datetime64[ns]
pledge_ended_at        datetime64[ns]
contribution_amount           float64
currency                       object
frequency                      object
payment_platform               object
dtype: object

In [6]:
# Attach pledge attributes to every payment. The inner join keeps only payments whose
# pledge_id exists in the pledges table; columns present on both sides are suffixed.
# NOTE(review): if pledge_id is not unique in df_pledges, payments get duplicated —
# consider validate='many_to_one'.
df_merged = pd.merge(
    df_payements,
    df_pledges,
    on='pledge_id',
    how='inner',
    suffixes=('_payements', '_pledges'),
)

In [7]:
# Inspect the merged schema: columns that exist in both inputs now carry the
# '_payements' / '_pledges' suffixes.
df_merged.dtypes

Unnamed: 0                             int64
id                                    object
donor_id_payements                    object
payment_platform_payements            object
portfolio                             object
amount                               float64
currency_payements                    object
date                          datetime64[ns]
counterfactuality                    float64
pledge_id                             object
multiplier                           float64
amount_usd                           float64
year                                   int64
month                              period[M]
donor_id_pledges                      object
donor_chapter                         object
chapter_type                          object
pledge_status                         object
pledge_created_at             datetime64[ns]
pledge_starts_at              datetime64[ns]
pledge_ended_at               datetime64[ns]
contribution_amount                  float64
currency_pledges                      object
frequency                             object
payment_platform_pledges              object
dtype: object

In [8]:
# Columns duplicated from the pledges side of the merge carry the '_pledges' suffix.
pledges_suffix = '_pledges'
cols_to_drop = [name for name in df_merged.columns if name.endswith(pledges_suffix)]

In [9]:
# Show which pledge-side duplicate columns were selected for removal.
cols_to_drop

['donor_id_pledges', 'currency_pledges', 'payment_platform_pledges']

In [10]:
# Discard the pledge-side copies; the payment-side columns are kept and renamed later.
df_merged = df_merged.drop(cols_to_drop, axis='columns')

In [11]:
# Confirm that the '_pledges' columns are gone.
df_merged.dtypes

Unnamed: 0                             int64
id                                    object
donor_id_payements                    object
payment_platform_payements            object
portfolio                             object
amount                               float64
currency_payements                    object
date                          datetime64[ns]
counterfactuality                    float64
pledge_id                             object
multiplier                           float64
amount_usd                           float64
year                                   int64
month                              period[M]
donor_chapter                         object
chapter_type                          object
pledge_status                         object
pledge_created_at             datetime64[ns]
pledge_starts_at              datetime64[ns]
pledge_ended_at               datetime64[ns]
contribution_amount                  float64
frequency                             object
dtype: object

In [12]:
# Payment-side columns still carry the merge suffix; collect them for renaming.
payements_suffix = '_payements'
cols_to_rename = [name for name in df_merged.columns if name.endswith(payements_suffix)]
cols_to_rename

['donor_id_payements', 'payment_platform_payements', 'currency_payements']

In [13]:
# Strip the '_payements' merge suffix so the payment-side columns get their plain names back.
# - Reassigning (instead of inplace=True) avoids mutating a frame that earlier cells
#   already displayed and keeps the step chainable.
# - removesuffix only trims a trailing '_payements'; str.replace would also delete the
#   substring anywhere else in a column name.
df_merged = df_merged.rename(columns=lambda col: col.removesuffix('_payements'))

In [14]:
# Preview the first rows after the rename.
df_merged.head()

Unnamed: 0.1,Unnamed: 0,id,donor_id,payment_platform,portfolio,amount,currency,date,counterfactuality,pledge_id,...,year,month,donor_chapter,chapter_type,pledge_status,pledge_created_at,pledge_starts_at,pledge_ended_at,contribution_amount,frequency
0,2,5B5XGD4RTX,,Benevity,OFTW Top Picks,-100.0,USD,2024-02-01,0.313636,7373fca9-78cc-4954-baaa-b2acebac595b,...,2024,2024-02,Netflix,,One-Time,2024-02-01,2024-02-01,NaT,-100.0,One-Time
1,3,3Z5NNTTCSD,,Benevity,OFTW Top Picks,-1.01,USD,2022-09-23,0.313636,5e6bc702-7990-4351-a5b5-4cf39401c379,...,2022,2022-09,Microsoft,Corporate,One-Time,2022-09-23,2022-09-23,NaT,-1.01,One-Time
2,4,31V9VD2AAV,,Benevity,OFTW Top Picks,0.0,USD,2021-11-19,0.313636,8a8c1bc3-a6a4-4819-a8c1-732e7a88408b,...,2021,2021-11,Bain Capital Community Partnership,,One-Time,2021-11-19,2021-11-19,NaT,0.0,One-Time
3,5,31V9VJBAN5,,Benevity,OFTW Top Picks,0.0,USD,2021-11-19,0.313636,8a8c1bc3-a6a4-4819-a8c1-732e7a88408b,...,2021,2021-11,Bain Capital Community Partnership,,One-Time,2021-11-19,2021-11-19,NaT,0.0,One-Time
4,6,31V9VMC0YD,,Benevity,OFTW Top Picks,0.0,USD,2021-11-19,0.313636,d16156a3-e981-4019-80ae-bceb2daaf047,...,2021,2021-11,Bain Capital Community Partnership,,One-Time,2021-11-19,2021-11-19,NaT,0.0,One-Time


In [15]:
# Verify that the column names no longer carry a merge suffix.
df_merged.dtypes

Unnamed: 0                      int64
id                             object
donor_id                       object
payment_platform               object
portfolio                      object
amount                        float64
currency                       object
date                   datetime64[ns]
counterfactuality             float64
pledge_id                      object
multiplier                    float64
amount_usd                    float64
year                            int64
month                       period[M]
donor_chapter                  object
chapter_type                   object
pledge_status                  object
pledge_created_at      datetime64[ns]
pledge_starts_at       datetime64[ns]
pledge_ended_at        datetime64[ns]
contribution_amount           float64
frequency                      object
dtype: object

- Drop the leftover `Unnamed: 0` column (the drop itself is performed in the cell just before the CSV export).

In [20]:
# Schema check before the drop: 'Unnamed: 0' is still present at this point.
df_merged.dtypes

Unnamed: 0                      int64
id                             object
donor_id                       object
payment_platform               object
portfolio                      object
amount                        float64
currency                       object
date                   datetime64[ns]
counterfactuality             float64
pledge_id                      object
multiplier                    float64
amount_usd                    float64
year                            int64
month                       period[M]
donor_chapter                  object
chapter_type                   object
pledge_status                  object
pledge_created_at      datetime64[ns]
pledge_starts_at       datetime64[ns]
pledge_ended_at        datetime64[ns]
contribution_amount           float64
frequency                      object
dtype: object

## Export to CSV

In [17]:
# Size check before exporting: row/column count and in-memory footprint.
# The shape is printed explicitly — as a bare expression that is not the last line
# of the cell, it would be evaluated and silently discarded.
print(df_merged.shape)
df_merged.memory_usage(deep=True).sum() / (1024**2)  # bytes -> MiB

np.float64(42.227601051330566)

In [23]:
# Final schema check ahead of the export.
df_merged.dtypes

id                             object
donor_id                       object
payment_platform               object
portfolio                      object
amount                        float64
currency                       object
date                   datetime64[ns]
counterfactuality             float64
pledge_id                      object
multiplier                    float64
amount_usd                    float64
year                            int64
month                       period[M]
donor_chapter                  object
chapter_type                   object
pledge_status                  object
pledge_created_at      datetime64[ns]
pledge_starts_at       datetime64[ns]
pledge_ended_at        datetime64[ns]
contribution_amount           float64
frequency                      object
dtype: object

In [22]:
# Remove the leftover 'Unnamed: 0' column so it does not end up in the exported file.
# errors='ignore' makes the cell safe to re-run: without it, a second execution
# raises KeyError because the column is already gone.
df_merged = df_merged.drop(columns='Unnamed: 0', errors='ignore')

In [24]:
# Write the final dataset; index=False keeps the row index out of the file.
df_merged.to_csv('payments_and_pledges.csv', index=False)