### Import Modules and Python Functions

In [1]:
import os
import numpy as np
import pandas as pd
import yaml
with open('../python/variables.yaml') as info:
    VARS = yaml.load(info, Loader=yaml.FullLoader)

### Import Variables

In [2]:
# Events
events_csv = VARS['EVENTS_CSV']
events_cols_all = VARS['EVENTS_COLS_ALL']
events_cols_select = VARS['EVENTS_COLS_SELECT']

# Mentions
mentions_csv = VARS['MENTIONS_CSV']
mentions_cols_all = VARS['MENTIONS_COLS_ALL']
mentions_cols_select = VARS['MENTIONS_COLS_SELECT']

# CAMEO
cameo_verbs = VARS['CAMEO_VERBS']
cameo_quadclass = VARS['CAMEO_QUADCLASS']

# Output
desired_columns = VARS['DESIRED_COLUMNS']

### Define Reusable Python Functions

In [3]:
def get_code_strings(df: pd, codes_col: str, strings: list) -> pd:
    """
    Create Column for Strings Associated with Code Column
    
    :param df: dataframe of cleaned data
    :param codes_col: name of the code column in dataframe
    :param strings: list of strings associated with code column
    :rtype: dataframes
    :return: 
        :df: param dataframe with new string column
        :verified_df: dataframe to confirm correct code/string column creation
    """
    
    assert len(df[codes_col].unique()) == len(strings), "Length of codes and strings list are not equal"
    
    # Convert lists to dictionary 
    codes = df[codes_col].sort_values(ascending=True).unique()
    code_dict = {codes[i]: strings[i] for i in range(len(codes))}
    
    # Add column for code strings
    codes_string_col = codes_col+'String'
    df[codes_string_col] = df[codes_col].map(code_dict)

    # verify output
    verify_df = df[[codes_col, codes_string_col]].sort_values(by=codes_col, ascending=True).drop_duplicates()

    # return df and verified output
    return df, verify_df

### Import Data

In [4]:
# get data
gdelt_df = pd.DataFrame(pd.read_csv('../select_data/data/bq_data_feb2021.csv')).drop(columns='Unnamed: 0')
print(gdelt_df.shape)
gdelt_df.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


(1000000, 15)


Unnamed: 0,GLOBALEVENTID,EventTimeDate,MentionTimeDate,Confidence,MentionDocTone,EventCode,EventRootCode,QuadClass,GoldsteinScale,ActionGeo_Type,ActionGeo_FullName,ActionGeo_CountryCode,ActionGeo_Lat,ActionGeo_Long,SOURCEURL
0,968305988,20210206114500,20210206114500,100,-1.383043,75,7,2,7.0,4,"Timna, HaDarom, Israel",IS,29.7667,34.9833,https://blogs.timesofisrael.com/israels-interi...
1,970822400,20210221193000,20210221193000,60,-2.877698,834,8,2,5.0,4,"Algiers, Alger, Algeria",AG,36.7631,3.05056,https://www.aa.com.tr/en/middle-east/algeria-d...
2,970752273,20210221051500,20210221051500,20,4.621849,86,8,2,9.0,1,China,CH,35.0,105.0,http://www.china.org.cn/world/2021-02/21/conte...
3,971296324,20210224093000,20210224114500,50,0.621118,44,4,1,2.5,4,"Beijing, Beijing, China",CH,39.9289,116.388,https://www.aninews.in/news/world/asia/india-s...
4,971681977,20210226064500,20210226064500,50,-0.581395,44,4,1,2.5,1,Singapore,SN,1.366667,103.8,https://www.malaysiakini.com/news/564438


In [5]:
print('Number of Global Event Ids: ', len(gdelt_df['GLOBALEVENTID'].unique()))

Number of Global Event Ids:  469260


In [6]:
# Assess date range
print('Min Event Date: ', gdelt_df['EventTimeDate'].min())
print('Max Event Date: ', gdelt_df['EventTimeDate'].max())

Min Event Date:  20210201000000
Max Event Date:  20210305183000


In [7]:
# Assess date range
print('Min Mentions Date: ', gdelt_df['MentionTimeDate'].min())
print('Max Mentions Date: ', gdelt_df['MentionTimeDate'].max())

Min Mentions Date:  20210201000000
Max Mentions Date:  20210305183000


### Assess Data Types

In [8]:
gdelt_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 15 columns):
 #   Column                 Non-Null Count    Dtype  
---  ------                 --------------    -----  
 0   GLOBALEVENTID          1000000 non-null  int64  
 1   EventTimeDate          1000000 non-null  int64  
 2   MentionTimeDate        1000000 non-null  int64  
 3   Confidence             1000000 non-null  int64  
 4   MentionDocTone         1000000 non-null  float64
 5   EventCode              1000000 non-null  object 
 6   EventRootCode          1000000 non-null  object 
 7   QuadClass              1000000 non-null  int64  
 8   GoldsteinScale         999988 non-null   float64
 9   ActionGeo_Type         1000000 non-null  int64  
 10  ActionGeo_FullName     945345 non-null   object 
 11  ActionGeo_CountryCode  946275 non-null   object 
 12  ActionGeo_Lat          945342 non-null   float64
 13  ActionGeo_Long         945741 non-null   float64
 14  SOURCEURL          

#### Change object EventCode columns to floats

In [9]:
# Convert EventCode and EventRootCode 
gdelt_df['EventCode'] = pd.to_numeric(gdelt_df['EventCode'].replace('---', np.nan), downcast='integer')
gdelt_df['EventRootCode'] = pd.to_numeric(gdelt_df['EventRootCode'].replace('--', np.nan), downcast='integer')
gdelt_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 15 columns):
 #   Column                 Non-Null Count    Dtype  
---  ------                 --------------    -----  
 0   GLOBALEVENTID          1000000 non-null  int64  
 1   EventTimeDate          1000000 non-null  int64  
 2   MentionTimeDate        1000000 non-null  int64  
 3   Confidence             1000000 non-null  int64  
 4   MentionDocTone         1000000 non-null  float64
 5   EventCode              999999 non-null   float64
 6   EventRootCode          999999 non-null   float64
 7   QuadClass              1000000 non-null  int64  
 8   GoldsteinScale         999988 non-null   float64
 9   ActionGeo_Type         1000000 non-null  int64  
 10  ActionGeo_FullName     945345 non-null   object 
 11  ActionGeo_CountryCode  946275 non-null   object 
 12  ActionGeo_Lat          945342 non-null   float64
 13  ActionGeo_Long         945741 non-null   float64
 14  SOURCEURL          

#### Select Mentions within first 60 Days of an Event

In [10]:
# Calculate days between
print(gdelt_df.shape)
gdelt_df['DaysBetween'] = gdelt_df['MentionTimeDate'] - gdelt_df['EventTimeDate']
# Verify output
print( 'Max Days: ', max(gdelt_df['DaysBetween']))
print( 'Min Days: ', min(gdelt_df['DaysBetween']))
print( 'Mode Days: ', gdelt_df[['DaysBetween']].mode())

(1000000, 15)
Max Days:  102220000
Min Days:  -5500
Mode Days:     DaysBetween
0            0


In [11]:
# Drop rows where days_between <= 60 days
gdelt_60d_df = gdelt_df[gdelt_df['DaysBetween'] <= 60].reset_index(drop=True)
print(gdelt_60d_df.shape)
print('Number of Global Event Ids: ', len(gdelt_60d_df['GLOBALEVENTID'].unique()))
gdelt_60d_df.head()

(461672, 16)
Number of Global Event Ids:  435600


Unnamed: 0,GLOBALEVENTID,EventTimeDate,MentionTimeDate,Confidence,MentionDocTone,EventCode,EventRootCode,QuadClass,GoldsteinScale,ActionGeo_Type,ActionGeo_FullName,ActionGeo_CountryCode,ActionGeo_Lat,ActionGeo_Long,SOURCEURL,DaysBetween
0,968305988,20210206114500,20210206114500,100,-1.383043,75.0,7.0,2,7.0,4,"Timna, HaDarom, Israel",IS,29.7667,34.9833,https://blogs.timesofisrael.com/israels-interi...,0
1,970822400,20210221193000,20210221193000,60,-2.877698,834.0,8.0,2,5.0,4,"Algiers, Alger, Algeria",AG,36.7631,3.05056,https://www.aa.com.tr/en/middle-east/algeria-d...,0
2,970752273,20210221051500,20210221051500,20,4.621849,86.0,8.0,2,9.0,1,China,CH,35.0,105.0,http://www.china.org.cn/world/2021-02/21/conte...,0
3,971681977,20210226064500,20210226064500,50,-0.581395,44.0,4.0,1,2.5,1,Singapore,SN,1.366667,103.8,https://www.malaysiakini.com/news/564438,0
4,968704114,20210209064500,20210209064500,20,0.665779,152.0,15.0,4,-7.2,1,Russia,RS,60.0,100.0,https://www.ukrinform.ua/rubric-society/318644...,0


#### Change int64 dates to datetimes

In [12]:
date_format = '%Y%m%d%H%M%S'
gdelt_60d_df['EventTimeDate'] = pd.to_datetime(gdelt_60d_df['EventTimeDate'].astype(str), format=date_format)
gdelt_60d_df['MentionTimeDate'] = pd.to_datetime(gdelt_60d_df['MentionTimeDate'].astype(str), format=date_format)
gdelt_60d_df.head()

Unnamed: 0,GLOBALEVENTID,EventTimeDate,MentionTimeDate,Confidence,MentionDocTone,EventCode,EventRootCode,QuadClass,GoldsteinScale,ActionGeo_Type,ActionGeo_FullName,ActionGeo_CountryCode,ActionGeo_Lat,ActionGeo_Long,SOURCEURL,DaysBetween
0,968305988,2021-02-06 11:45:00,2021-02-06 11:45:00,100,-1.383043,75.0,7.0,2,7.0,4,"Timna, HaDarom, Israel",IS,29.7667,34.9833,https://blogs.timesofisrael.com/israels-interi...,0
1,970822400,2021-02-21 19:30:00,2021-02-21 19:30:00,60,-2.877698,834.0,8.0,2,5.0,4,"Algiers, Alger, Algeria",AG,36.7631,3.05056,https://www.aa.com.tr/en/middle-east/algeria-d...,0
2,970752273,2021-02-21 05:15:00,2021-02-21 05:15:00,20,4.621849,86.0,8.0,2,9.0,1,China,CH,35.0,105.0,http://www.china.org.cn/world/2021-02/21/conte...,0
3,971681977,2021-02-26 06:45:00,2021-02-26 06:45:00,50,-0.581395,44.0,4.0,1,2.5,1,Singapore,SN,1.366667,103.8,https://www.malaysiakini.com/news/564438,0
4,968704114,2021-02-09 06:45:00,2021-02-09 06:45:00,20,0.665779,152.0,15.0,4,-7.2,1,Russia,RS,60.0,100.0,https://www.ukrinform.ua/rubric-society/318644...,0


In [13]:
# Verify output
max(gdelt_60d_df['DaysBetween'])

0

#### Group Mentions Data by GlobalEventId for Average Confidence and Mention Tone

agg_cols = ['GLOBALEVENTID','MeanConfidence', 'MeanMentionDocTone']

gdelt_sm_60d_agg_df = gdelt_sm_60d_df.groupby(['GLOBALEVENTID', 'EventTimeDate'], as_index=False,
                                             ).mean() # get mean Confidence, Tone, DaysBetween
gdelt_sm_60d_agg_df.head()

### Assess Null Values

In [14]:
merged_df = gdelt_60d_df.copy()
print(merged_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461672 entries, 0 to 461671
Data columns (total 16 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   GLOBALEVENTID          461672 non-null  int64         
 1   EventTimeDate          461672 non-null  datetime64[ns]
 2   MentionTimeDate        461672 non-null  datetime64[ns]
 3   Confidence             461672 non-null  int64         
 4   MentionDocTone         461672 non-null  float64       
 5   EventCode              461671 non-null  float64       
 6   EventRootCode          461671 non-null  float64       
 7   QuadClass              461672 non-null  int64         
 8   GoldsteinScale         461663 non-null  float64       
 9   ActionGeo_Type         461672 non-null  int64         
 10  ActionGeo_FullName     437335 non-null  object        
 11  ActionGeo_CountryCode  437785 non-null  object        
 12  ActionGeo_Lat          437332 non-null  floa

#### Based on project requirements, the data source for visualization presence of *non-null* values in the following columns:
- GlobalEventId
- EventTimeDate
- ActionGeo_CountryCode
- EventCode
- GoldsteinScale
- MentionDocTone

In [15]:
# Drop all rows in merged_df with nulls in the specified columns
required_value_columns = ['GLOBALEVENTID', 'EventTimeDate', 'ActionGeo_CountryCode', 
                          'EventCode', 'GoldsteinScale', 'MentionDocTone']

cleaned_merged_df = merged_df[~pd.isnull(merged_df[required_value_columns]).any(axis=1)].reset_index(drop=True)
print(cleaned_merged_df.shape)
cleaned_merged_df.head(1)

(437776, 16)


Unnamed: 0,GLOBALEVENTID,EventTimeDate,MentionTimeDate,Confidence,MentionDocTone,EventCode,EventRootCode,QuadClass,GoldsteinScale,ActionGeo_Type,ActionGeo_FullName,ActionGeo_CountryCode,ActionGeo_Lat,ActionGeo_Long,SOURCEURL,DaysBetween
0,968305988,2021-02-06 11:45:00,2021-02-06 11:45:00,100,-1.383043,75.0,7.0,2,7.0,4,"Timna, HaDarom, Israel",IS,29.7667,34.9833,https://blogs.timesofisrael.com/israels-interi...,0


### Replace Cameo Code Root Integer Values with Associated String

In [16]:
print(cameo_verbs)

['MAKE PUBLIC STATEMENT', 'APPEAL', 'EXPRESS INTENT TO COOPERATE', 'CONSULT', 'ENGAGE IN DIPLOMATIC COOPERATION', 'ENGAGE IN MATERIAL COOPERATION', 'PROVIDE AID', 'YIELD', 'INVESTIGATE', 'DEMAND', 'DISAPPROVE', 'REJECT', 'THREATEN', 'PROTEST', 'EXHIBIT MILITARY POSTURE', 'REDUCE RELATIONS', 'COERCE', 'ASSAULT', 'FIGHT', 'ENGAGE IN UNCONVENTIONAL MASS VIOLENCE']


In [17]:
# Add string column
cleaned_merged_df, cameo_code_df = get_code_strings(cleaned_merged_df, 'EventRootCode', cameo_verbs)
cameo_code_df

Unnamed: 0,EventRootCode,EventRootCodeString
95336,1.0,MAKE PUBLIC STATEMENT
145917,2.0,APPEAL
154172,3.0,EXPRESS INTENT TO COOPERATE
77494,4.0,CONSULT
190644,5.0,ENGAGE IN DIPLOMATIC COOPERATION
213071,6.0,ENGAGE IN MATERIAL COOPERATION
225292,7.0,PROVIDE AID
374011,8.0,YIELD
38566,9.0,INVESTIGATE
413628,10.0,DEMAND


In [18]:
# verify output
cleaned_merged_df.head(2)

Unnamed: 0,GLOBALEVENTID,EventTimeDate,MentionTimeDate,Confidence,MentionDocTone,EventCode,EventRootCode,QuadClass,GoldsteinScale,ActionGeo_Type,ActionGeo_FullName,ActionGeo_CountryCode,ActionGeo_Lat,ActionGeo_Long,SOURCEURL,DaysBetween,EventRootCodeString
0,968305988,2021-02-06 11:45:00,2021-02-06 11:45:00,100,-1.383043,75.0,7.0,2,7.0,4,"Timna, HaDarom, Israel",IS,29.7667,34.9833,https://blogs.timesofisrael.com/israels-interi...,0,PROVIDE AID
1,970822400,2021-02-21 19:30:00,2021-02-21 19:30:00,60,-2.877698,834.0,8.0,2,5.0,4,"Algiers, Alger, Algeria",AG,36.7631,3.05056,https://www.aa.com.tr/en/middle-east/algeria-d...,0,YIELD


### Replace Cameo Code Root Integer Values with Associated String

In [19]:
print(cameo_quadclass)

['Verbal Cooperation', 'Material Cooperation', 'Verbal Conflict', 'Material Conflict']


In [20]:
# get string column
cleaned_merged_df, cameo_quadclass_df = get_code_strings(cleaned_merged_df, 'QuadClass', cameo_quadclass)
cameo_quadclass_df

Unnamed: 0,QuadClass,QuadClassString
109650,1,Verbal Cooperation
225255,2,Material Cooperation
15009,3,Verbal Conflict
71734,4,Material Conflict


In [21]:
# verify output
cleaned_merged_df.head(2)

Unnamed: 0,GLOBALEVENTID,EventTimeDate,MentionTimeDate,Confidence,MentionDocTone,EventCode,EventRootCode,QuadClass,GoldsteinScale,ActionGeo_Type,ActionGeo_FullName,ActionGeo_CountryCode,ActionGeo_Lat,ActionGeo_Long,SOURCEURL,DaysBetween,EventRootCodeString,QuadClassString
0,968305988,2021-02-06 11:45:00,2021-02-06 11:45:00,100,-1.383043,75.0,7.0,2,7.0,4,"Timna, HaDarom, Israel",IS,29.7667,34.9833,https://blogs.timesofisrael.com/israels-interi...,0,PROVIDE AID,Material Cooperation
1,970822400,2021-02-21 19:30:00,2021-02-21 19:30:00,60,-2.877698,834.0,8.0,2,5.0,4,"Algiers, Alger, Algeria",AG,36.7631,3.05056,https://www.aa.com.tr/en/middle-east/algeria-d...,0,YIELD,Material Cooperation


### Replace Country Codes with Country Names

In [22]:
# get country data
country_codes_df = pd.DataFrame(pd.read_csv('../select_data/data/countries.csv', encoding= 'unicode_escape'))

# convert lists to dictionary 
country_code_dict = {country_codes_df['alpha-2'][i]: country_codes_df['name'][i] for i in range(len(country_codes_df))}

# Add column for cameo code root strings (verbs)
cleaned_merged_df['ActionGeo_FullName'] = cleaned_merged_df['ActionGeo_CountryCode'].map(country_code_dict)
cleaned_merged_df.head()

Unnamed: 0,GLOBALEVENTID,EventTimeDate,MentionTimeDate,Confidence,MentionDocTone,EventCode,EventRootCode,QuadClass,GoldsteinScale,ActionGeo_Type,ActionGeo_FullName,ActionGeo_CountryCode,ActionGeo_Lat,ActionGeo_Long,SOURCEURL,DaysBetween,EventRootCodeString,QuadClassString
0,968305988,2021-02-06 11:45:00,2021-02-06 11:45:00,100,-1.383043,75.0,7.0,2,7.0,4,Iceland,IS,29.7667,34.9833,https://blogs.timesofisrael.com/israels-interi...,0,PROVIDE AID,Material Cooperation
1,970822400,2021-02-21 19:30:00,2021-02-21 19:30:00,60,-2.877698,834.0,8.0,2,5.0,4,Antigua and Barbuda,AG,36.7631,3.05056,https://www.aa.com.tr/en/middle-east/algeria-d...,0,YIELD,Material Cooperation
2,970752273,2021-02-21 05:15:00,2021-02-21 05:15:00,20,4.621849,86.0,8.0,2,9.0,1,Switzerland,CH,35.0,105.0,http://www.china.org.cn/world/2021-02/21/conte...,0,YIELD,Material Cooperation
3,971681977,2021-02-26 06:45:00,2021-02-26 06:45:00,50,-0.581395,44.0,4.0,1,2.5,1,Senegal,SN,1.366667,103.8,https://www.malaysiakini.com/news/564438,0,CONSULT,Verbal Cooperation
4,968704114,2021-02-09 06:45:00,2021-02-09 06:45:00,20,0.665779,152.0,15.0,4,-7.2,1,Serbia,RS,60.0,100.0,https://www.ukrinform.ua/rubric-society/318644...,0,EXHIBIT MILITARY POSTURE,Material Conflict


In [23]:
country_code_dict

{'AF': 'Afghanistan',
 'AX': '\x81land Islands',
 'AL': 'Albania',
 'DZ': 'Algeria',
 'AS': 'American Samoa',
 'AD': 'Andorra',
 'AO': 'Angola',
 'AI': 'Anguilla',
 'AQ': 'Antarctica',
 'AG': 'Antigua and Barbuda',
 'AR': 'Argentina',
 'AM': 'Armenia',
 'AW': 'Aruba',
 'AU': 'Australia',
 'AT': 'Austria',
 'AZ': 'Azerbaijan',
 'BS': 'Bahamas',
 'BH': 'Bahrain',
 'BD': 'Bangladesh',
 'BB': 'Barbados',
 'BY': 'Belarus',
 'BE': 'Belgium',
 'BZ': 'Belize',
 'BJ': 'Benin',
 'BM': 'Bermuda',
 'BT': 'Bhutan',
 'BO': 'Bolivia (Plurinational State of)',
 'BQ': 'Bonaire, Sint Eustatius and Saba',
 'BA': 'Bosnia and Herzegovina',
 'BW': 'Botswana',
 'BV': 'Bouvet Island',
 'BR': 'Brazil',
 'IO': 'British Indian Ocean Territory',
 'BN': 'Brunei Darussalam',
 'BG': 'Bulgaria',
 'BF': 'Burkina Faso',
 'BI': 'Burundi',
 'CV': 'Cabo Verde',
 'KH': 'Cambodia',
 'CM': 'Cameroon',
 'CA': 'Canada',
 'KY': 'Cayman Islands',
 'CF': 'Central African Republic',
 'TD': 'Chad',
 'CL': 'Chile',
 'CN': 'China',
 

In [24]:
# verify output
cameo_country_df = cleaned_merged_df[['ActionGeo_CountryCode', 'ActionGeo_FullName']].sort_values(by='ActionGeo_CountryCode',
                                                                                ascending=True).drop_duplicates()
cameo_country_df.head(50)

Unnamed: 0,ActionGeo_CountryCode,ActionGeo_FullName
13655,AA,
3578,AC,
333743,AE,United Arab Emirates
139211,AF,Afghanistan
223651,AG,Antigua and Barbuda
316823,AJ,
425071,AL,Albania
324797,AM,Armenia
304078,AN,
82852,AO,Angola


In [25]:
# Select Desired Columns for Data Factory Output
cleaned_merged_df = cleaned_merged_df[desired_columns]
print('Cleaned Data with Desired Columns: ',cleaned_merged_df.shape)
cleaned_merged_df.head()

Cleaned Data with Desired Columns:  (437776, 16)


Unnamed: 0,GLOBALEVENTID,EventTimeDate,MentionTimeDate,DaysBetween,Confidence,MentionDocTone,ActionGeo_CountryCode,ActionGeo_FullName,EventCode,EventRootCodeString,QuadClass,QuadClassString,GoldsteinScale,ActionGeo_Lat,ActionGeo_Long,SOURCEURL
0,968305988,2021-02-06 11:45:00,2021-02-06 11:45:00,0,100,-1.383043,IS,Iceland,75.0,PROVIDE AID,2,Material Cooperation,7.0,29.7667,34.9833,https://blogs.timesofisrael.com/israels-interi...
1,970822400,2021-02-21 19:30:00,2021-02-21 19:30:00,0,60,-2.877698,AG,Antigua and Barbuda,834.0,YIELD,2,Material Cooperation,5.0,36.7631,3.05056,https://www.aa.com.tr/en/middle-east/algeria-d...
2,970752273,2021-02-21 05:15:00,2021-02-21 05:15:00,0,20,4.621849,CH,Switzerland,86.0,YIELD,2,Material Cooperation,9.0,35.0,105.0,http://www.china.org.cn/world/2021-02/21/conte...
3,971681977,2021-02-26 06:45:00,2021-02-26 06:45:00,0,50,-0.581395,SN,Senegal,44.0,CONSULT,1,Verbal Cooperation,2.5,1.366667,103.8,https://www.malaysiakini.com/news/564438
4,968704114,2021-02-09 06:45:00,2021-02-09 06:45:00,0,20,0.665779,RS,Serbia,152.0,EXHIBIT MILITARY POSTURE,4,Material Conflict,-7.2,60.0,100.0,https://www.ukrinform.ua/rubric-society/318644...


In [26]:
# Store dataframe globally
%store cleaned_merged_df

Stored 'cleaned_merged_df' (DataFrame)
