# EPU Index Construction

In [1]:
# Global options: sample window applied to every newspaper series
start_yr, end_yr = 2004, 2025  # first and last calendar year kept (inclusive)
end_mt = 1                     # last month kept within end_yr

In [2]:
# Required Packages
import pandas as pd
import numpy as np

### Core Index with English-Language Newspapers

In [3]:
# Load the monthly count files (one CSV per news outlet)
independent = pd.read_csv('Independent/independent_monthly.csv')
malta_today = pd.read_csv('Malta Today/malta_today_monthly.csv')
times = pd.read_csv('Times/times_monthly.csv')
tvm = pd.read_csv('TVM/tvm_monthly.csv')

# Preview the first rows of each file, labelled with the outlet name
for label, frame in (
    ('Malta Independent', independent),
    ('Malta Today', malta_today),
    ('Times of Malta', times),
    ('TVM', tvm),
):
    print(label)
    print(frame.head())

Malta Independent
   Year  Month  Articles  EPU
0  2004     12        36    0
1  2005      1       306    3
2  2005      2       279    1
3  2005      3       317    1
4  2005      4       782    5
Malta Today
   Year  Month  Articles  EPU
0  2004      1        45    2
1  2004      2        61    0
2  2004      3        56    0
3  2004      4        56    1
4  2004      5        68    3
Times of Malta
   Year  Month  Articles  EPU
0  2002      4         5    0
1  2002      5         4    0
2  2002      6       842    6
3  2002      7       869    9
4  2002      8       726    4
TVM
     Year  Month  Articles  EPU
0  2015.0    6.0        35    0
1  2015.0    7.0       216    1
2  2015.0    8.0       349    0
3  2015.0    9.0       291    1
4  2015.0   10.0       316    0


In [4]:
# Collect the outlet dataframes; later cells address them by position,
# so the order here must not change
df_list = [
    independent,
    malta_today,
    times,
    tvm,
]

In [5]:
# Display names for the dataframes, in the same order as df_list
df_names = [
    'Malta Independent',
    'Malta Today',
    'Times of Malta',
    'TVM',
]

In [6]:
# Data Cleaning
for i, frame in enumerate(df_list):
    # Cast the date parts to integers (some files load them as floats, e.g. 2015.0)
    frame['Year'] = frame['Year'].astype(int)
    frame['Month'] = frame['Month'].astype(int)
    # Enforce Date Range: keep start_yr..end_yr inclusive, and within end_yr
    # drop every month after end_mt
    in_window = frame['Year'].between(start_yr, end_yr) & ~(
        (frame['Year'] == end_yr) & (frame['Month'] > end_mt)
    )
    # Make Sure Number of Articles is Greater than 10
    enough_articles = frame['Articles'] > 10
    # Store an explicit copy: later cells add columns to these dataframes, and
    # writing into a boolean-filtered slice triggers SettingWithCopyWarning
    df_list[i] = frame.loc[in_window & enough_articles].copy()

In [7]:
# Newspaper-specific indicator: EPU article count divided by total article count
for paper in df_list:
    paper['EPUI'] = paper['EPU'].div(paper['Articles'])

In [8]:
# Standard deviation of each newspaper's EPUI series, in df_list order
st_dev = [paper['EPUI'].std() for paper in df_list]

In [9]:
# Standardise Newspaper Indices: divide each paper's EPU share by st_dev[i]
for i, paper in enumerate(df_list):
    # Recompute from the raw counts instead of rescaling EPUI in place, so that
    # re-running this cell does not divide by the standard deviation twice
    paper['EPUI'] = (paper['EPU'] / paper['Articles']) / st_dev[i]

In [10]:
# Get Expanded EPU Dataframe

# Get Date Feature (first day of each month)
for i, frame in enumerate(df_list):
    month_start = frame['Year'].astype(str) + '-' + frame['Month'].astype(str) + '-01'
    # assign() returns a new dataframe, so no column is written into a
    # filtered slice and re-running the cell simply rebuilds the column
    df_list[i] = frame.assign(Date=pd.to_datetime(month_start))

# Get Subset of Dataframes -> Dates Merge to be Based on Times Dataset (Largest)
# The EPUI column is renamed per outlet while subsetting. rename() without
# inplace returns a new dataframe, so pandas' chained-assignment warning no
# longer has to be switched off globally.
independent_index = df_list[0][['Date', 'EPUI']].rename(columns={'EPUI': 'EPUI_Independent'})
malta_today_index = df_list[1][['Date', 'EPUI']].rename(columns={'EPUI': 'EPUI_MaltaToday'})
times_index = df_list[2][['Date', 'Year', 'Month', 'EPUI']].rename(columns={'EPUI': 'EPUI_Times'})
tvm_index = df_list[3][['Date', 'EPUI']].rename(columns={'EPUI': 'EPUI_TVM'})

# Join Datasets: left joins keep exactly the months present in the Times data.
# validate='one_to_one' raises a MergeError if a Date is duplicated on either
# side, instead of silently multiplying rows in the combined table.
epu = times_index
for other_index in (independent_index, malta_today_index, tvm_index):
    epu = epu.merge(other_index, how='left', on='Date', validate='one_to_one')

print('EPU Dataframe Size :', epu.shape)
epu.info()

EPU Dataframe Size : (253, 7)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 253 entries, 0 to 252
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Date              253 non-null    datetime64[ns]
 1   Year              253 non-null    int64         
 2   Month             253 non-null    int64         
 3   EPUI_Times        253 non-null    float64       
 4   EPUI_Independent  242 non-null    float64       
 5   EPUI_MaltaToday   223 non-null    float64       
 6   EPUI_TVM          116 non-null    float64       
dtypes: datetime64[ns](1), float64(4), int64(2)
memory usage: 14.0 KB


In [11]:
# Composite series: row-wise mean of the outlet indices, ignoring missing values
outlet_cols = ['EPUI_Times', 'EPUI_Independent', 'EPUI_MaltaToday', 'EPUI_TVM']
epu['EPU'] = epu[outlet_cols].mean(axis='columns', skipna=True)

In [12]:
# Rescale the composite series so that its mean over the sample equals 100
epu_mean = epu['EPU'].mean()
epu['EPU'] = epu['EPU'].mul(100).div(epu_mean)

# Show the label, the first rows, a separator and the last rows
for item in ('EPU', epu.head(), '...', epu.tail()):
    print(item)

EPU
        Date  Year  Month  EPUI_Times  EPUI_Independent  EPUI_MaltaToday  \
0 2004-01-01  2004      1    1.027015               NaN         3.327752   
1 2004-02-01  2004      2    0.553586               NaN         0.000000   
2 2004-03-01  2004      3    0.899702               NaN         0.000000   
3 2004-04-01  2004      4    0.520276               NaN         1.337043   
4 2004-05-01  2004      5    1.342085               NaN         3.303284   

   EPUI_TVM         EPU  
0       NaN  147.851131  
1       NaN   18.795109  
2       NaN   30.546285  
3       NaN   63.058876  
4       NaN  157.717506  
...
          Date  Year  Month  EPUI_Times  EPUI_Independent  EPUI_MaltaToday  \
248 2024-09-01  2024      9    1.471444          2.320151         0.504205   
249 2024-10-01  2024     10    1.133671          2.778202         0.441737   
250 2024-11-01  2024     11    0.790608          2.631715         2.339826   
251 2024-12-01  2024     12    2.530679          2.482750         0

In [13]:
# Write the combined index table to disk, leaving out the row index
epu.to_csv(path_or_buf='EPU.csv', index=False)