1. [Working directory, packages and data](#chapter1)
2. [Preprocessing](#chapter2)
3. [Dictionaries](#chapter3)
4. [Wrapper function](#chapter4)
5. [Rooduijn & Pauwels](#chapter5)
   1. [Construct validity](#subparagraph1)
   2. [Face validity](#subparagraph2)
   3. [External validity](#subparagraph3)
       1. [CHES](#subparagraph4)
       2. [PopuList](#subparagraph5)
6. [Decadri & Boussalis](#chapter6)
   1. [Construct validity](#subparagraph6)
   2. [Face validity](#subparagraph7)
   3. [External validity](#subparagraph8)
       1. [CHES](#subparagraph9)
       2. [PopuList](#subparagraph10)

# Working directory, packages and data <a class="anchor" id="chapter1"></a>

Setting the working directory

In [1]:
# NOTE(review): machine-specific absolute path. The relative "data/..." paths
# used throughout this notebook are resolved against this directory, so it has
# to be adjusted when the notebook is run on another machine.
setwd("C:/Users/jacop/Tesi/")

Loading the libraries

In [2]:
suppressWarnings(suppressPackageStartupMessages(library(dtplyr)))
suppressWarnings(suppressPackageStartupMessages(library(tidyverse)))
suppressWarnings(suppressPackageStartupMessages(library(data.table)))
suppressWarnings(suppressPackageStartupMessages(library(quanteda)))

Loading the data

In [3]:
# load() restores objects under the names they were saved with; the `Texts`
# data frame used in the next cell is presumably created by this call -- TODO confirm.
# NOTE(review): load() reads save()-format files, so despite its ".rds"
# extension this file is presumably not a saveRDS() file -- confirm.
load("data/parliamentary_groups2.rds")

Creating a lazy data.table out of our dataframe so that we can use dtplyr on it

In [4]:
texts <- lazy_dt(Texts)

Casting the "legislatura" variable as integer

In [5]:
# Convert `legislatura` to integer. as_tibble() evaluates the lazy_dt pipeline
# and returns a tibble, so `texts` is a regular tibble (no longer a lazy
# data.table) from this point on.
texts <- texts %>% mutate(legislatura = as.integer(legislatura)) %>% as_tibble()

Filtering the dataset by focusing on the last seven legislatures

In [6]:
texts <- texts %>% filter(legislatura >= 12) %>% as_tibble()

Decadri and Boussalis' additional stopwords

In [7]:
# Decadri & Boussalis' extra stopwords: the `stopwords` column of the CSV,
# extracted as a plain vector (readr's messages are silenced while reading).
db_additional_stopwords <- pull(
  suppressMessages(read_csv("data/it_stopwords_new_list.csv")),
  stopwords
)

Procedural stopwords

In [8]:
# Procedural stopwords: the `it_stopwords_procedural` column of the CSV,
# extracted as a plain vector (readr's messages are silenced while reading).
procedural_stopwords <- pull(
  suppressMessages(read_csv("data/it_stopwords_procedural.csv")),
  it_stopwords_procedural
)

# Preprocessing <a class="anchor" id="chapter2"></a>

Creating the corpus

In [9]:
my_corpus <- corpus(texts, text_field = "textclean")

Tokenizing the texts

In [10]:
# Tokenise the corpus, dropping punctuation, symbols, numbers and separators.
# NOTE(review): with padding = TRUE quanteda leaves an empty-string placeholder
# where a token was removed; these placeholders presumably still count towards
# the per-document token lengths computed later -- confirm this is intended.
toks <- tokens(my_corpus, 
               remove_punct = TRUE, 
               remove_symbols = TRUE, 
               remove_numbers = TRUE, 
               remove_separators = TRUE,
               padding = TRUE)

Removing the default stopwords

In [11]:
cleaned_toks <- tokens_remove(toks, pattern = stopwords("it"))

Removing the additional stopwords

In [12]:
cleaned_toks <- tokens_remove(cleaned_toks, pattern = db_additional_stopwords)

Removing the procedural stopwords

In [13]:
cleaned_toks <- tokens_remove(cleaned_toks, pattern = procedural_stopwords)

# Dictionaries <a class="anchor" id="chapter3"></a>

Rooduijn and Pauwels' dictionary

In [14]:
# Anti-elitism patterns for Rooduijn & Pauwels' dictionary; a trailing "*" is a
# wildcard, so e.g. "elit*" matches any token starting with "elit".
# NOTE(review): "tradi*" is broad and would presumably also match unrelated
# words such as "tradizione" -- confirm this is intended.
# NOTE(review): "verita" is unaccented; presumably the corpus text has accents
# stripped, otherwise "verità" would not match -- confirm.
anti_elitism <- c("elit*", "consens*", "antidemocratic*", "referend*", "corrot*", "propagand*", 
                  "politici*","ingann*", "tradi*", "vergogn*", "scandal*", "verita", "disonest*", 
                  "partitocrazia", "menzogn*", "mentir*")

# Single-key dictionary: every match is counted under "anti_elitism".
rp_dictionary <- dictionary(list(anti_elitism = anti_elitism))

Decadri and Boussalis' dictionary

In [15]:
# Anti-elitism patterns for Decadri & Boussalis' dictionary.
# Note: this reassigns the global `anti_elitism` vector that was also used to
# build `rp_dictionary`; `rp_dictionary` was created before this point and is
# not modified here.
anti_elitism <- c("antidemocratic*", "casta", "consens*", "corrot*", "disonest*", "elit*", 
                  "establishment", "ingann*", "mentir*", "menzogn*", "partitocrazia", "propagand*", 
                  "scandal*", "tradim*", "tradir*", "tradit*", "vergogn*", "verita")

# People-centrism patterns (the second dimension of this dictionary).
people_centrism  <- c("abitant*", "cittadin*", "consumator*", "contribuent*", "elettor*", "gente", "popol*")

# Two-key dictionary: matches are counted separately under "anti_elitism" and
# "people_centrism".
db_dictionary <- dictionary(list(anti_elitism = anti_elitism, 
                                 people_centrism = people_centrism))

# Wrapper function <a class="anchor" id="chapter4"></a>

We'll run the dictionary analyses with a wrapper function. We'll use the "dictionary" argument to specify which dictionary we'll use each time. In particular:

- no stemming will be applied;
- all stopwords will be removed, including quanteda's default list, Decadri & Boussalis' additional list and the procedural ones;
- all rows from the dataset will be included.

In [16]:
#' Score documents with one of the two populism dictionaries
#'
#' Looks `tokens` up against the chosen dictionary, counts the matches per
#' document and expresses them as a share of the document's token count.
#'
#' @param tokens A quanteda tokens object whose docvars include `gruppoP`
#'   and `year`.
#' @param dictionary Either "Rooduijn_Pauwels" (uses the global
#'   `rp_dictionary`) or "Decadri_Boussalis" (uses the global `db_dictionary`).
#' @return A data frame with one row per document: `doc_id`, `year`, `party`,
#'   the per-key match counts (`anti_elitism`, plus `people_centrism` and
#'   their sum `populist_toks` for Decadri & Boussalis), `n_of_cleaned_toks`,
#'   `perc_of_populist_toks` and `standardized_perc_of_populist_toks`.
#'   Stops with "Invalid dictionary" for any other `dictionary` value.
dict_analysis <- function(tokens, dictionary) {

  # Rejecting anything that is not a single, non-missing string up front

  if (!is.character(dictionary) || length(dictionary) != 1L || is.na(dictionary)) {
    stop("Invalid dictionary")
  }

  # Picking the dictionary object that matches the requested name

  lookup_dict <- switch(dictionary,
    Rooduijn_Pauwels = rp_dictionary,
    Decadri_Boussalis = db_dictionary,
    stop("Invalid dictionary")
  )

  # Applying the dictionary to the tokens and building the dfm

  dict_counts <- convert(dfm(tokens_lookup(x = tokens, dictionary = lookup_dict)),
                         to = "data.frame")

  # Storing the number of tokens in a tibble along with the doc IDs, the party
  # and year variables. The metadata is read from `tokens` itself (not from a
  # global corpus) so that it stays aligned with the documents actually passed
  # in, e.g. when a subset of the tokens is analysed.
  # NOTE(review): ntoken() counts every element of each document, which
  # presumably includes padding placeholders left by tokens(padding = TRUE) --
  # confirm this is the intended denominator.

  doc_info <- tibble(doc_id = docnames(tokens),
                     n_of_cleaned_toks = as.integer(ntoken(tokens)),
                     party = docvars(tokens) %>% pull(gruppoP),
                     year = docvars(tokens) %>% pull(year))

  # Joining the tibble we just created to the dfm

  scored <- left_join(dict_counts, doc_info, by = "doc_id")

  # Choosing the numerator and the leading output columns for each dictionary

  if (dictionary == "Decadri_Boussalis") {
    scored <- mutate(scored, populist_toks = anti_elitism + people_centrism)
    matched_toks <- scored[["populist_toks"]]
    leading_cols <- c("doc_id", "year", "party", "anti_elitism", "people_centrism",
                      "populist_toks", "n_of_cleaned_toks")
  } else {
    matched_toks <- scored[["anti_elitism"]]
    leading_cols <- c("doc_id", "year", "party", "anti_elitism", "n_of_cleaned_toks")
  }

  # Share of dictionary matches per document (NaN for documents with no tokens)

  raw_share <- matched_toks / scored[["n_of_cleaned_toks"]]

  # The standardized score is computed on the raw share *before* missing values
  # are replaced by 0, so documents with no tokens do not enter the mean/sd and
  # keep a missing standardized score; only the unstandardized share is
  # zero-filled.

  scored %>%
    mutate(perc_of_populist_toks = replace_na(raw_share, 0),
           standardized_perc_of_populist_toks = as.double(scale(raw_share))) %>%
    relocate(all_of(leading_cols), perc_of_populist_toks,
             standardized_perc_of_populist_toks)
}

# Rooduijn & Pauwels <a class="anchor" id="chapter5"></a>

Let's run the dictionary analysis using Rooduijn and Pauwels' dictionary

In [22]:
df_rp <- dict_analysis(tokens = cleaned_toks, dictionary = "Rooduijn_Pauwels")

Let's save the results as an .Rdata file

In [25]:
save(df_rp, file = "data/df_rp.RData")

The first rows of the dataframe

In [23]:
head(df_rp) 

Unnamed: 0_level_0,doc_id,year,party,anti_elitism,n_of_cleaned_toks,perc_of_populist_toks,standardized_perc_of_populist_toks
Unnamed: 0_level_1,<chr>,<int>,<chr>,<dbl>,<int>,<dbl>,<dbl>
1,text1,1994,F-ITA,0,24,0,-0.1647037
2,text2,1994,PROGR-F,0,10,0,-0.1647037
3,text3,1994,LEGA-N,0,27,0,-0.1647037
4,text4,1994,RC-PROGR,0,9,0,-0.1647037
5,text5,1994,PROGR-F,0,8,0,-0.1647037
6,text6,1994,PPI,0,8,0,-0.1647037


## Construct validity <a class="anchor" id="subparagraph1"></a>

Rooduijn and Pauwels' dictionary captures the "anti-elitism" component of populism, but not the "people-centrism" one. As a result, from a construct validity standpoint, it is only partially valid. The authors motivated the decision to leave out the "people-centrism" dimension by pointing out that the "people" is often referred to by words such as "us", "we" and "our", which are also used to refer to entities other than the people (such as political parties). The inclusion of these words in the dictionary, they argue, would result in a large number of false positives.

## Face validity <a class="anchor" id="subparagraph2"></a>

A populist dictionary has face validity if the allegedly populist parties are indeed populist. In the Italian case, we would expect populist values to be higher for parties that the literature deems populist (i.e. Five Star Movement, Lega Nord, Forza Italia and Il Popolo delle Libertà).

Let's first run an Analysis Of Variance (ANOVA) by using the % of populist tokens as our dependent variable and the party (i.e. gruppoP) as our predictor. 

The results indicate that the differences in the % of populist tokens between parties are statistically significant.

In [9]:
# Reload the saved results; load() restores the object under the name it was
# saved with (`df_rp`), replacing any object of that name in the workspace.
load("data/df_rp.RData")

# One-way ANOVA: per-document share of populist tokens explained by party.
anova_rp <- aov(perc_of_populist_toks ~ party, data = df_rp)

summary(anova_rp)

                Df Sum Sq   Mean Sq F value Pr(>F)    
party           54  0.027 0.0005048   8.771 <2e-16 ***
Residuals   297618 17.127 0.0000575                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We can assess how Rooduijn and Pauwels' dictionary fares in terms of face validity by grouping the results of the dictionary analysis by year and party and computing the average % of populist tokens (both standardized and unstandardized). 

The following are the 20 party-year combinations with the highest populist score in the 1994-2021 period. Consistent with our expectations, we find populist parties such as FdI-AN (2013), FdI (2018) and Forza Italia (2019). However, we also find mainstream parties such as UDC (2009), SI-SEL-POS-LU (2018), IV (2018), IV (2019), PD (2019) and PD (2018). These results could be interpreted as evidence of either populist contagion or lack of face validity. The absence of M5S and Lega among the most populist parties makes me lean towards the latter.

In [12]:
# Average the raw and the standardized share of populist tokens per
# party-year, then show the 20 combinations with the highest standardized mean.
# na.rm = TRUE presumably matters for the standardized column, which can be
# missing for documents with no tokens -- confirm.
# .groups = "keep" leaves the result grouped by party and year; arrange() and
# head() still operate on the whole table here.
df_rp %>% 
group_by(party, year) %>% 
summarize(mean_perc_of_populist_toks = mean(perc_of_populist_toks, na.rm = TRUE),
          mean_standardized_perc_of_populist_toks = mean(standardized_perc_of_populist_toks, na.rm = TRUE),
         .groups = "keep") %>% 
arrange(desc(mean_standardized_perc_of_populist_toks)) %>% 
head(20)

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
FLD,1996,0.005839229,0.6044212
UNIONE DEI DEMOCRATICI CRISTIANI E DEI DEMOCRATICI DI CENTRO,2009,0.004694836,0.4536852
SI-SEL-POS-LU,2018,0.003844406,0.3416693
PPI,1996,0.003627454,0.313093
DEMO,1996,0.003407491,0.2841203
SI-SEL-POS-LU,2016,0.003309691,0.2712384
IV,2018,0.003292298,0.2689474
FDI-AN,2013,0.0032402,0.2620853
FLD,1994,0.002897469,0.2169418
FI,2019,0.002858297,0.2117821


The following are the party-year combinations with the lowest populist scores. Again we see a mixture of both mainstream and populist parties. Interestingly, LNA (2018), FdI-AN (2018) and PdL (2013) are ranked among the least populist parties. This might be further evidence of lack of face validity in Rooduijn and Pauwels' dictionary.

In [13]:
df_rp %>% 
group_by(party, year) %>% 
summarize(mean_perc_of_populist_toks = mean(perc_of_populist_toks, na.rm = TRUE),
          mean_standardized_perc_of_populist_toks = mean(standardized_perc_of_populist_toks, na.rm = TRUE),
          .groups = "keep") %>% 
arrange(desc(mean_standardized_perc_of_populist_toks)) %>% 
tail(20)

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
DES-CD,2015,0.0003213526,-0.1223761
DCA-NPSI,2006,0.0002135572,-0.1365746
DES-CD,2014,0.0001541198,-0.1444035
VERDI,2006,0.0001411349,-0.1461138
UDEUR,2001,6.002401e-05,-0.1567975
AP-CPE-NCD-NCI,2018,0.0,-0.1647037
CI,2018,0.0,-0.1647037
COM/IT/,2008,0.0,-0.1647037
DCA-NPSI,2008,0.0,-0.1647037
DCA-NPSI,2009,0.0,-0.1647037


## External validity <a class="anchor" id="subparagraph3"></a>

### Chapel Hill Expert Survey <a class="anchor" id="subparagraph4"></a>

As Rooduijn and Pauwels' dictionary only captures the anti-elite dimension of populism, its external validity will be assessed against the anti-elite salience variable from the CHES dataset, which was introduced in 2014.

Let's load the CHES dataset

In [15]:
ches <- read_csv("data/1999-2019_CHES_dataset_means(v2).csv", show_col_types = FALSE)

The countrycode for Italy is 8. The following is a list of all Italian parties in the CHES dataset in the 2014-2019 time period.

In [16]:
ches %>% filter(country == 8 & year >= 2014 & year <= 2019) %>% distinct(party)

party
<chr>
UDC
SC
VdA
PD
FI
LN
FdI
SEL
M5S
CD


While these are the parties included in our dataset in the same timeframe

In [17]:
df_rp %>% filter(year >= 2014 & year <= 2019) %>% distinct(party)

party
<chr>
CI
PD
FI-PDL
M5S
MDP-LU
NCI-SCPI-MAIE
MISTO
SI-SEL-POS-LU
AP-CPE-NCD-NCI
LNA


'Vallée d'Aoste', 'Südtiroler Volkspartei' and 'Radicali Italiani' are not part of our dataset, so let's drop them from the CHES dataset.

In [18]:
# CHES party labels to exclude from the comparison.
to_drop <- c('VdA', 'SVP', 'RI')

# Keep only Italian rows (country code 8) from 2014-2019, minus the excluded
# parties. Note: this overwrites `ches` with the filtered subset, so the full
# dataset has to be read again if it is needed later.
ches <- ches %>% 
        filter(country == 8 & year >= 2014 & year <= 2019 & (!party %in% to_drop)) 

Let's now compare how R&P's dictionary and the CHES dataset ranked party-year combinations by populism in 2014 and 2019. We'll drop the "Mixed group" and "Italia Viva" as these two parliamentary groups are absent from the CHES dataset.

The difference between the two rankings is stark. According to the dictionary analysis, PD (2019) ranks among the most populist party-year combinations and M5S (2019) among the least populist ones, while the opposite is true in the CHES dataset. Moreover, Lega (2019), one of the most populist party-year combinations according to CHES, is only slightly populist according to R&P's dictionary.

In [26]:
df_rp %>% 
filter((year == 2014 | year == 2019) & party != "MISTO" & party != "IV") %>% 
group_by(party, year) %>% 
summarize(mean_perc_of_populist_toks = mean(perc_of_populist_toks),
          mean_standardized_perc_of_populist_toks = mean(standardized_perc_of_populist_toks, na.rm = TRUE),
          .groups = "keep") %>% 
arrange(desc(mean_perc_of_populist_toks))

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
FI,2019,0.0028582965,0.211782102
PD,2019,0.0026468333,0.183928833
FDI,2019,0.0020877951,0.110294089
FDI-AN,2014,0.0020435689,0.104468756
MDP-LU,2014,0.0018144723,0.074292881
AP-CPE-NCD-NCI,2014,0.0016659382,0.05472843
LNA,2014,0.0016545694,0.053230976
M5S,2014,0.0015579224,0.040500932
NCI-SCPI-MAIE,2014,0.0014953564,0.03225994
LEU,2019,0.0013378122,0.011508711


In [24]:
ches %>% 
group_by(party, year) %>% 
summarize(mean_anti_elite_salience = mean(antielite_salience), .groups = "keep") %>% 
arrange(desc(mean_anti_elite_salience))

party,year,mean_anti_elite_salience
<chr>,<dbl>,<dbl>
M5S,2014,10.0
RC,2014,9.333333
M5S,2019,8.888889
LN,2014,8.8
LN,2019,8.333333
FdI,2019,8.0
SEL,2014,6.8
FdI,2014,6.25
PD,2014,4.4
FI,2019,4.176471


### The PopuList <a class="anchor" id="subparagraph5"></a>

Reading the PopuList dataset

In [27]:
populist <- readxl::read_xlsx("data/populist-version-2-20200626.xlsx")

All the Italian parties in the PopuList dataset

In [28]:
populist %>% filter(country_name == "Italy") %>% distinct(party_name)

party_name
<chr>
Fiamma Tricolore
Forza Italia – Il Popolo della Libertà
Fratelli d'Italia – Centrodestra Nazionale
Il Popolo della Libertà
Lega (Nord)
Lega d'Azione Meridionale
Liga Veneta
Movimento 5 Stelle
Movimento Sociale Italiano
Partito dei Comunisti Italiani


"Fiamma tricolore", "Lega d'Azione Meridionale", "Movimento Sociale Italiano" are not in our dataset. So let's drop them from the PopuList dataset.

In [32]:
# PopuList party names to exclude (they have no counterpart in our dataset).
to_drop <- c("Fiamma Tricolore", "Lega d'Azione Meridionale", "Movimento Sociale Italiano")

# Restrict PopuList to Italian parties that are not in the exclusion list.
populist <- filter(
  populist,
  country_name == "Italy",
  !(party_name %in% to_drop)
)

Let's compare the populism scores between PopuList and R&P's dictionary by focusing on those parties that are present in both datasets. There is no year variable in the PopuList dataset so we're only grouping by party.  

In both cases, FI-PDL, FdI, Lega and M5S have high populism scores. The two rankings can thus be considered similar.

In [34]:
populist %>% 
group_by(party_name) %>% 
summarize(mean_populist = mean(populist)) %>% 
arrange(desc(mean_populist))

party_name,mean_populist
<chr>,<dbl>
Forza Italia – Il Popolo della Libertà,1
Fratelli d'Italia – Centrodestra Nazionale,1
Il Popolo della Libertà,1
Lega (Nord),1
Liga Veneta,1
Movimento 5 Stelle,1
Partito dei Comunisti Italiani,0
Partito della Rifondazione Comunista,0
Rivoluzione Civile,0
Sinistra,0


In [39]:
to_keep <- c("F-ITA", "FI", "PDL", "FI-PDL", "FDI-AN", "FDI", "LEGA-N", "LEGA-NORD-P", "LNA", "LEGA", "LNP", "M5S", 
             "RC-PROGR", "COMUNISTA", "RC", "COM/IT/", "RC-SE", "SI-SEL-POS-LU")

df_rp %>% 
filter(party %in% to_keep) %>% 
group_by(party) %>% 
summarize(mean_perc_of_populist_toks = mean(perc_of_populist_toks),
          mean_standardized_perc_of_populist_toks = mean(standardized_perc_of_populist_toks, na.rm = TRUE)) %>% 
arrange(desc(mean_perc_of_populist_toks))

party,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<dbl>,<dbl>
FDI,0.0022468673,0.131246579
FDI-AN,0.0021582912,0.119579612
SI-SEL-POS-LU,0.0019976159,0.098415971
COMUNISTA,0.001858069,0.080134819
LEGA-NORD-P,0.0017354533,0.063906301
FI-PDL,0.0015917287,0.045075613
M5S,0.0015764041,0.042980997
LNA,0.0015412138,0.038300131
RC-PROGR,0.0015254416,0.036399855
RC,0.0014854353,0.030953169


# Decadri & Boussalis <a class="anchor" id="chapter6"></a>

Let's run the dictionary analysis with Decadri and Boussalis' dictionary

In [32]:
df_db <- dict_analysis(tokens = cleaned_toks, dictionary = "Decadri_Boussalis")

Let's save the output as an .Rdata file

In [35]:
save(df_db, file="data/df_db.RData")

The first rows of the dataframe

In [33]:
head(df_db)

Unnamed: 0_level_0,doc_id,year,party,anti_elitism,people_centrism,populist_toks,n_of_cleaned_toks,perc_of_populist_toks,standardized_perc_of_populist_toks
Unnamed: 0_level_1,<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<dbl>
1,text1,1994,F-ITA,0,0,0,24,0,-0.2974584
2,text2,1994,PROGR-F,0,0,0,10,0,-0.2974584
3,text3,1994,LEGA-N,0,0,0,27,0,-0.2974584
4,text4,1994,RC-PROGR,0,0,0,9,0,-0.2974584
5,text5,1994,PROGR-F,0,0,0,8,0,-0.2974584
6,text6,1994,PPI,0,0,0,8,0,-0.2974584


## Construct validity <a class="anchor" id="subparagraph6"></a>

Decadri and Boussalis' dictionary captures both the "anti-elitism" and "people-centrism" dimensions of populist ideology, and it thus constitutes an improvement over Rooduijn and Pauwels' dictionary in terms of construct validity.

## Face validity <a class="anchor" id="subparagraph7"></a>

Let's run an Analysis Of Variance (ANOVA) as we did before. Again, the difference in the % of populist tokens between parties is statistically significant.

In [42]:
load("data/df_db.RData")

anova_db <- aov(perc_of_populist_toks ~ party, data = df_db)

summary(anova_db)

                Df Sum Sq  Mean Sq F value Pr(>F)    
party           54   0.46 0.008459   48.34 <2e-16 ***
Residuals   297618  52.09 0.000175                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

To assess the face validity of Decadri and Boussalis' dictionary we'll have a look at the mean % of populist tokens (both anti-establishment and people-centrism) grouped by party and year.

As was the case for R&P's dictionary, both mainstream (UDC, UDEUR, PPI) and populist (Lega, M5S, FI-PdL) party-year combinations received high populist scores.

In [44]:
df_db %>% 
group_by(party, year) %>% 
summarize(mean_perc_of_populist_toks = mean(perc_of_populist_toks, na.rm = TRUE),
          mean_standardized_perc_of_populist_toks = mean(standardized_perc_of_populist_toks, na.rm = TRUE),
          .groups = "keep") %>% 
arrange(desc(mean_standardized_perc_of_populist_toks)) %>% 
head(20)

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
UNIONE DEI DEMOCRATICI CRISTIANI E DEI DEMOCRATICI DI CENTRO,2009,0.044307512,3.0371805
POP-UDEUR,2008,0.017489188,1.0187998
SOCRAD-RNP,2009,0.017283407,1.0033125
VERDI,2008,0.013245044,0.6993802
UNIONE DEI DEMOCRATICI CRISTIANI E DEI DEMOCRATICI DI CENTRO,2008,0.011821234,0.5922225
PPI,1996,0.011724603,0.5849499
FLPTP,2008,0.011113047,0.5389234
FLPTP,2009,0.009916184,0.448846
AP-CPE-NCD-NCI,2015,0.009728644,0.4347316
LNA,2015,0.00906672,0.3849143


Similarly, when we look at the party-year combinations with the lowest populist scores we find both mainstream and populist parties. This seems to suggest that D&B's dictionary lacks face validity.

In [45]:
df_db %>% 
group_by(party, year) %>% 
summarize(mean_perc_of_populist_toks = mean(perc_of_populist_toks, na.rm = TRUE),
          mean_standardized_perc_of_populist_toks = mean(standardized_perc_of_populist_toks, na.rm = TRUE),
          .groups = "keep") %>% 
arrange(desc(mean_standardized_perc_of_populist_toks)) %>% 
tail(20)

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
DES-CD,2014,0.0018296132,-0.1597594
AN,2002,0.0017296082,-0.1672859
FI-PDL,2018,0.0017217508,-0.1678773
FI,2002,0.0017204987,-0.1679331
RINN/IT,1999,0.0016217363,-0.1754045
CCD-CDU,2002,0.0015836051,-0.1782743
DES-CD,2015,0.0014287343,-0.1899301
DES-CD,2016,0.0009656902,-0.2247794
RC-SE,2009,0.0009302326,-0.227448
SOCRAD-RNP,2008,0.0007406179,-0.2417186


## External validity <a class="anchor" id="subparagraph8"></a>

### Chapel Hill Expert Survey <a class="anchor" id="subparagraph9"></a>

As Decadri and Boussalis' dictionary captures both dimensions of populism, we will validate it against a combination of two different variables from the CHES dataset, i.e. "anti-élite salience" and "people_vs_élite". We'll use the former as a proxy for the anti-establishment component and the latter as a proxy for the people-centrist one. The "people_vs_élite" variable was introduced in the 2019 edition of the dataset, so we'll only work with observations from that year.

In [47]:
ches <- read_csv("data/1999-2019_CHES_dataset_means(v2).csv", show_col_types = FALSE)

The following are the Italian parties in the CHES dataset for the year 2019

In [48]:
ches %>% filter(country == 8 & year == 2019) %>% select(party, antielite_salience, people_vs_elite)

party,antielite_salience,people_vs_elite
<chr>,<dbl>,<dbl>
RI,2.2,3.357143
M5S,8.888889,9.529411
SI,3.785714,2.666667
FdI,8.0,6.625
PD,1.882353,2.0625
LN,8.333333,6.9375
SVP,2.166667,1.4
FI,4.176471,4.066667


The parties in our dataset in the same year

In [49]:
df_db %>% filter(year == 2019) %>% distinct(party)

party
<chr>
M5S
LEGA
PD
IV
FI
FDI
MISTO
LEU


"Radicali Italiani" and "Südtiroler Volkspartei" are not in our dataset so we'll drop them from CHES

In [50]:
to_drop <- c("RI", "SVP")

ches <- ches %>% 
filter(country == 8 & year == 2019 & (!party %in% to_drop))

Let's compute the average populist value for each party in the CHES dataset by summing the people vs elite and the anti-elite salience variables and then taking the mean

In [51]:
ches %>% 
group_by(party) %>% 
summarize(mean_populism = mean(people_vs_elite + antielite_salience)) %>% 
arrange(desc(mean_populism))

party,mean_populism
<chr>,<dbl>
M5S,18.418301
LN,15.270833
FdI,14.625
FI,8.243137
SI,6.452381
PD,3.944853


The two rankings are rather different. According to CHES, M5S and Lega rank as the two most populist parties, whereas in the results of the dictionary analysis they turned out to be the least populist ones.

In [54]:
to_drop <- c("IV", "MISTO")

df_db %>% 
filter(year == 2019 & (! party %in% to_drop)) %>% 
group_by(party) %>% 
summarize(mean_perc_of_populist_toks = mean(perc_of_populist_toks),
          mean_standardized_perc_of_populist_toks = mean(standardized_perc_of_populist_toks, na.rm = TRUE)) %>% 
arrange(desc(mean_perc_of_populist_toks))

party,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<dbl>,<dbl>
FI,0.006516365,0.19297135
FDI,0.005751382,0.13539783
PD,0.005206037,0.09435445
LEU,0.004958768,0.07574469
LEGA,0.003747825,-0.01539243
M5S,0.003606619,-0.0259051


### The PopuList <a class="anchor" id="subparagraph10"></a>

Let's now compare D&B's dictionary with the PopuList dataset.

The two rankings seem to be similar. Both rank Lega, FdI, FI/PdL and M5S as populists.

In [55]:
populist %>% 
group_by(party_name) %>% 
summarize(mean_populist = mean(populist)) %>% 
arrange(desc(mean_populist))

party_name,mean_populist
<chr>,<dbl>
Forza Italia – Il Popolo della Libertà,1
Fratelli d'Italia – Centrodestra Nazionale,1
Il Popolo della Libertà,1
Lega (Nord),1
Liga Veneta,1
Movimento 5 Stelle,1
Partito dei Comunisti Italiani,0
Partito della Rifondazione Comunista,0
Rivoluzione Civile,0
Sinistra,0


In [56]:
to_keep <- c("F-ITA", "FI", "PDL", "FI-PDL", "FDI-AN", "FDI", "LEGA-N", "LEGA-NORD-P", "LNA", "LEGA", "LNP", "M5S", 
             "RC-PROGR", "COMUNISTA", "RC", "COM/IT/", "RC-SE", "SI-SEL-POS-LU")

df_db %>% 
filter(party %in% to_keep) %>% 
group_by(party) %>% 
summarize(mean_perc_of_populist_toks = mean(perc_of_populist_toks),
          mean_standardized_perc_of_populist_toks = mean(standardized_perc_of_populist_toks, na.rm = TRUE)) %>% 
arrange(desc(mean_perc_of_populist_toks))

party,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<dbl>,<dbl>
LNA,0.007604579,0.27487175
FDI-AN,0.006533945,0.19429446
FI-PDL,0.006307656,0.177539486
PDL,0.006023743,0.155968573
M5S,0.005847769,0.142748849
SI-SEL-POS-LU,0.005658601,0.128414989
LNP,0.005401885,0.109094202
FDI,0.005321136,0.103016917
LEGA-NORD-P,0.005151888,0.090315664
COMUNISTA,0.004441761,0.036970046
