1. [Working directory, packages and data](#chapter1)
2. [Preprocessing](#chapter2)
3. [Dictionaries](#chapter3)
4. [Wrapper function](#chapter4)
5. [Rooduijn & Pauwels](#chapter5)
   1. [Construct validity](#subparagraph1)
   2. [Face validity](#subparagraph2)
   3. [External validity](#subparagraph3)
       1. [CHES](#subparagraph4)
       2. [PopuList](#subparagraph5)
6. [Decadri & Boussalis](#chapter6)
   1. [Construct validity](#subparagraph6)
   2. [Face validity](#subparagraph7)
   3. [External validity](#subparagraph8)
       1. [CHES](#subparagraph9)
       2. [PopuList](#subparagraph10)

# Working directory, packages and data <a class="anchor" id="chapter1"></a>

Setting the working directory

In [1]:
# Point the session at the project folder; the relative "data/..." paths used
# by the cells below are resolved against this directory.
# NOTE(review): machine-specific absolute path; adapt it before running the
# notebook on another machine.
setwd(dir = "C:/Users/jacop/Tesi/")

Loading the libraries

In [2]:
suppressWarnings(suppressPackageStartupMessages(library(dtplyr)))
suppressWarnings(suppressPackageStartupMessages(library(tidyverse)))
suppressWarnings(suppressPackageStartupMessages(library(data.table)))
suppressWarnings(suppressPackageStartupMessages(library(quanteda)))

Loading the data

In [3]:
# Restore the saved objects into the workspace; the next cell uses `Texts`,
# which is presumably created by this call.
# NOTE(review): the file carries an .rds extension but is read with load()
# rather than readRDS(); confirm it really is a save() image.
load(file = "data/parliamentary_groups2.rds")

Creating a lazy data.table out of our dataframe so that we can use dtplyr on it

In [4]:
# Wrap the data frame in a dtplyr lazy data.table.
texts <- Texts %>%
  lazy_dt()

Casting the "legislatura" variable as integer

In [5]:
# Convert `legislatura` to integer, then materialise the result as a tibble.
texts <- as_tibble(
  mutate(texts, legislatura = as.integer(legislatura))
)

Filtering the dataset by focusing on the last seven legislatures

In [6]:
# Keep only speeches from legislature 12 onwards.
texts <- as_tibble(
  filter(texts, legislatura >= 12)
)

Decadri and Boussalis' additional stopwords

In [7]:
# Read the extra stopword list (readr messages silenced) and keep only its
# `stopwords` column as a plain vector.
db_additional_stopwords <- pull(
  suppressMessages(read_csv("data/it_stopwords_new_list.csv")),
  stopwords
)

Procedural stopwords

In [8]:
# Read the procedural stopword list (readr messages silenced) and keep only
# its `it_stopwords_procedural` column as a plain vector.
procedural_stopwords <- pull(
  suppressMessages(read_csv("data/it_stopwords_procedural.csv")),
  it_stopwords_procedural
)

# Preprocessing <a class="anchor" id="chapter2"></a>

Creating the corpus, tokenizing it and removing the stopwords

In [9]:
# Build a quanteda corpus from the speeches stored in the `textclean` column.
my_corpus <- corpus(texts, text_field = "textclean")

# Tokenise, dropping punctuation, symbols, numbers and separators.
toks <- tokens(
  my_corpus,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_numbers = TRUE,
  remove_separators = TRUE
)

# Remove the three stopword lists in turn.
# NOTE(review): only the first removal uses padding = TRUE; confirm this
# asymmetry is intended, since it may affect the per-document token totals
# computed later with ntoken().
toks <- tokens_remove(toks, pattern = stopwords("it"), padding = TRUE)
toks <- tokens_remove(toks, pattern = db_additional_stopwords)
toks <- tokens_remove(toks, pattern = procedural_stopwords)

# Dictionaries <a class="anchor" id="chapter3"></a>

Rooduijn and Pauwels' dictionary

In [10]:
# Anti-elitism patterns of Rooduijn and Pauwels' dictionary (a trailing `*`
# is a wildcard).
anti_elitism <- c(
  "elit*", "consens*", "antidemocratic*", "referend*",
  "corrot*", "propagand*", "politici*", "ingann*",
  "tradi*", "vergogn*", "scandal*", "verita",
  "disonest*", "partitocrazia", "menzogn*", "mentir*"
)

# Single-key quanteda dictionary.
rp_dictionary <- dictionary(
  list(anti_elitism = anti_elitism)
)

Decadri and Boussalis' dictionary

In [11]:
# Anti-elitism patterns of Decadri and Boussalis' dictionary.
# Note: this reassigns the `anti_elitism` vector defined for the previous
# dictionary.
anti_elitism <- c(
  "antidemocratic*", "casta", "consens*", "corrot*", "disonest*", "elit*",
  "establishment", "ingann*", "mentir*", "menzogn*", "partitocrazia",
  "propagand*", "scandal*", "tradim*", "tradir*", "tradit*", "vergogn*",
  "verita"
)

# People-centrism patterns.
people_centrism <- c(
  "abitant*", "cittadin*", "consumator*", "contribuent*",
  "elettor*", "gente", "popol*"
)

# Two-key quanteda dictionary.
db_dictionary <- dictionary(
  list(
    anti_elitism = anti_elitism,
    people_centrism = people_centrism
  )
)

# Wrapper function <a class="anchor" id="chapter4"></a>

In [12]:
dict_analysis <- function(tokens, dictionary) {

  # Scores every document in `tokens` against one of the two populism
  # dictionaries and returns one row per document.
  #
  # Arguments:
  #   tokens     - quanteda tokens object whose docvars include `gruppoP`,
  #                `year` and `group_cluster`.
  #   dictionary - either "Rooduijn_Pauwels" (uses the global `rp_dictionary`)
  #                or "Decadri_Boussalis" (uses the global `db_dictionary`).
  #
  # Returns a tibble with the document id, party, year, group cluster, the
  # per-key dictionary hit counts, the token total and the raw and
  # standardized share of populist tokens.

  # Fail early with a clear message: an unrecognised name used to skip both
  # branches and surface only as "object 'dat' not found".
  valid_dictionaries <- c("Rooduijn_Pauwels", "Decadri_Boussalis")
  if (!is.character(dictionary) || length(dictionary) != 1L ||
      !dictionary %in% valid_dictionaries) {
    stop("`dictionary` must be one of: ",
         paste(valid_dictionaries, collapse = ", "),
         call. = FALSE)
  }

  use_db <- dictionary == "Decadri_Boussalis"

  my_dict_lookup <- tokens_lookup(
    x = tokens,
    dictionary = if (use_db) db_dictionary else rp_dictionary
  )

  # Take the metadata from the tokens object itself rather than from the
  # global `my_corpus`, so the rows stay aligned with the documents that were
  # actually passed in (e.g. a subset of the corpus).
  doc_meta <- docvars(tokens)

  dat <- dfm(my_dict_lookup) %>%
    convert(to = "data.frame") %>%
    mutate(party = doc_meta$gruppoP,
           year = doc_meta$year,
           group_cluster = doc_meta$group_cluster,
           total_toks = ntoken(tokens))

  # Share of populist tokens; documents with zero tokens yield NaN here.
  if (use_db) {
    # Decadri and Boussalis: populist tokens are hits on either key.
    dat <- dat %>%
      mutate(populist_toks = anti_elitism + people_centrism,
             perc_of_populist_toks = populist_toks / total_toks)
    column_order <- c("doc_id", "party", "year", "group_cluster",
                      "anti_elitism", "people_centrism", "populist_toks",
                      "total_toks", "perc_of_populist_toks",
                      "standardized_perc_of_populist_toks")
  } else {
    # Rooduijn and Pauwels: the dictionary only has the anti-elitism key.
    dat <- dat %>%
      mutate(perc_of_populist_toks = anti_elitism / total_toks)
    column_order <- c("doc_id", "party", "year", "group_cluster",
                      "anti_elitism", "total_toks", "perc_of_populist_toks",
                      "standardized_perc_of_populist_toks")
  }

  # scale() is kept as-is, so the standardized column remains a one-column
  # matrix exactly as before.
  dat %>%
    mutate(standardized_perc_of_populist_toks = scale(perc_of_populist_toks)) %>%
    relocate(all_of(column_order)) %>%
    as_tibble()

}


# Rooduijn & Pauwels <a class="anchor" id="chapter5"></a>

Let's run the dictionary analysis by using Rooduijn and Pauwels' dictionary

In [13]:
# Score the whole tokenised corpus with Rooduijn and Pauwels' dictionary.
df_rp <- toks %>%
  dict_analysis(dictionary = "Rooduijn_Pauwels")

The first rows of the dataframe

In [14]:
# Preview the first six rows.
head(df_rp, n = 6)

doc_id,party,year,group_cluster,anti_elitism,total_toks,perc_of_populist_toks,standardized_perc_of_populist_toks
<chr>,<chr>,<int>,<chr>,<dbl>,<int>,<dbl>,"<dbl[,1]>"
text1,F-ITA,1994,9_evolutionGroup,0,13,0,-0.1561156
text2,PROGR-F,1994,1_evolutionGroup,0,3,0,-0.1561156
text3,LEGA-N,1994,10_evolutionGroup,0,40,0,-0.1561156
text4,RC-PROGR,1994,12_evolutionGroup,0,3,0,-0.1561156
text5,PROGR-F,1994,1_evolutionGroup,0,4,0,-0.1561156
text6,PPI,1994,2_evolutionGroup,0,3,0,-0.1561156


## Construct validity <a class="anchor" id="subparagraph1"></a>

Rooduijn and Pauwels' dictionary captures the "anti-elitism" component of populism, but not the "people-centrism" one. As a result, from a construct validity standpoint, it is only partially valid. The authors motivated the decision to leave out the "people-centrism" dimension by pointing out that the "people" is often referred to by words such as "us", "we" and "our", which are also used to refer to entities other than the people (such as political parties). The inclusion of these words in the dictionary, they argue, would result in a large number of false positives.

## Face validity <a class="anchor" id="subparagraph2"></a>

A populist dictionary has face validity if the allegedly populist parties are indeed populist. In the Italian case, we would expect populist values to be higher for parties that the literature deems populist (i.e. Five Star Movement, Lega Nord, Forza Italia and Il Popolo delle Libertà).

Let's first run an Analysis Of Variance (ANOVA) by using the % of populist tokens as our dependent variable and the party (i.e. gruppoP) as our predictor. 

The results indicate that the differences in the % of populist tokens between parties are statistically significant.

In [17]:
# One-way ANOVA: share of populist tokens explained by parliamentary group.
anova_rp <- aov(
  formula = perc_of_populist_toks ~ party,
  data = df_rp
)
summary(anova_rp)

                Df Sum Sq   Mean Sq F value Pr(>F)    
party           54  0.010 1.897e-04   7.475 <2e-16 ***
Residuals   297559  7.551 2.538e-05                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
59 observations deleted due to missingness

We can assess how Rooduijn and Pauwels' dictionary fares in terms of face validity by grouping the results of the dictionary analysis by year and party and computing the average % of populist tokens (both standardized and unstandardized). 

The following are the 20 party-year combinations with the highest populist score in the 1994-2021 period. Consistent with our expectations, we find populist parties such as FdI-AN (2013), FdI (2018) and Forza Italia (2019). However, we also find mainstream parties such as UDC (2009), SI-SEL-POS-LU (2018), IV (2018), IV (2019) and PD (2019). These results could be interpreted as evidence of either populist contagion or lack of face validity. The absence of M5S and Lega among the most populist parties makes me lean towards the latter.

In [18]:
# Mean populist share (raw and standardized) per party-year, highest first;
# show the top 20 rows.
df_rp %>%
  group_by(party, year) %>%
  summarise(
    mean_perc_of_populist_toks =
      mean(perc_of_populist_toks, na.rm = TRUE),
    mean_standardized_perc_of_populist_toks =
      mean(standardized_perc_of_populist_toks, na.rm = TRUE),
    .groups = "keep"
  ) %>%
  arrange(desc(mean_standardized_perc_of_populist_toks)) %>%
  head(n = 20)

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
FLD,1996,0.004303007,0.6975892
UNIONE DEI DEMOCRATICI CRISTIANI E DEI DEMOCRATICI DI CENTRO,2009,0.002923977,0.4239933
PPI,1996,0.002187874,0.2779526
IV,2018,0.002126375,0.2657513
SI-SEL-POS-LU,2018,0.002084685,0.2574802
FLD,1994,0.002027889,0.2462121
FDI-AN,2013,0.002020914,0.2448282
SI-SEL-POS-LU,2016,0.001993743,0.2394375
DEMO,1996,0.001993305,0.2393506
UDR,2001,0.00180008,0.2010152


The following are the party-year combinations with the lowest populist scores. Again we see a mixture of both mainstream and populist parties. Interestingly, LNA (2018), FdI-AN (2018) and PdL (2013) are ranked among the least populist parties. This might be further evidence of lack of face validity in Rooduijn and Pauwels' dictionary.

In [19]:
# Same party-year ranking as above; show the bottom 20 rows.
df_rp %>%
  group_by(party, year) %>%
  summarise(
    mean_perc_of_populist_toks =
      mean(perc_of_populist_toks, na.rm = TRUE),
    mean_standardized_perc_of_populist_toks =
      mean(standardized_perc_of_populist_toks, na.rm = TRUE),
    .groups = "keep"
  ) %>%
  arrange(desc(mean_standardized_perc_of_populist_toks)) %>%
  tail(n = 20)

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
RC,2006,0.000199689,-0.1164979
DCA-NPSI,2006,0.0001354899,-0.1292348
DES-CD,2014,9.646189e-05,-0.1369779
VERDI,2006,9.439643e-05,-0.1373876
UDEUR,2001,3.565062e-05,-0.1490426
AP-CPE-NCD-NCI,2018,0.0,-0.1561156
CI,2018,0.0,-0.1561156
COM/IT/,2008,0.0,-0.1561156
DCA-NPSI,2008,0.0,-0.1561156
DCA-NPSI,2009,0.0,-0.1561156


## External validity <a class="anchor" id="subparagraph3"></a>

### Chapel Hill Expert Survey <a class="anchor" id="subparagraph4"></a>

As Rooduijn and Pauwels' dictionary only captures the anti-elite dimension of populism, the external validation will be carried out against the anti-elite salience variable from the CHES dataset, which was introduced in 2014.

Let's load the CHES dataset

In [20]:
# Read the CHES trend file without printing the column specification.
ches <- read_csv(
  file = "data/1999-2019_CHES_dataset_means(v2).csv",
  show_col_types = FALSE
)

The countrycode for Italy is 8. The following is a list of all Italian parties in the CHES dataset in the 2014-2019 time period.

In [21]:
# Distinct Italian (country code 8) parties surveyed between 2014 and 2019.
ches %>%
  filter(country == 8, year >= 2014, year <= 2019) %>%
  distinct(party)

party
<chr>
UDC
SC
VdA
PD
FI
LN
FdI
SEL
M5S
CD


While these are the parties included in our dataset in the same timeframe

In [22]:
# Distinct parliamentary groups in our data between 2014 and 2019.
df_rp %>%
  filter(year >= 2014, year <= 2019) %>%
  distinct(party)

party
<chr>
CI
PD
FI-PDL
M5S
MDP-LU
NCI-SCPI-MAIE
MISTO
SI-SEL-POS-LU
AP-CPE-NCD-NCI
LNA


'Vallée d'Aoste', 'Südtiroler Volkspartei' and 'Radicali Italiani' are not part of our dataset, so let's drop them from the CHES dataset.

In [23]:
# CHES party labels to exclude from the comparison.
to_drop <- c("VdA", "SVP", "RI")

# Keep Italian rows from 2014-2019, minus the excluded parties.
ches <- ches %>%
  filter(
    country == 8,
    year >= 2014,
    year <= 2019,
    !party %in% to_drop
  )

Let's now compare how R&P' dictionary and the CHES dataset ranked party-year combinations by populism in 2014 and 2019. We'll drop the "Mixed group" and "Italia Viva" as these two parliamentary groups are absent from the CHES dataset.

The difference between the two rankings is stark. According to the dictionary analysis, PD (2019) ranks among the most populist party-year combinations and M5S (2019) among the least populist ones, while the opposite is true in the CHES dataset. Moreover, Lega (2019), one of the most populist party-year combinations according to CHES, is only slightly populist according to R&P' dictionary.

In [25]:
# Party-year means for 2014 and 2019, excluding the groups "MISTO" and "IV",
# ordered by the raw populist share.
df_rp %>%
  filter(year %in% c(2014, 2019), party != "MISTO", party != "IV") %>%
  group_by(party, year) %>%
  summarise(
    mean_perc_of_populist_toks =
      mean(perc_of_populist_toks, na.rm = TRUE),
    mean_standardized_perc_of_populist_toks =
      mean(standardized_perc_of_populist_toks, na.rm = TRUE),
    .groups = "keep"
  ) %>%
  arrange(desc(mean_perc_of_populist_toks))

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
FI,2019,0.001749259,0.190932508
PD,2019,0.001550357,0.151470894
FDI,2019,0.001281956,0.098220937
FDI-AN,2014,0.001201482,0.08225507
MDP-LU,2014,0.001053005,0.052797699
AP-CPE-NCD-NCI,2014,0.001022024,0.046651198
LNA,2014,0.0009611932,0.034582468
M5S,2014,0.000960295,0.034404271
NCI-SCPI-MAIE,2014,0.0009518141,0.032721678
LEU,2019,0.0008481808,0.012161113


In [26]:
# Mean anti-elite salience per party-year in the current `ches` table,
# highest first.
ches %>%
  group_by(party, year) %>%
  summarise(
    mean_anti_elite_salience = mean(antielite_salience),
    .groups = "keep"
  ) %>%
  arrange(desc(mean_anti_elite_salience))

party,year,mean_anti_elite_salience
<chr>,<dbl>,<dbl>
M5S,2014,10.0
RC,2014,9.333333
M5S,2019,8.888889
LN,2014,8.8
LN,2019,8.333333
FdI,2019,8.0
SEL,2014,6.8
FdI,2014,6.25
PD,2014,4.4
FI,2019,4.176471


### The PopuList <a class="anchor" id="subparagraph5"></a>

Reading the PopuList dataset

In [27]:
# Read the PopuList spreadsheet.
populist <- readxl::read_xlsx(
  path = "data/populist-version-2-20200626.xlsx"
)

All the Italian parties in the PopuList dataset

In [28]:
# Distinct Italian party names in PopuList.
populist %>%
  filter(country_name == "Italy") %>%
  distinct(party_name)

party_name
<chr>
Fiamma Tricolore
Forza Italia – Il Popolo della Libertà
Fratelli d'Italia – Centrodestra Nazionale
Il Popolo della Libertà
Lega (Nord)
Lega d'Azione Meridionale
Liga Veneta
Movimento 5 Stelle
Movimento Sociale Italiano
Partito dei Comunisti Italiani


"Fiamma tricolore", "Lega d'Azione Meridionale", "Movimento Sociale Italiano" are not in our dataset. So let's drop them from the PopuList dataset.

In [29]:
# PopuList parties to exclude from the comparison.
to_drop <- c(
  "Fiamma Tricolore",
  "Lega d'Azione Meridionale",
  "Movimento Sociale Italiano"
)

# Keep the Italian rows, minus the excluded parties.
populist <- populist %>%
  filter(country_name == "Italy", !party_name %in% to_drop)

Let's compare the populism scores between PopuList and R&P' dictionary by focusing on those parties that are present in both datasets. There is no year variable in the PopuList dataset so we're only grouping by party.  

According to the dictionary analysis, FI-PDL, FdI, Lega and M5S have higher populism scores compared to most parties. These parties are all coded as populist in the PopuList dataset. The two measures can thus be considered similar.

In [30]:
# Mean of the `populist` indicator per party, highest first.
populist %>%
  group_by(party_name) %>%
  summarise(mean_populist = mean(populist)) %>%
  arrange(desc(mean_populist))

party_name,mean_populist
<chr>,<dbl>
Forza Italia – Il Popolo della Libertà,1
Fratelli d'Italia – Centrodestra Nazionale,1
Il Popolo della Libertà,1
Lega (Nord),1
Liga Veneta,1
Movimento 5 Stelle,1
Partito dei Comunisti Italiani,0
Partito della Rifondazione Comunista,0
Rivoluzione Civile,0
Sinistra,0


In [32]:
# Parliamentary-group labels retained for the comparison with PopuList.
to_keep <- c(
  "F-ITA", "FI", "PDL", "FI-PDL", "FDI-AN", "FDI",
  "LEGA-N", "LEGA-NORD-P", "LNA", "LEGA", "LNP", "M5S",
  "RC-PROGR", "COMUNISTA", "RC", "COM/IT/", "RC-SE", "SI-SEL-POS-LU"
)

# Per-party means over all years, ordered by the raw populist share.
df_rp %>%
  filter(party %in% to_keep) %>%
  group_by(party) %>%
  summarise(
    mean_perc_of_populist_toks =
      mean(perc_of_populist_toks, na.rm = TRUE),
    mean_standardized_perc_of_populist_toks =
      mean(standardized_perc_of_populist_toks, na.rm = TRUE)
  ) %>%
  arrange(desc(mean_perc_of_populist_toks))

party,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<dbl>,<dbl>
FDI,0.001376835,0.117044671
FDI-AN,0.001329138,0.107581718
COMUNISTA,0.0012596427,0.093794031
SI-SEL-POS-LU,0.0012010564,0.082170678
LEGA-NORD-P,0.0011313917,0.068349389
RC,0.0009854664,0.039398218
M5S,0.0009845364,0.039213693
RC-PROGR,0.0009804464,0.03840226
FI-PDL,0.0009724746,0.036820669
LNA,0.0009143046,0.025279909


# Decadri & Boussalis <a class="anchor" id="chapter6"></a>

Let's run the dictionary analysis with Decadri and Boussalis' dictionary

In [33]:
# Score the whole tokenised corpus with Decadri and Boussalis' dictionary.
df_db <- toks %>%
  dict_analysis(dictionary = "Decadri_Boussalis")

The first rows of the dataframe

In [34]:
# Preview the first six rows.
head(df_db, n = 6)

doc_id,party,year,group_cluster,anti_elitism,people_centrism,populist_toks,total_toks,perc_of_populist_toks,standardized_perc_of_populist_toks
<chr>,<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,"<dbl[,1]>"
text1,F-ITA,1994,9_evolutionGroup,0,0,0,13,0,-0.2931703
text2,PROGR-F,1994,1_evolutionGroup,0,0,0,3,0,-0.2931703
text3,LEGA-N,1994,10_evolutionGroup,0,0,0,40,0,-0.2931703
text4,RC-PROGR,1994,12_evolutionGroup,0,0,0,3,0,-0.2931703
text5,PROGR-F,1994,1_evolutionGroup,0,0,0,4,0,-0.2931703
text6,PPI,1994,2_evolutionGroup,0,0,0,3,0,-0.2931703


## Construct validity <a class="anchor" id="subparagraph6"></a>

Decadri and Boussalis' dictionary captures both the "anti-elitism" and "people-centrism" dimensions of populist ideology and it thus constitutes an improvement over Rooduijn and Pauwels' dictionary in terms of construct validity.

## Face validity <a class="anchor" id="subparagraph7"></a>

Let's run an Analysis Of Variance (ANOVA) as we did before. Again, the difference in the % of populist tokens between parties is statistically significant.

In [35]:
# One-way ANOVA: share of populist tokens explained by parliamentary group.
anova_db <- aov(
  formula = perc_of_populist_toks ~ party,
  data = df_db
)
summary(anova_db)

                Df Sum Sq   Mean Sq F value Pr(>F)    
party           54  0.153 0.0028286   39.97 <2e-16 ***
Residuals   297559 21.060 0.0000708                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
59 observations deleted due to missingness

To assess the face validity of Decadri and Boussalis' dictionary we'll have a look at the mean % of populist tokens (both anti-establishment and people-centrism) grouped by party and year.

As was the case for R&P' dictionary, both mainstream (UDC, UDEUR, PPI) and populist (Lega, M5S, FI-PdL) party-year combinations received high populist scores.

In [36]:
# Mean populist share (raw and standardized) per party-year, highest first;
# show the top 20 rows.
df_db %>%
  group_by(party, year) %>%
  summarise(
    mean_perc_of_populist_toks =
      mean(perc_of_populist_toks, na.rm = TRUE),
    mean_standardized_perc_of_populist_toks =
      mean(standardized_perc_of_populist_toks, na.rm = TRUE),
    .groups = "keep"
  ) %>%
  arrange(desc(mean_standardized_perc_of_populist_toks)) %>%
  head(n = 20)

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
UNIONE DEI DEMOCRATICI CRISTIANI E DEI DEMOCRATICI DI CENTRO,2009,0.028508772,3.0836409
SOCRAD-RNP,2009,0.011621573,1.0833835
POP-UDEUR,2008,0.010379357,0.9362453
VERDI,2008,0.007892093,0.6416335
UNIONE DEI DEMOCRATICI CRISTIANI E DEI DEMOCRATICI DI CENTRO,2008,0.007494466,0.5945354
PPI,1996,0.007464757,0.5910164
DCA-NPSI,2008,0.00735024,0.577452
FLPTP,2008,0.006388134,0.4634924
FLD,1996,0.005873414,0.4025247
AP-CPE-NCD-NCI,2015,0.005704498,0.3825169


Similarly, when we look at the party-year combinations with the lowest populist scores we find both mainstream and populist parties. This seems to suggest that D&B' dictionary lacks face validity.

In [37]:
# Same party-year ranking as above; show the bottom 20 rows.
df_db %>%
  group_by(party, year) %>%
  summarise(
    mean_perc_of_populist_toks =
      mean(perc_of_populist_toks, na.rm = TRUE),
    mean_standardized_perc_of_populist_toks =
      mean(standardized_perc_of_populist_toks, na.rm = TRUE),
    .groups = "keep"
  ) %>%
  arrange(desc(mean_standardized_perc_of_populist_toks)) %>%
  tail(n = 20)

party,year,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<int>,<dbl>,<dbl>
DES-CD,2014,0.0011174415,-0.1608114
FI,2002,0.0011147634,-0.1611287
AN,2002,0.0011103368,-0.161653
RINN/IT,1999,0.0010955878,-0.1634
CCD-CDU,2002,0.0010161716,-0.1728067
FI-PDL,2018,0.0009904505,-0.1758533
DES-CD,2015,0.0008868901,-0.1881198
DES-CD,2016,0.0006750166,-0.2132158
RC-SE,2009,0.0006309148,-0.2184396
SOCRAD-RNP,2008,0.0004388236,-0.2411925


## External validity <a class="anchor" id="subparagraph8"></a>

### Chapel Hill Expert Survey <a class="anchor" id="subparagraph9"></a>

As Decadri and Boussalis' dictionary captures both dimensions of populism, we will validate it against a combination of two different variables from the CHES dataset, i.e. "anti-élite salience" and "people_vs_élite". We'll use the former as a proxy for the anti-establishment component and the latter as a proxy for the people-centrist one. The "people_vs_élite" variable was introduced in the 2019 edition of the dataset, so we'll only work with observations from this year.

In [38]:
# Re-read the full CHES trend file (the earlier `ches` object was filtered).
ches <- read_csv(
  file = "data/1999-2019_CHES_dataset_means(v2).csv",
  show_col_types = FALSE
)

The following are the Italian parties in the CHES dataset for the year 2019

In [39]:
# Italian (country code 8) parties in 2019 with the two populism variables.
ches %>%
  filter(country == 8, year == 2019) %>%
  select(party, antielite_salience, people_vs_elite)

party,antielite_salience,people_vs_elite
<chr>,<dbl>,<dbl>
RI,2.2,3.357143
M5S,8.888889,9.529411
SI,3.785714,2.666667
FdI,8.0,6.625
PD,1.882353,2.0625
LN,8.333333,6.9375
SVP,2.166667,1.4
FI,4.176471,4.066667


The parties in our dataset in the same year

In [40]:
# Distinct parliamentary groups in our data for 2019.
df_db %>%
  filter(year == 2019) %>%
  distinct(party)

party
<chr>
M5S
LEGA
PD
IV
FI
FDI
MISTO
LEU


"Radicali Italiani" and "Südtiroler Volkspartei" are not in our dataset so we'll drop them from CHES

In [41]:
# CHES party labels to exclude from the comparison.
to_drop <- c("RI", "SVP")

# Keep the Italian 2019 rows, minus the excluded parties.
ches <- ches %>%
  filter(country == 8, year == 2019, !party %in% to_drop)

Let's compute the average populist value for each party in the CHES dataset by summing the people vs elite and the anti-elite salience variables and then taking the mean

In [42]:
# Per-party mean of (people_vs_elite + antielite_salience), highest first.
ches %>%
  group_by(party) %>%
  summarise(
    mean_populism = mean(people_vs_elite + antielite_salience)
  ) %>%
  arrange(desc(mean_populism))

party,mean_populism
<chr>,<dbl>
M5S,18.418301
LN,15.270833
FdI,14.625
FI,8.243137
SI,6.452381
PD,3.944853


The two rankings are rather different. According to CHES, M5S and Lega rank as the two most populist parties, whereas in the results of the dictionary analysis they turned out to be the least populist ones.

In [44]:
# Parliamentary groups to exclude from the 2019 comparison.
to_drop <- c("IV", "MISTO")

# Per-party means for 2019, ordered by the raw populist share.
df_db %>%
  filter(year == 2019, !party %in% to_drop) %>%
  group_by(party) %>%
  summarise(
    mean_perc_of_populist_toks =
      mean(perc_of_populist_toks, na.rm = TRUE),
    mean_standardized_perc_of_populist_toks =
      mean(standardized_perc_of_populist_toks, na.rm = TRUE)
  ) %>%
  arrange(desc(mean_perc_of_populist_toks))

party,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<dbl>,<dbl>
FI,0.003968809,0.17692773
FDI,0.003611838,0.13464516
LEU,0.003213033,0.08740749
PD,0.003152215,0.08020377
LEGA,0.00233055,-0.01712103
M5S,0.002276121,-0.02356803


### The PopuList <a class="anchor" id="subparagraph10"></a>

Let's now compare D&B' dictionary with the PopuList dataset.

Lega, FdI, FI/PdL and M5S rank among the most populist parties according to D&B' dictionary. These parties have all been coded as populist by PopuList. The two measures can thus be considered to be similar.

In [45]:
# Mean of the `populist` indicator per party, highest first.
populist %>%
  group_by(party_name) %>%
  summarise(mean_populist = mean(populist)) %>%
  arrange(desc(mean_populist))

party_name,mean_populist
<chr>,<dbl>
Forza Italia – Il Popolo della Libertà,1
Fratelli d'Italia – Centrodestra Nazionale,1
Il Popolo della Libertà,1
Lega (Nord),1
Liga Veneta,1
Movimento 5 Stelle,1
Partito dei Comunisti Italiani,0
Partito della Rifondazione Comunista,0
Rivoluzione Civile,0
Sinistra,0


In [47]:
# Parliamentary-group labels retained for the comparison with PopuList.
to_keep <- c(
  "F-ITA", "FI", "PDL", "FI-PDL", "FDI-AN", "FDI",
  "LEGA-N", "LEGA-NORD-P", "LNA", "LEGA", "LNP", "M5S",
  "RC-PROGR", "COMUNISTA", "RC", "COM/IT/", "RC-SE", "SI-SEL-POS-LU"
)

# Per-party means over all years, ordered by the raw populist share.
df_db %>%
  filter(party %in% to_keep) %>%
  group_by(party) %>%
  summarise(
    mean_perc_of_populist_toks =
      mean(perc_of_populist_toks, na.rm = TRUE),
    mean_standardized_perc_of_populist_toks =
      mean(standardized_perc_of_populist_toks, na.rm = TRUE)
  ) %>%
  arrange(desc(mean_perc_of_populist_toks))

party,mean_perc_of_populist_toks,mean_standardized_perc_of_populist_toks
<chr>,<dbl>,<dbl>
LNA,0.004457382,0.234798281
FDI-AN,0.004030381,0.184220866
FI-PDL,0.003836494,0.161255302
M5S,0.003622699,0.135931629
PDL,0.003558247,0.128297458
SI-SEL-POS-LU,0.003363589,0.105240636
LEGA-NORD-P,0.003347112,0.103288956
FDI,0.003296703,0.09731803
LNP,0.003296006,0.097235449
COMUNISTA,0.002970165,0.058640207
