# Processing Vectors for Analysis (out of one big Dataframe)
### Populismus

- Loading the Data
- Creating a Dataframe with every WOI and its yearly mean vectors
- Creating a similarity matrix for WOI to the TARGET WORD and its visualization
- Creating a Dataframe with every WOI and its decade mean vectors
- Creating a similarity matrix with decade mean vectors for WOI to the oneself and its visualization

## 1. Data Managing

In [1]:
import pandas as pd
import numpy as np

##### 1.1. Loading the Data Frame

In [2]:
df_vecs_populismus = pd.read_csv("df_populismus/df_vecs_populismus.csv")

In [3]:
df_vecs_populismus

Unnamed: 0,year,word,vector
0,1946.0,Chauvinismus,"[-0.02457062155008316, 0.11432712525129318, 0...."
1,1946.0,Chauvinismus,"[-0.3278478980064392, 0.5251035094261169, 0.32..."
2,1948.0,Chauvinismus,"[0.06986841559410095, -0.8528504371643066, -1...."
3,1948.0,Chauvinismus,"[0.6785620450973511, -0.6106462478637695, 0.41..."
4,1948.0,Chauvinismus,"[-0.0399584099650383, -0.14267712831497192, 0...."
...,...,...,...
5461,2000.0,Kommunitarismus,"[0.37949874997138977, -0.03819000720977783, -0..."
5462,2012.0,Kommunitarismus,"[0.11768192052841187, -0.4920748472213745, -0...."
5463,2000.0,Libertären,"[-0.055606357753276825, -0.9284834265708923, -..."
5464,2013.0,Libertären,"[0.19557200372219086, -0.9136017560958862, -0...."


In [4]:
df_vecs_republikanismus = pd.read_csv("df_populismus/df_vecs_republikanismus.csv")
df_vecs_feindbild = pd.read_csv("df_populismus/df_vecs_feindbild.csv")
df_vecs_neonazismus = pd.read_csv("df_populismus/df_vecs_neonazismus.csv")
df_vecs_aktivismus = pd.read_csv("df_populismus/df_vecs_aktivismus.csv")

#### 1.1.1. Cutting the big Dataframe into single topic Dataframes for further calculations

In [5]:
def contains_word(word, s):
    word = word.lower()
    s = s.lower()
    for char in word:
        if char not in s:
            return False
    return True

In [6]:
words_list = ["populismus", "autoritarismus",  "neoliberalismus", "totalitarismus" , 
              "radikalismus",  
              "revisionismus",  "pazifismus",  "antifaschismus", 
              "reformismus",  "islamismus",  "antikommunismus",  "kollektivismus",  "regionalismus",  
              "wohlfahrtsstaat" ,  "konservatismus", "islamophobie",  "populisten", "pluralismus" ,  
              "kommunitarismus" ,  "kosmopolitismus" , "chauvinismus",    
              "europäisierung"]

# additional WOI that were added post partum:  ["republikanismus", "feindbild", "neonazismus", "aktivismus"]
dfs = {}

for word in words_list:
    new_new_df = df_vecs_populismus[df_vecs_populismus['word'].str.contains(word, case=False)]
    dfs[word] = new_new_df

In [7]:
# loop through the words and create a new dataframe for each word
for df in dfs:
    vecs_df = df_vecs_populismus[df_vecs_populismus['word'] == df]
    # store the word_df in a variable, for example:
    locals()[df + "_df"] = vecs_df
    print("df_vecs_" + df.lower())

df_vecs_populismus
df_vecs_autoritarismus
df_vecs_neoliberalismus
df_vecs_totalitarismus
df_vecs_radikalismus
df_vecs_revisionismus
df_vecs_pazifismus
df_vecs_antifaschismus
df_vecs_reformismus
df_vecs_islamismus
df_vecs_antikommunismus
df_vecs_kollektivismus
df_vecs_regionalismus
df_vecs_wohlfahrtsstaat
df_vecs_konservatismus
df_vecs_islamophobie
df_vecs_populisten
df_vecs_pluralismus
df_vecs_kommunitarismus
df_vecs_kosmopolitismus
df_vecs_chauvinismus
df_vecs_europäisierung


In [8]:
df_vecs_populismus = dfs["populismus"]
df_vecs_autoritarismus = dfs["autoritarismus"]
df_vecs_neoliberalismus = dfs["neoliberalismus"] 
df_vecs_totalitarismus = dfs["totalitarismus"]
df_vecs_radikalismus = dfs["radikalismus"]
df_vecs_revisionismus = dfs["revisionismus"]
df_vecs_pazifismus = dfs["pazifismus"]
df_vecs_antifaschismus = dfs["antifaschismus"]
df_vecs_reformismus = dfs["reformismus"]
df_vecs_islamismus = dfs["islamismus"]
df_vecs_antikommunismus = dfs["antikommunismus"]
df_vecs_kollektivismus = dfs["kollektivismus"]
df_vecs_regionalismus = dfs["regionalismus"]
df_vecs_wohlfahrtsstaat = dfs["wohlfahrtsstaat"]
df_vecs_konservatismus = dfs["konservatismus"]
df_vecs_islamophobie = dfs["islamophobie"]
df_vecs_populisten = dfs["populisten"]
df_vecs_pluralismus = dfs["pluralismus"]
df_vecs_kommunitarismus = dfs["kommunitarismus"]
df_vecs_kosmopolitismus = dfs["kosmopolitismus"]
df_vecs_chauvinismus = dfs["chauvinismus"]
df_vecs_europäisierung = dfs["europäisierung"]

In [9]:
#df_vecs_feindbild

In [10]:
#converting rows into right spelling for it not caring any contextual value and just beeing misspeled
df_vecs_feindbild.loc[df_vecs_feindbild["word"] == "FeindBild", "word"] = "Feindbild"
df_vecs_neonazismus.loc[df_vecs_neonazismus["word"] == "NeoNazismus", "word"] = "Neonazismus"

#### 1.2. Converting the list of vectors to a float

In [11]:
import ast

df_vecs_populismus['vector'] = df_vecs_populismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_autoritarismus['vector'] = df_vecs_autoritarismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_neoliberalismus['vector'] = df_vecs_neoliberalismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_totalitarismus['vector'] = df_vecs_totalitarismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_radikalismus['vector'] = df_vecs_radikalismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_revisionismus['vector'] = df_vecs_revisionismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_pazifismus['vector'] = df_vecs_pazifismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_antifaschismus['vector'] = df_vecs_antifaschismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_reformismus['vector'] = df_vecs_reformismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_islamismus['vector'] = df_vecs_islamismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_antikommunismus['vector'] = df_vecs_antikommunismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_kollektivismus['vector'] = df_vecs_kollektivismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_regionalismus['vector'] = df_vecs_regionalismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_wohlfahrtsstaat['vector'] = df_vecs_wohlfahrtsstaat['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_konservatismus['vector'] = df_vecs_konservatismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_islamophobie['vector'] = df_vecs_islamophobie['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_populisten['vector'] = df_vecs_populisten['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_pluralismus['vector'] = df_vecs_pluralismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_kommunitarismus['vector'] = df_vecs_kommunitarismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_kosmopolitismus['vector'] = df_vecs_kosmopolitismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_chauvinismus['vector'] = df_vecs_chauvinismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_europäisierung['vector'] = df_vecs_europäisierung['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_republikanismus['vector'] = df_vecs_republikanismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_feindbild['vector'] = df_vecs_feindbild['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_neonazismus['vector'] = df_vecs_neonazismus['vector'].apply(lambda x: ast.literal_eval(x))
df_vecs_aktivismus['vector'] = df_vecs_aktivismus['vector'].apply(lambda x: ast.literal_eval(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_vecs_populismus['vector'] = df_vecs_populismus['vector'].apply(lambda x: ast.literal_eval(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_vecs_autoritarismus['vector'] = df_vecs_autoritarismus['vector'].apply(lambda x: ast.literal_eval(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_

#### 1.3. Taking the mean() of every Word Vector per Year

In [12]:
df_mean_populismus = df_vecs_populismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_autoritarismus = df_vecs_autoritarismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_neoliberalismus = df_vecs_neoliberalismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_totalitarismus = df_vecs_totalitarismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_radikalismus = df_vecs_radikalismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_revisionismus = df_vecs_revisionismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_pazifismus = df_vecs_pazifismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_antifaschismus = df_vecs_antifaschismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_reformismus = df_vecs_reformismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_islamismus = df_vecs_islamismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_antikommunismus = df_vecs_antikommunismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_kollektivismus = df_vecs_kollektivismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_regionalismus = df_vecs_regionalismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_wohlfahrtsstaat = df_vecs_wohlfahrtsstaat.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_konservatismus = df_vecs_konservatismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_islamophobie = df_vecs_islamophobie.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_populisten = df_vecs_populisten.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_pluralismus = df_vecs_pluralismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_kommunitarismus = df_vecs_kommunitarismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_kosmopolitismus = df_vecs_kosmopolitismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_chauvinismus = df_vecs_chauvinismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_europäisierung = df_vecs_europäisierung.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_republikanismus = df_vecs_republikanismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_feindbild = df_vecs_feindbild.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_neonazismus = df_vecs_neonazismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_aktivismus = df_vecs_aktivismus.groupby('year').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()

#### 1.4. Merging the Data Frames grouped by the Year

In [13]:
def create_df_year(df):
    df_year = pd.DataFrame(columns=['year', 'word', 'vector'])
    for year, df_year_i in df.groupby('year'):
        df_year_i = df_year_i.groupby('word').agg({'vector': np.mean}).reset_index()
        df_year_i['year'] = year
        df_year = pd.concat([df_year, df_year_i], axis=0)
    return df_year

In [14]:
df_merged = pd.concat([df_mean_populismus,
df_mean_autoritarismus,
df_mean_neoliberalismus,
df_mean_totalitarismus,
df_mean_radikalismus,
df_mean_revisionismus,
df_mean_pazifismus,
df_mean_antifaschismus,
df_mean_reformismus,
df_mean_islamismus,
df_mean_antikommunismus,
df_mean_kollektivismus,
df_mean_regionalismus,
df_mean_wohlfahrtsstaat,
df_mean_konservatismus,
df_mean_islamophobie,
df_mean_populisten,
df_mean_pluralismus,
df_mean_kommunitarismus,
df_mean_kosmopolitismus,
df_mean_chauvinismus,
df_mean_europäisierung,
df_mean_republikanismus,
df_mean_feindbild,
df_mean_neonazismus,
df_mean_aktivismus], axis=0)
df_final_populismus = df_merged.groupby('year').apply(create_df_year)

#### 1.5. Creating DataFrame: All WOI and their yearly Mean Vectors sorted by Year

In [15]:
df_final_populismus

Unnamed: 0_level_0,Unnamed: 1_level_0,year,word,vector
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1946.0,0,1946.0,Aktivismus,"[-0.10033565759658813, -0.3471717834472656, 0...."
1946.0,1,1946.0,Chauvinismus,"[-0.17620925977826118, 0.31971531733870506, 0...."
1946.0,2,1946.0,Kollektivismus,"[0.4047844856977463, -0.12862613797187805, -0...."
1946.0,3,1946.0,Konservatismus,"[0.3572208285331726, 0.02614113688468933, -0.0..."
1946.0,4,1946.0,Totalitarismus,"[-0.4249613471329212, -0.20900606364011765, -0..."
...,...,...,...,...
2014.0,11,2014.0,Populismus,"[0.2758929021656513, -0.6608073850130213, -0.1..."
2014.0,12,2014.0,Populisten,"[0.1445917466448413, -0.4580611541039414, -0.3..."
2014.0,13,2014.0,Radikalismus,"[0.8135848045349121, 0.2500734329223633, -0.48..."
2014.0,14,2014.0,Totalitarismus,"[0.09382973611354828, 0.053399503231048584, -0..."


In [37]:
#safe_df = df_final_populismus.to_csv("df_final_populismus.csv", index=False)

## 2. Data Calculation

### 2.1. Calculating the Cosine Similarity

In [17]:
# Get a list of all unique words and years
unique_words = df_final_populismus['word'].unique()
unique_years = df_final_populismus['year'].unique()

cosine_similarities = {}
for year1 in unique_years:
    for word1 in unique_words:
        vector1 = df_final_populismus.loc[(df_final_populismus['word'] == word1) & (df_final_populismus['year'] == year1), 'vector'].values
        if len(vector1) == 0:
            continue
        vector1 = vector1[0]
        for year2 in unique_years:
            for word2 in unique_words:
                vector2 = df_final_populismus.loc[(df_final_populismus['word'] == word2) & (df_final_populismus['year'] == year2), 'vector'].values
                if len(vector2) == 0:
                    continue
                vector2 = vector2[0]
                dot_product = np.dot(vector1, vector2)
                magnitude_vector1 = np.linalg.norm(vector1)
                magnitude_vector2 = np.linalg.norm(vector2)
                cosine_similarity = dot_product / (magnitude_vector1 * magnitude_vector2)
                cosine_similarities[f"{word1}_{year1}_{word2}_{year2}"] = cosine_similarity

In [18]:
cosine_similarities = pd.DataFrame.from_dict(cosine_similarities, orient='index', columns=['cosine_similarity'])
cosine_similarities = cosine_similarities.reset_index()
cosine_similarities['word1'], cosine_similarities['year1'], cosine_similarities['word2'], cosine_similarities['year2'] = zip(*cosine_similarities['index'].str.split('_'))
cosine_similarities[['year1', 'year2']] = cosine_similarities[['year1', 'year2']].astype(float)

In [21]:
#save_df = cosine_similarities.to_csv('cosine_similarities_populismus_all_w_all_y.csv', index=False)

In [19]:
cosine_similarities

Unnamed: 0,index,cosine_similarity,word1,year1,word2,year2
0,Aktivismus_1946.0_Aktivismus_1946.0,1.000000,Aktivismus,1946.0,Aktivismus,1946.0
1,Aktivismus_1946.0_Chauvinismus_1946.0,0.686457,Aktivismus,1946.0,Chauvinismus,1946.0
2,Aktivismus_1946.0_Kollektivismus_1946.0,0.758117,Aktivismus,1946.0,Kollektivismus,1946.0
3,Aktivismus_1946.0_Konservatismus_1946.0,0.675478,Aktivismus,1946.0,Konservatismus,1946.0
4,Aktivismus_1946.0_Totalitarismus_1946.0,0.711546,Aktivismus,1946.0,Totalitarismus,1946.0
...,...,...,...,...,...,...
925439,Islamismus_2014.0_Populismus_2014.0,0.806455,Islamismus,2014.0,Populismus,2014.0
925440,Islamismus_2014.0_Autoritarismus_2014.0,0.743988,Islamismus,2014.0,Autoritarismus,2014.0
925441,Islamismus_2014.0_Feindbild_2014.0,0.681500,Islamismus,2014.0,Feindbild,2014.0
925442,Islamismus_2014.0_Populisten_2014.0,0.770661,Islamismus,2014.0,Populisten,2014.0


## 2.2. Creating DataFrame: Comparing every word with "Populismus"

In [20]:
#cosine_similarities = pd.read_csv("cosine_similarities_populismus_all_w_all_y.csv") 

In [21]:
df_same_years = cosine_similarities.loc[cosine_similarities['year1'] == cosine_similarities['year2']]

In [22]:
df_same_years_populismus = df_same_years.loc[df_same_years['word1'] == 'Populismus']

In [23]:
df_same_years_populismus

Unnamed: 0,index,cosine_similarity,word1,year1,word2,year2
125177,Populismus_1964.0_Aktivismus_1964.0,0.544553,Populismus,1964.0,Aktivismus,1964.0
125178,Populismus_1964.0_Chauvinismus_1964.0,0.527433,Populismus,1964.0,Chauvinismus,1964.0
125179,Populismus_1964.0_Kollektivismus_1964.0,0.524776,Populismus,1964.0,Kollektivismus,1964.0
125180,Populismus_1964.0_Konservatismus_1964.0,0.548823,Populismus,1964.0,Konservatismus,1964.0
125181,Populismus_1964.0_Totalitarismus_1964.0,0.586279,Populismus,1964.0,Totalitarismus,1964.0
...,...,...,...,...,...,...
921591,Populismus_2014.0_Populismus_2014.0,1.000000,Populismus,2014.0,Populismus,2014.0
921592,Populismus_2014.0_Autoritarismus_2014.0,0.888031,Populismus,2014.0,Autoritarismus,2014.0
921593,Populismus_2014.0_Feindbild_2014.0,0.789103,Populismus,2014.0,Feindbild,2014.0
921594,Populismus_2014.0_Populisten_2014.0,0.885804,Populismus,2014.0,Populisten,2014.0


### 2.2.1. Visualization: Similarity Development between WOI and the word "Populismus" over the years

In [24]:
custom_color_map = {
    'Aktivismus': '#00FF00',
    'Antifaschismus': '#E67E22',
    'Antikommunismus': '#FF69B4',
    'Autoritarismus': '#8E44AD',
    'Chauvinismus': '#1ABC9C',
    'Feindbild': '#FFD700',
    'Islamismus': '#FF00FF',
    'Islamophobie': '#2ECC71',
    'Kollektivismus': '#F39C12',
    'Kommunitarismus': '#44B1CF',
    'Konservatismus': '#28a2f7',
    'Kosmopolitismus': '#05e3e3',
    'Neoliberalismus': '#a8985b',
    'Neonazismus': '#5F56FC',
    'Pazifismus': '#D3AFCD',
    'Pluralismus': '#F1C40F',
    'Populisten': '#6F1E51',
    'Populismus': '#E74C3C',
    'Radikalismus': '#3498DB',
    'Regionalismus': '#8B4513',
    'Reformismus': '#FFC300',
    'Republikanismus': '#9400D3',
    'Revisionismus': '#2ECC40',
    'Solidarität': '#FF5733',
    'Totalitarismus': '#7F8C8D',
    'Wohlfahrtsstaat': '#339900'
}

In [25]:
import plotly.express as px


fig = px.line(df_same_years_populismus, x="year1", y="cosine_similarity", color="word2", color_discrete_map=custom_color_map, title = "Similarity Developement between Populismus and WOI over the Years")
fig.update_yaxes(range=[0, 1])

#fig.show()

fig.write_html("cosine_similarity_populismus_ld_all_words.html")

### 2.2.2. Measuring the biggest Amplitude

In [26]:
import copy
df_same_years_populismus_new = copy.deepcopy(df_same_years_populismus)

In [27]:
df_same_years_populismus_new_amplitude = df_same_years_populismus_new.groupby(['word1', 'word2']).agg({'cosine_similarity': [min, max]})
df_same_years_populismus_new_amplitude['amplitude'] = df_same_years_populismus_new_amplitude['cosine_similarity']['max'] - df_same_years_populismus_new_amplitude['cosine_similarity']['min']
df_same_years_populismus_new_amplitude = df_same_years_populismus_new_amplitude.reset_index()
result = df_same_years_populismus_new_amplitude[df_same_years_populismus_new_amplitude['amplitude'] == df_same_years_populismus_new_amplitude['amplitude'].max()]
result

Unnamed: 0_level_0,word1,word2,cosine_similarity,cosine_similarity,amplitude
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,min,max,Unnamed: 5_level_1
11,Populismus,Konservatismus,0.548823,0.95557,0.406748


### 2.2.3. Measuring the Ascending Amplitude

In [28]:
df_same_years_populismus_new_amplitude = df_same_years_populismus_new_amplitude.sort_values("amplitude", ascending=False)
#inplace=True)
df_same_years_populismus_new_amplitude

Unnamed: 0_level_0,word1,word2,cosine_similarity,cosine_similarity,amplitude
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,min,max,Unnamed: 5_level_1
11,Populismus,Konservatismus,0.548823,0.95557,0.4067475
3,Populismus,Autoritarismus,0.523099,0.927516,0.4044173
15,Populismus,Pazifismus,0.532098,0.936143,0.4040456
19,Populismus,Radikalismus,0.526105,0.909463,0.383358
4,Populismus,Chauvinismus,0.527433,0.892353,0.36492
24,Populismus,Totalitarismus,0.54672,0.907372,0.3606523
7,Populismus,Islamismus,0.565445,0.921853,0.3564079
0,Populismus,Aktivismus,0.544553,0.900165,0.3556115
10,Populismus,Kommunitarismus,0.567738,0.909716,0.3419781
1,Populismus,Antifaschismus,0.592762,0.931881,0.3391197


#### 2.2.4. Visualization Developement over the years of: 3 biggest Aplitude and 3 lowest Amplitude

In [29]:
import plotly.express as px


# Filter dataframe to include only desired words
df_filtered = df_same_years_populismus[df_same_years_populismus["word1"].isin(["Populismus"]) & df_same_years_populismus["word2"].isin(["Kosmopolitismus", "Europäisierung", "Islamophobie", "Pazifismus", "Autoritarismus", "Konservatismus"])]

# Map each word to a different color and line shape
color_map = {"Kosmopolitismus": "#2ECC40", "Europäisierung": "#2ECC40", "Islamophobie": "#2ECC40", "Pazifismus": "#FF5733", "Autoritarismus": "#FF5733", "Konservatismus": "#FF5733"}
line_shape_map = {"Kosmopolitismus": "solid", "Europäisierung": "dot", "Islamophobie": "dash", "Pazifismus": "solid", "Autoritarismus": "dot", "Konservatismus": "dash"}

# Create plot with desired colors and line shapes
fig = px.line(df_filtered, x="year1", y="cosine_similarity", color="word2", line_dash="word2", title="3 Biggest and 3 smallest Amplitude of Cosine Similarity", color_discrete_map=color_map, line_dash_map=line_shape_map)
fig.update_yaxes(range=[0, 1])

# Save plot to HTML file
fig.write_html("cosine_similarity_populismus_ld_filtered.html")

## 2.3. Creating DataFrame: Comparing every word with oneself over the years

### 2.3.1. Repeating the steps from 1. Data Managing since the mean vectos have to be recalculated 

##### Creating a Dataframe with decade mean vectors first

In [31]:
#making a copy of the original variables
import copy
df_vecs_populismus_copy = copy.deepcopy(df_vecs_populismus)
df_vecs_autoritarismus_copy = copy.deepcopy(df_vecs_autoritarismus)
df_vecs_neoliberalismus_copy = copy.deepcopy(df_vecs_neoliberalismus)
df_vecs_totalitarismus_copy = copy.deepcopy(df_vecs_totalitarismus)
df_vecs_radikalismus_copy = copy.deepcopy(df_vecs_radikalismus)
df_vecs_revisionismus_copy = copy.deepcopy(df_vecs_revisionismus)
df_vecs_pazifismus_copy = copy.deepcopy(df_vecs_pazifismus)
df_vecs_antifaschismus_copy = copy.deepcopy(df_vecs_antifaschismus)
df_vecs_reformismus_copy = copy.deepcopy(df_vecs_reformismus)
df_vecs_islamismus_copy = copy.deepcopy(df_vecs_islamismus)
df_vecs_antikommunismus_copy = copy.deepcopy(df_vecs_antikommunismus)
df_vecs_kollektivismus_copy = copy.deepcopy(df_vecs_kollektivismus)
df_vecs_regionalismus_copy = copy.deepcopy(df_vecs_regionalismus)
df_vecs_wohlfahrtsstaat_copy = copy.deepcopy(df_vecs_wohlfahrtsstaat)
df_vecs_konservatismus_copy = copy.deepcopy(df_vecs_konservatismus)
df_vecs_islamophobie_copy = copy.deepcopy(df_vecs_islamophobie)
df_vecs_populisten_copy = copy.deepcopy(df_vecs_populisten)
df_vecs_pluralismus_copy = copy.deepcopy(df_vecs_pluralismus)
df_vecs_kommunitarismus_copy = copy.deepcopy(df_vecs_kommunitarismus)
df_vecs_kosmopolitismus_copy = copy.deepcopy(df_vecs_kosmopolitismus)
df_vecs_chauvinismus_copy = copy.deepcopy(df_vecs_chauvinismus)
df_vecs_europäisierung_copy = copy.deepcopy(df_vecs_europäisierung)
df_vecs_republikanismus_copy = copy.deepcopy(df_vecs_republikanismus)
df_vecs_feindbild_copy = copy.deepcopy(df_vecs_feindbild)
df_vecs_neonazismus_copy = copy.deepcopy(df_vecs_neonazismus)
df_vecs_aktivismus_copy = copy.deepcopy(df_vecs_aktivismus)

In [32]:
# Create a dictionary mapping the year to the decade
# First appearence of "Populismus" is in 1964, so the decades are here different then in the other topic noteboks, or maybe not
def get_decade(year):
    if 1946 <= year <= 1955:
        return "1946-1955"
    elif 1956 <= year <= 1965:
        return "1956-1965"
    elif 1966 <= year <= 1975:
        return "1966-1975"
    elif 1976 <= year <= 1985:
        return "1976-1985"
    elif 1986 <= year <= 1995:
        return "1986-1995"
    elif 1996 <= year <= 2005:
        return "1996-2005"
    elif 2006 <= year <= 2014:
        return "2006-2014"
    else:
        return None

In [33]:
# creating a "decade" column and assinging a decade to every row
df_vecs_populismus_copy['decade'] = df_vecs_populismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_autoritarismus_copy['decade'] = df_vecs_autoritarismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_neoliberalismus_copy['decade'] = df_vecs_neoliberalismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_totalitarismus_copy['decade'] = df_vecs_totalitarismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_radikalismus_copy['decade'] = df_vecs_radikalismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_revisionismus_copy['decade'] = df_vecs_revisionismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_pazifismus_copy['decade'] = df_vecs_pazifismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_antifaschismus_copy['decade'] = df_vecs_antifaschismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_reformismus_copy['decade'] = df_vecs_reformismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_islamismus_copy['decade'] = df_vecs_islamismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_antikommunismus_copy['decade'] = df_vecs_antikommunismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_kollektivismus_copy['decade'] = df_vecs_kollektivismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_regionalismus_copy['decade'] = df_vecs_regionalismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_wohlfahrtsstaat_copy['decade'] = df_vecs_wohlfahrtsstaat_copy["year"].apply(lambda x: get_decade(x))
df_vecs_konservatismus_copy['decade'] = df_vecs_konservatismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_islamophobie_copy['decade'] = df_vecs_islamophobie_copy["year"].apply(lambda x: get_decade(x))
df_vecs_populisten_copy['decade'] = df_vecs_populisten_copy["year"].apply(lambda x: get_decade(x))
df_vecs_pluralismus_copy['decade'] = df_vecs_pluralismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_kommunitarismus_copy['decade'] = df_vecs_kommunitarismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_kosmopolitismus_copy['decade'] = df_vecs_kosmopolitismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_chauvinismus_copy['decade'] = df_vecs_chauvinismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_europäisierung_copy['decade'] = df_vecs_europäisierung_copy["year"].apply(lambda x: get_decade(x))
df_vecs_republikanismus_copy['decade'] = df_vecs_republikanismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_feindbild_copy['decade'] = df_vecs_feindbild_copy["year"].apply(lambda x: get_decade(x))
df_vecs_neonazismus_copy['decade'] = df_vecs_neonazismus_copy["year"].apply(lambda x: get_decade(x))
df_vecs_aktivismus_copy['decade'] = df_vecs_aktivismus_copy["year"].apply(lambda x: get_decade(x))

In [34]:
# Calculating the mean vector for the decade for every word
df_mean_populismus_decade = df_vecs_populismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_autoritarismus_decade = df_vecs_autoritarismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_neoliberalismus_decade = df_vecs_neoliberalismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_totalitarismus_decade = df_vecs_totalitarismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_radikalismus_decade = df_vecs_radikalismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_revisionismus_decade = df_vecs_revisionismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_pazifismus_decade = df_vecs_pazifismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_antifaschismus_decade = df_vecs_antifaschismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_reformismus_decade = df_vecs_reformismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_islamismus_decade = df_vecs_islamismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_antikommunismus_decade = df_vecs_antikommunismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_kollektivismus_decade = df_vecs_kollektivismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_regionalismus_decade = df_vecs_regionalismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_wohlfahrtsstaat_decade = df_vecs_wohlfahrtsstaat_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_konservatismus_decade = df_vecs_konservatismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_islamophobie_decade = df_vecs_islamophobie_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_populisten_decade = df_vecs_populisten_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_pluralismus_decade = df_vecs_pluralismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_kommunitarismus_decade = df_vecs_kommunitarismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_kosmopolitismus_decade = df_vecs_kosmopolitismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_chauvinismus_decade = df_vecs_chauvinismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_europäisierung_decade = df_vecs_europäisierung_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_republikanismus_decade = df_vecs_republikanismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_feindbild_decade = df_vecs_feindbild_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_neonazismus_decade = df_vecs_neonazismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()
df_mean_aktivismus_decade = df_vecs_aktivismus_copy.groupby('decade').agg({'word': 'first', 'vector': lambda x: np.mean(np.array(x.tolist()), axis=0)}).reset_index()

In [35]:
df_mean_decade_concat = pd.concat([df_mean_populismus_decade,
df_mean_autoritarismus_decade,
df_mean_neoliberalismus_decade,
df_mean_totalitarismus_decade,
df_mean_radikalismus_decade,
df_mean_revisionismus_decade,
df_mean_pazifismus_decade,
df_mean_antifaschismus_decade,
df_mean_reformismus_decade, 
df_mean_islamismus_decade, 
df_mean_antikommunismus_decade,
df_mean_kollektivismus_decade,
df_mean_regionalismus_decade,
df_mean_wohlfahrtsstaat_decade,
df_mean_konservatismus_decade,
df_mean_islamophobie_decade,
df_mean_populisten_decade, 
df_mean_pluralismus_decade,
df_mean_kommunitarismus_decade,
df_mean_kosmopolitismus_decade,
df_mean_chauvinismus_decade,
df_mean_europäisierung_decade,
df_mean_republikanismus_decade,
df_mean_feindbild_decade,
df_mean_neonazismus_decade,
df_mean_aktivismus_decade], axis=0)

In [36]:
def create_df_decade(df):
    df_year = pd.DataFrame(columns=['decade', 'word', 'vector'])
    for year, df_year_i in df.groupby('decade'):
        df_year_i = df_year_i.groupby('word').agg({'vector': np.mean}).reset_index()
        df_year_i['decade'] = year
        df_year = pd.concat([df_year, df_year_i], axis=0)
    return df_year

In [37]:
df_mean_decade = df_mean_decade_concat.groupby('decade').apply(create_df_decade)

In [38]:
#save_df = df_mean_decade.to_csv('df_mean_decade_populismus_all_w_all_y.csv', index=False)

### 2.3.2. Cosine Similarity: calculating mean vectors for decades

In [39]:
# Get a list of all unique words
unique_words = df_mean_decade['word'].unique()

# Get a list of all unique decades
unique_decades = df_mean_decade['decade'].unique()

cosine_similarities_decade = {}
for i, decade1 in enumerate(unique_decades):
    for word1 in unique_words:
        vector1 = df_mean_decade.loc[(df_mean_decade['word'] == word1) & (df_mean_decade['decade'] == decade1), 'vector'].values
        if len(vector1) == 0:
            continue
        vector1 = vector1[0]
        for j, decade2 in enumerate(unique_decades[i+1:]):
           #  Check if the difference between the decades is equal to 10
            if int(decade2[:4]) - int(decade1[:4]) != 10:
                continue
            for word2 in unique_words:
                vector2 = df_mean_decade.loc[(df_mean_decade['word'] == word2) & (df_mean_decade['decade'] == decade2), 'vector'].values
                if len(vector2) == 0:
                    continue
                vector2 = vector2[0]
                dot_product = np.dot(vector1, vector2)
                magnitude_vector1 = np.linalg.norm(vector1)
                magnitude_vector2 = np.linalg.norm(vector2)
                cosine_similarity = dot_product / (magnitude_vector1 * magnitude_vector2)
                cosine_similarities_decade[f"{word1}_{decade1}_{word2}_{decade2}"] = cosine_similarity
cosine_similarities_decade = pd.DataFrame.from_dict(cosine_similarities_decade, orient='index', columns=['cosine_similarity'])
cosine_similarities_decade = cosine_similarities_decade.reset_index()
cosine_similarities_decade['word1'], cosine_similarities_decade['decade1'], cosine_similarities_decade['word2'], cosine_similarities_decade['decade2'] = zip(*cosine_similarities_decade['index'].str.split('_'))
cosine_similarities_decade[['decade1', 'decade2']] = cosine_similarities_decade[['decade1', 'decade2']].astype(str)

In [40]:
cosine_similarities_decade

Unnamed: 0,index,cosine_similarity,word1,decade1,word2,decade2
0,Aktivismus_1946-1955_Aktivismus_1956-1965,0.786550,Aktivismus,1946-1955,Aktivismus,1956-1965
1,Aktivismus_1946-1955_Antifaschismus_1956-1965,0.808362,Aktivismus,1946-1955,Antifaschismus,1956-1965
2,Aktivismus_1946-1955_Antikommunismus_1956-1965,0.920785,Aktivismus,1946-1955,Antikommunismus,1956-1965
3,Aktivismus_1946-1955_Chauvinismus_1956-1965,0.892212,Aktivismus,1946-1955,Chauvinismus,1956-1965
4,Aktivismus_1946-1955_Europäisierung_1956-1965,0.766688,Aktivismus,1946-1955,Europäisierung,1956-1965
...,...,...,...,...,...,...
3173,Kommunitarismus_1996-2005_Feindbild_2006-2014,0.818512,Kommunitarismus,1996-2005,Feindbild,2006-2014
3174,Kommunitarismus_1996-2005_Populisten_2006-2014,0.884226,Kommunitarismus,1996-2005,Populisten,2006-2014
3175,Kommunitarismus_1996-2005_Islamismus_2006-2014,0.933714,Kommunitarismus,1996-2005,Islamismus,2006-2014
3176,Kommunitarismus_1996-2005_Islamophobie_2006-2014,0.887411,Kommunitarismus,1996-2005,Islamophobie,2006-2014


### 2.3.3. Creating Word2Word DataFrame with only Cosine Similarities to ONESELF 

In [41]:
# Filter the rows where the word1 and word2 are the same and decade1 is consecutive to decade2
df = cosine_similarities_decade[(cosine_similarities_decade["word1"] == cosine_similarities_decade["word2"]) & (cosine_similarities_decade["decade1"] == cosine_similarities_decade["decade1"])]

# Reset the index of the filtered dataframe
#df = df.reset_index(drop=True)

# Print the filtered dataframe
df

Unnamed: 0,index,cosine_similarity,word1,decade1,word2,decade2
0,Aktivismus_1946-1955_Aktivismus_1956-1965,0.786550,Aktivismus,1946-1955,Aktivismus,1956-1965
20,Antifaschismus_1946-1955_Antifaschismus_1956-1965,0.813702,Antifaschismus,1946-1955,Antifaschismus,1956-1965
40,Antikommunismus_1946-1955_Antikommunismus_1956...,0.790928,Antikommunismus,1946-1955,Antikommunismus,1956-1965
60,Chauvinismus_1946-1955_Chauvinismus_1956-1965,0.918387,Chauvinismus,1946-1955,Chauvinismus,1956-1965
80,Europäisierung_1946-1955_Europäisierung_1956-1965,0.882730,Europäisierung,1946-1955,Europäisierung,1956-1965
...,...,...,...,...,...,...
3069,Feindbild_1996-2005_Feindbild_2006-2014,0.980762,Feindbild,1996-2005,Feindbild,2006-2014
3096,Populisten_1996-2005_Populisten_2006-2014,0.992340,Populisten,1996-2005,Populisten,2006-2014
3123,Islamismus_1996-2005_Islamismus_2006-2014,0.986947,Islamismus,1996-2005,Islamismus,2006-2014
3150,Islamophobie_1996-2005_Islamophobie_2006-2014,0.927398,Islamophobie,1996-2005,Islamophobie,2006-2014


In [42]:
df["x_axis_label"] = df["decade2"].apply(lambda x: "bis "+str(int(x[-4:])-1) if x == "1946-1955" else x)
df["x_axis_label"] = df["x_axis_label"].apply(lambda x: "bis "+str(int(x[-4:])+1) if x == "1956-1965" else x)
df["x_axis_label"] = df["x_axis_label"].apply(lambda x: "bis "+str(int(x[-4:])+1) if x == "1966-1975" else x)
df["x_axis_label"] = df["x_axis_label"].apply(lambda x: "bis "+str(int(x[-4:])+1) if x == "1976-1985" else x)
df["x_axis_label"] = df["x_axis_label"].apply(lambda x: "bis "+str(int(x[-4:])+1) if x == "1986-1995" else x)
df["x_axis_label"] = df["x_axis_label"].apply(lambda x: "bis "+str(int(x[-4:])+1) if x == "1996-2005" else x)
df["x_axis_label"] = df["x_axis_label"].apply(lambda x: "bis "+str(int(x[-4:])+1) if x == "2006-2014" else x)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [43]:
df

Unnamed: 0,index,cosine_similarity,word1,decade1,word2,decade2,x_axis_label
0,Aktivismus_1946-1955_Aktivismus_1956-1965,0.786550,Aktivismus,1946-1955,Aktivismus,1956-1965,bis 1966
20,Antifaschismus_1946-1955_Antifaschismus_1956-1965,0.813702,Antifaschismus,1946-1955,Antifaschismus,1956-1965,bis 1966
40,Antikommunismus_1946-1955_Antikommunismus_1956...,0.790928,Antikommunismus,1946-1955,Antikommunismus,1956-1965,bis 1966
60,Chauvinismus_1946-1955_Chauvinismus_1956-1965,0.918387,Chauvinismus,1946-1955,Chauvinismus,1956-1965,bis 1966
80,Europäisierung_1946-1955_Europäisierung_1956-1965,0.882730,Europäisierung,1946-1955,Europäisierung,1956-1965,bis 1966
...,...,...,...,...,...,...,...
3069,Feindbild_1996-2005_Feindbild_2006-2014,0.980762,Feindbild,1996-2005,Feindbild,2006-2014,bis 2015
3096,Populisten_1996-2005_Populisten_2006-2014,0.992340,Populisten,1996-2005,Populisten,2006-2014,bis 2015
3123,Islamismus_1996-2005_Islamismus_2006-2014,0.986947,Islamismus,1996-2005,Islamismus,2006-2014,bis 2015
3150,Islamophobie_1996-2005_Islamophobie_2006-2014,0.927398,Islamophobie,1996-2005,Islamophobie,2006-2014,bis 2015


<b>!For scientific corecctness we have to eluminate words out of the calculation, which are not represented in every consecutive decade!</b>

In [60]:
#to_eliminate = [""]
#df = df[~df['word1'].isin(to_eliminate)]

In [44]:
quantile_5 = df["cosine_similarity"].quantile(0.05)

In [45]:
df.loc[df["cosine_similarity"] <= quantile_5 ]["word2"].unique()

array(['Regionalismus', 'Neoliberalismus', 'Populismus',
       'Kosmopolitismus'], dtype=object)

## 2.3.4. Visualization: Similarity Development between WOI to Themselfes over the Years

In [47]:
import plotly.express as px



fig = px.line(df, x='x_axis_label', y='cosine_similarity', color='word2', color_discrete_map=custom_color_map, title = "Similarity Developement of WOI to Oneself over Decades")
fig.update_yaxes(range=[0, 1])
fig.update_layout(title={'text': "Similarity Developement of WOI to Oneself over Decades", 'y':0.95,'x':0.5,'xanchor': 'right','yanchor': 'top'})

fig.add_shape(
    type = 'line',
    y0=quantile_5,
    y1=quantile_5,
    x0=df['x_axis_label'].min(),
    x1=df['x_axis_label'].max(),
    yref='paper',
    xref='x',
    line=dict(color='gray', dash='dash')
)
fig.add_annotation(
    x='bis 2015',
    y=quantile_5,
    text=f'5% quantile: {quantile_5:.5f}',
    xref='x',
    yref='y',
    showarrow=False,
    font=dict(color='gray'))
    #fig.show()
fig.write_html("cosine_similarity_populismus_word2word_decade_all_words.html")