In [None]:
import numpy as np
import altair as alt
import pandas as pd
from altair import datum
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
from textwrap import wrap



In [None]:
!pip install vega-datasets

In [None]:
from vega_datasets import data

In [None]:
# Load the party-level CSV from the notebook's working directory.
world_data = pd.read_csv('V-Dem-CPD-Party-V2.csv')

# Preview the first rows to sanity-check the load.
world_data.head()

In [None]:
# (rows, columns) of the unfiltered dataset.
print(world_data.shape)

In [None]:
# Keep only observations strictly after 1990. The explicit .copy() makes the
# result an independent frame, so adding columns to it in a later cell does
# not raise pandas' SettingWithCopyWarning.
# NOTE(review): the year sliders further down start at 1990, which this
# filter excludes - confirm whether `>= 1990` was intended.
world_data = world_data[world_data['year'] > 1990].copy()

In [None]:
# (rows, columns) after the year filter.
print(world_data.shape)

In [None]:
# The 27 EU member states, spelled the way they are matched against the
# dataset's `country_name` column.
# NOTE(review): confirm the dataset uses 'Czechia' rather than
# 'Czech Republic'; a mismatch would silently drop that country.
eu_countries = [
    'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus',
    'Czechia', 'Denmark', 'Estonia', 'Finland', 'France',
    'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy',
    'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands',
    'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia',
    'Spain', 'Sweden',
]

In [None]:
# Restrict the filtered world data to rows whose country is an EU member.
is_eu_member = world_data['country_name'].isin(eu_countries)
eu_data = world_data.loc[is_eu_member]

eu_data.head()

In [None]:
# (rows, columns) of the EU-only subset.
print(eu_data.shape)

In [None]:
# Seats won divided by total seats. Despite the column name this is a
# fraction, not a percentage. `eu_data` was sliced before this column was
# added, so it does not carry it.
seats_won = world_data['v2panumbseat']
seats_total = world_data['v2patotalseat']
world_data['v2paseatpercentage'] = seats_won / seats_total

print(world_data['v2paseatpercentage'])

In [None]:
# Compare sizes: world_data now has the extra seat-share column, eu_data does not.
print(world_data.shape)
print(eu_data.shape)

In [None]:
# URL of the world_110m dataset shipped with vega_datasets.
world = data.world_110m.url
world

In [None]:
# Number of missing values in every column of the filtered world data.
missing_values_count = world_data.isna().sum()

# Spot-check a single column.
print(missing_values_count.loc['year'])

# print(missing_values_count['v2paminor'])

In [None]:
# Columns in which every value is present (no missing data)
world_data.columns[world_data.notna().all()]

In [None]:
# Count the EU rows that have all of the fields the charts rely on.
required_columns = ['v2paimmig', 'v2paminor', 'year', 'country_name']
test = eu_data.dropna(subset=required_columns)
print(test.shape)

In [None]:
# Number of rows in eu_data for each (country, year) pair.
eu_count = (
    eu_data
    .groupby(['country_name', 'year'])
    .size()
    .rename('count')
    .reset_index()
)

eu_count.head(20)

In [None]:
# Number of distinct (country, year) pairs in the EU subset.
print(eu_count.shape)

## Notes:

Consider using all countries in Europe, not just EU member states.

In [None]:
# Size of the EU subset before narrowing it to the issue columns.
print(eu_data.shape)

In [None]:
# Party positions on Minority Rights, Immigration and Gender Equality:
# one de-duplicated row per party observation, with explicit dtypes.
issue_columns = ['country_name', 'v2paenname', 'year', 'v2paminor', 'v2paimmig', 'v2pagender']
test = eu_data[issue_columns].drop_duplicates()
test = test.astype({
    'year': int,
    'v2paminor': float,
    'v2paimmig': float,
    'v2pagender': float,
})

test.head()

In [None]:
# (rows, columns) of the de-duplicated issue table.
print(test.shape)

In [None]:
# Missing values per column of the issue table.
tcount = test.isnull().sum()
tcount

In [None]:
# Countries that have at least one row with a missing value in any column.
rows_with_gaps = test.isna().any(axis=1)
print(test.loc[rows_with_gaps, 'country_name'].unique())

In [None]:

# test[test['v2paminor'] > 2.5] = 5



# test.head(20)
# # test['v2paimmig'] = test['v2paimmig'].apply(lambda x: 'high' if x > 5 else x)
# # test['v2pagender'] = test['v2pagender'].apply(lambda x: 'high' if x > 5 else x)

In [None]:
# Re-check of the issue table size (the cell above it is fully commented out).
print(test.shape)

In [None]:
# Re-check of the per-column missing-value counts.
tcount = test.isnull().sum()
tcount

In [None]:
# First 20 rows of the issue table.
test.head(20)

In [None]:
# Alphabetical list of the countries present in the issue table
# (None entries are skipped); it feeds the country dropdown.
country_list = sorted(
    name for name in test['country_name'].unique() if name is not None
)

# Every calendar year from 1990 through 2020.
year_list = [*range(1990, 2021)]

In [None]:
alt.data_transformers.disable_max_rows()

# One shared dynamic query (country dropdown + year slider) drives all four
# views. selection_point replaces the deprecated selection_single and matches
# the add_params API used throughout this notebook.
search_input = alt.selection_point(
    name='Select',
    fields=['country_name', 'year'],
    value=[{'country_name': 'Italy', 'year': 2001}],
    bind={
        'year': alt.binding_range(min=1990, max=2020, step=1, name='Select Year: '),
        'country_name': alt.binding_select(options=country_list, name="Select EU Country: "),
    },
)


def _issue_strip(field, axis_topic, tooltip_title):
    """Build a one-row circle plot of party positions on a single issue.

    Parameters
    ----------
    field : str
        Column of `test` holding the party score (e.g. 'v2paimmig').
    axis_topic : str
        Issue name inserted into the x-axis title.
    tooltip_title : str
        Tooltip label shown next to the score.

    Returns
    -------
    alt.Chart
        Chart filtered to the country/year currently chosen in `search_input`.
    """
    return alt.Chart(test).mark_circle(
        size=150, stroke='black', strokeWidth=1.5, strokeOpacity=0.75
    ).encode(
        alt.X(
            f'{field}:Q',
            title=f'Views on {axis_topic} (-5 = Very Opposed, 0 = Neutral, 5 = Very Supportive)',
            scale=alt.Scale(domain=[-5, 5]),
        ),
        tooltip=[
            alt.Tooltip('v2paenname:N', title='Party Name: '),
            alt.Tooltip('country_name:N', title='EU Country: '),
            alt.Tooltip('year:Q', title="Year: "),
            alt.Tooltip(f'{field}:Q', title=tooltip_title, format='.2f'),
        ],
    ).properties(
        width=700,
        height=50
    ).add_params(
        search_input
    ).transform_filter(
        search_input
    )


immig = _issue_strip('v2paimmig', 'Immigration', "View on Immigration: ")
minority = _issue_strip('v2paminor', 'Minority Rights', "View on Minority Rights: ")
gender = _issue_strip('v2pagender', 'Gender Equality', "View on Gender Rights: ")

# Stacked bar chart: one segment per (country, year), coloured by year and
# dimmed unless it matches the current country/year query.
chart = alt.Chart(test).mark_bar(stroke='black', strokeWidth=1).encode(
    x=alt.X('country_name:N', title='Names of EU Countries'),
    y=alt.Y('count():Q', title='Number of Political Parties'),
    color=alt.Color('year:Q',
                    # A constant domain keeps the colours stable regardless of
                    # which years are present.
                    scale=alt.Scale(
                        scheme='turbo',
                        domain=[1990, 2020],
                    ),
                    title='Year',
                    legend=alt.Legend(gradientStrokeColor='black', gradientStrokeWidth=0.5, tickCount=5),
                    sort='descending'
                    ),
    tooltip=[
        alt.Tooltip('country_name:N', title='EU Country: '),
        alt.Tooltip('count():Q', title='Number of Political Parties: '),
        alt.Tooltip('year:Q', title="Year: "),
        alt.Tooltip('mean(v2paminor):Q', title="Mean of Views on Minority Rights: ", format='.2f'),
        alt.Tooltip('stdev(v2paminor):Q', title="Standard Deviation of Views on Minority Rights: ", format='.2f'),
        alt.Tooltip('mean(v2paimmig):Q', title="Mean of Views on Immigration: ", format='.2f'),
        alt.Tooltip('stdev(v2paimmig):Q', title="Standard Deviation of Views on Immigration: ", format='.2f'),
        alt.Tooltip('mean(v2pagender):Q', title="Mean of Views on Gender Equality: ", format='.2f'),
        # Fixed: this entry previously aggregated v2paminor under the
        # gender-equality label.
        alt.Tooltip('stdev(v2pagender):Q', title="Standard Deviation of Views on Gender Equality: ", format='.2f'),
    ],
    order=alt.Order('year', sort='ascending'),  # stack the segments by year
    opacity=alt.condition(
        search_input,
        alt.value(1.00),
        alt.value(0.20)
    ),
).add_params(
    search_input
).properties(
    width=700,
    height=400,
    title=alt.TitleParams(
        'In EU Countries, how many parties were there over different election cycles, and what are their views?',
        subtitle=[
            '',
            'Dataset Used: https://v-dem.net/data/the-v-dem-dataset/',
            '',
            'This visualization has tooltips! The circle plots below also have tooltips, along with panning and zooming!',
            'Use the dynamic queries below to examine the distribution of the parties\' views on different policy issues!',
            '(Make sure that the year in the query matches an election cycle year EXACTLY, otherwise no data will be shown.)',
            '',
            'In the circle plots below, if there\'s circles overlapping, you can zoom in to see the circles more separately!',
            'You can also double click on the circle plots to reset them to their original state!',
            '(A few of the parties may not have circles due to missing data.)',
            '',
            '(An additional idea for this visualization could be encoding the circle sizes with the percentage of vote the party won.)',
            ''
        ],
        fontSize=15,
    )
)

# chart.save('index.html')

final = alt.vconcat(chart, minority, immig, gender).properties(spacing=40).interactive()

final


In [None]:
# Party-level data, re-read unfiltered. Exact-match renames replace the
# previous substring-based columns.str.replace, which would also have altered
# any other column whose name merely contains the pattern.
distrib = pd.read_csv('V-Dem-CPD-Party-V2.csv').rename(columns={'v2paid': 'partyfacts_id'})

# Election results per party and region; voteshare is the party's share of
# the valid votes in the row.
eurovote = pd.read_csv('eu_ned_national.csv')
eurovote['voteshare'] = eurovote['partyvote'] / eurovote['validvote']
eurovote = eurovote.rename(columns={'pf_party_id': 'partyfacts_id'})

# Inner join: keep only party/year pairs present in both sources.
eurocombo = pd.merge(distrib, eurovote, on=['year', 'partyfacts_id'])
eurocombo = eurocombo.loc[:, ['nuts2016', 'regionname', 'v2paenname', 'partyfacts_id',
                              'party_abbreviation', 'year', 'country_name', 'country_id',
                              'voteshare', 'partyvote', 'validvote', 'totalvote',
                              'v2xpa_antiplural']]
eurocombo.head()


In [None]:
# Regional GDP table: keep the five columns of interest and rename the three
# that must line up with the election data (GDP value, region code, year).
gdp_source_columns = ['TERRITORIAL_LEVEL', 'TIME_PERIOD', 'OBS_VALUE', 'COUNTRY', 'REF_AREA']
eurogdp2 = (
    pd.read_csv('OECDGDPcodes_some_convertnuts2016.csv')[gdp_source_columns]
    .rename(columns={
        'OBS_VALUE': 'GDP per capita (PPP, constant prices 2015)',
        'REF_AREA': 'nuts2016',
        'TIME_PERIOD': 'year',
    })
)

# Inner join of GDP with the combined party/election rows on region and year.
eurotriple = eurogdp2.merge(eurocombo, on=['nuts2016', 'year'])

In [None]:
# Scratch check of the pearsonr call: element [0] of the result is the
# correlation coefficient used in the next cell.
pearsonr([2,3],[5,10])[0]

In [None]:
alt.data_transformers.disable_max_rows()

eurotriplemod = eurotriple.copy()
# textwrap.wrap turns each party name into a list of lines of at most 30
# characters (presumably so long names render as multi-line facet headers).
eurotriplemod['v2paenname'] = eurotriplemod['v2paenname'].apply(wrap, args=[30])
eurotriplemod[' '] = eurotriplemod['v2paenname']


def _party_interest(voteshare, gdp):
    """Score one party: |Pearson r| between vote share and GDP, times ln(n regions)."""
    return abs(pearsonr(voteshare, gdp)[0]) * np.log(len(voteshare))


# "Interest metric" per (year, country): the mean party score over all parties
# observed in at least two regions. The unused np.polyfit slope and the no-op
# debug branch of the earlier version are gone; pearsonr was always the value
# actually stored.
heat_records = []
for yr in eurotriplemod['year'].unique():
    for ctry in eurotriplemod['country_name'].unique():
        subset = eurotriplemod[(eurotriplemod['year'] == yr) & (eurotriplemod['country_name'] == ctry)]
        listslopes = [
            _party_interest(party_rows['voteshare'],
                            party_rows['GDP per capita (PPP, constant prices 2015)'])
            for _, party_rows in subset.groupby('partyfacts_id')
            if len(party_rows) >= 2
        ]
        # Explicit NaN for combinations without any scorable party, instead of
        # letting np.mean([]) emit a RuntimeWarning.
        mean_score = np.mean(listslopes) if listslopes else np.nan
        heat_records.append({'year': yr, 'country_name': ctry, 'slope': mean_score})
heatdf = pd.DataFrame(heat_records)

# Clicking a heatmap cell selects one (country, year); Austria 2002 is the default.
heatmap_selection = alt.selection_point(fields=['country_name', 'year'],
                                         on='click',
                                         value=[{'year': 2002,
                                                 'country_name':'Austria'}])

# Party highlight selection. selection_point replaces the deprecated
# selection_single, consistent with heatmap_selection above.
# NOTE(review): it is bound to the legend, but the colour legend is disabled
# below (legend=None) - confirm the binding is still wanted.
partyselect = alt.selection_point(fields=['party_abbreviation'], bind='legend')

# Scatter of regional GDP vs. party vote share for the selected country/year.
base = alt.Chart(eurotriplemod).mark_point().encode(
    x=alt.X('GDP per capita (PPP, constant prices 2015):Q',
        title=['Region GDP per capita','(PPP, constant prices 2015)'],
        axis=alt.Axis(tickCount=4)),
    y=alt.Y('voteshare:Q',title=['Party\'s Share of Votes','from Regional Votes Cast'],axis=alt.Axis(format='%')),
    tooltip=[alt.Tooltip('country_name:N',title='Country'),
            alt.Tooltip('year:O',title='Year'),
            alt.Tooltip('regionname:N',title='Region'),
            alt.Tooltip('nuts2016:N',title='Regional code'),
            alt.Tooltip('GDP per capita (PPP, constant prices 2015):Q',title='GDP per capita PPP',format=','),
            alt.Tooltip('voteshare:Q',title='Share of regional total vote',format='.1%'),
            alt.Tooltip('partyvote:N',title='Total votes in region for party',format=',')],
    size=alt.Size('partyvote:Q',legend=alt.Legend(orient='top',title=['Total Votes for Party','in Geographic Region'])),
    color=alt.condition(partyselect, 'party_abbreviation', alt.value('lightgray'),
                        title='Party Name (Short)',legend=None,scale=alt.Scale(scheme='tableau20')),
    opacity=alt.condition(partyselect, alt.value(1), alt.value(0.2))
).transform_filter(
    heatmap_selection
).add_params(
    partyselect
)

# One regression line of vote share on GDP per party, for the selected parties.
regres = base.transform_filter(
    partyselect
).transform_regression('GDP per capita (PPP, constant prices 2015)', 'voteshare', groupby=['party_abbreviation']).mark_line()

# Country x year heatmap of the interest metric; it also hosts the click selection.
heatmap = alt.Chart(heatdf).mark_rect().encode(
    alt.Y('country_name:N', axis=alt.Axis(orient='right'),title='country'),
    x='year:O',
    color=alt.Color('slope', scale=alt.Scale(scheme='viridis'),title='Interest Metric'),
    tooltip=[alt.Tooltip('country_name:N',title='Country'),
            alt.Tooltip('year:O',title='Year'),
            alt.Tooltip('slope:Q', title='Interest Metric',format='.2f')],
    opacity = alt.condition(heatmap_selection,alt.value(1), alt.value(0.4))
).add_params(
    heatmap_selection
).properties(width=430, height = 400)

# Layer scatter + regression, then facet into one small panel per party name.
scatteregress = (base + regres
                 ).resolve_scale(
                     x='shared'
                ).resolve_axis(
                ).properties(
    width = 200, height = 100
    ).facet(
    facet=alt.Facet('v2paenname', header=alt.Header(labelFontSize=12,title=None)),
    title = alt.Title('Party (English Name)',align='center',anchor='middle'),
    columns=3,
    spacing=50,
    bounds='flush',
).add_params(
    heatmap_selection
).transform_filter(heatmap_selection)

# Faceted scatter on the left, heatmap on the right, under a shared title.
scatterheat = scatteregress | heatmap
scatterheat = scatterheat.properties(
    title = alt.Title(
       ['','Relationship of GDP per capita and Voting Across Select OECD Countries and Parties, 2000-2019'],
       anchor='start',
       orient='top',
       offset = 10,
      fontSize = 20,
        subtitle=['Each bubble represents a geographic region',
      'Data is only for parliamentary elections. Turnout is not shown for each region, and should not be assumed to be constant',
      "Regression lines shown for individual parties. No claims are made about regression validity; interpretation is left to the viewer",
      'Data sources: Europe National Elections Database (eu-ned.com), OECD, V-Party / V-Dem'],

      subtitleFontSize=15
   )
)

scatterheat
#scatterheat.save('basgrand.html')


In [None]:
# Collapse the regional rows to one row per party and election: every column
# that is not region-specific becomes a grouping key, and the vote counts are
# summed across regions.
# NOTE(review): groupby drops rows whose key is NaN by default, so parties
# missing e.g. v2xpa_antiplural disappear here - confirm that is intended.
nonconstant = ['voteshare', 'partyvote', 'totalvote','validvote','nuts2016','regionname']
constcolumns = [x for x in list(eurocombo.columns) if x not in nonconstant]
groupeuro = eurocombo.groupby(constcolumns).sum(numeric_only=True).reset_index()
groupeuro['countryvoteshare'] = groupeuro['partyvote'] / groupeuro['validvote']

unique_combinations = groupeuro[['country_name', 'partyfacts_id']].drop_duplicates()
last_data_year = groupeuro['year'].max()

# Forward-fill each party's values across the years between elections, up to
# the year before the country's next election after the party's last
# appearance (or through the final data year if there is none).
# Frames are collected in a list and concatenated once; growing a DataFrame
# with pd.concat inside the loop is quadratic.
filled_frames = []
for _, row in unique_combinations.iterrows():
    this_country = groupeuro[groupeuro['country_name'] == row['country_name']]
    temp_df = this_country[this_country['partyfacts_id'] == row['partyfacts_id']]

    list_years = sorted(this_country['year'].unique())
    next_position = list_years.index(temp_df['year'].max()) + 1
    if next_position == len(list_years):
        next_year = last_data_year + 1
    else:
        next_year = list_years[next_position]

    df2 = pd.DataFrame({'year': np.arange(temp_df['year'].min(), next_year)})

    # Left join on the full year range, then carry the last observation forward.
    filled_frames.append(pd.merge(df2, temp_df, on='year', how='left').ffill())

resultfilled = pd.concat(filled_frames, ignore_index=True) if filled_frames else pd.DataFrame()
resultfilled.head()


In [None]:
# Spot-check the aggregated rows for one country.
groupeuro[groupeuro['country_name'] == 'Austria'].head()

In [None]:
# Year slider (1990-2020) bound to a point selection on `year`, starting at 2000.
slider = alt.binding_range(min=1990, max=2020, step=1)
select_year = alt.selection_point(name='Select', fields=['year'], bind=slider, value=[{'year': 2000}])

# Point selection on country, plus a colour rule that greys out unselected countries.
selectioncountry = alt.selection_point(fields=['country_name'])
colorsel = alt.condition(
    selectioncountry,
    alt.Color('country_name:N').legend(None),
    alt.value('lightgray')
)

# Interval brush restricted to the x encoding (the antipluralism score in the
# histogram below).
brushcountry = alt.selection_interval(
    encodings=['x'] # limit selection to the x-axis encoding
)

# Clickable country list that hosts `selectioncountry`. It is only referenced
# by the commented-out layout at the end of this cell.
legendcountry = alt.Chart(resultfilled).mark_rect().encode(
    alt.Y('country_name:N').axis(orient='right'),
    color=colorsel
).add_params(
    selectioncountry
)#.transform_filter(selection)

# Dot plot: one point per party row, country on y and antipluralism on x,
# filtered by the selected year and country. Points outside the brush are
# drawn at half opacity.
partyviewbase = alt.Chart(resultfilled).mark_point().add_params(
     #brushcountry
).encode(
    y='country_name:N',
    x='v2xpa_antiplural:Q',
    tooltip=[alt.Tooltip('country_name:N',title='Country'),
            alt.Tooltip('year:O',title='Year'), 
            alt.Tooltip('v2paenname:N',title='Party'),
            #alt.Tooltip('countryvoteshare:Q',title='Share of national vote'),
            ],
    #color=alt.condition(brushcountry, 'v2xpa_antiplural:Q', alt.value('lightgray')),
    opacity=alt.condition(brushcountry, alt.OpacityValue(1), alt.OpacityValue(0.5)),
    #size='countryvoteshare:Q'
    #size=alt.Size('countryvoteshare:Q',scale=alt.Scale(domain=[0, 1], range=[0,1000]))
).add_params(
    select_year
).transform_filter(
    select_year
).add_params(
    selectioncountry
).transform_filter(
    selectioncountry
).properties(width = 500)


# Histogram of antipluralism scores (up to 30 bins) for the selected year and
# country; this chart carries the brush.
partydist = alt.Chart(resultfilled).mark_bar().transform_calculate(
    #max='max(v2xpa_antiplural)'
).encode(
    x=alt.X('v2xpa_antiplural:Q',bin=alt.Bin(maxbins=30),title='Party level of antipluralism (1 = least democratic norms)'),
    y=alt.Y('count()',title='Count of Parties'),
    #opacity=alt.condition(brushcountry, alt.OpacityValue(1), alt.OpacityValue(0.5)),
    #tooltip='count()'
).add_params(
    brushcountry
).add_params(
    select_year
).transform_filter(
    select_year
).add_params(
    selectioncountry
).transform_filter(
    selectioncountry
)

# Grey copy of the histogram used as the background layer.
distbg = partydist.encode(
    color=alt.value('#ddd')
).add_params(brushcountry)

# Foreground layer: only the rows inside the brush.
highlight = partydist.transform_filter(brushcountry)

# Layer background + highlight, then stack the histogram above the dot plot.
fulldist = (distbg+highlight).properties(width=500, height=100)
alt.vconcat(fulldist, partyviewbase)
#partyviewbase
#(alt.vconcat(fulldist, partyviewbase) | legendcountry).properties(title='Distribution of party identities for select EU countries, 1990-2020')


In [None]:
# Election rows that can be linked to a party id.
eurovotetemp = eurovote.dropna(subset=['partyfacts_id'])

# Distinct (region, country, party) combinations; printed as a size check.
unique_combinations = eurovotetemp[['nuts2016', 'country', 'partyfacts_id']].drop_duplicates()
print(len(unique_combinations))

last_data_year = eurovotetemp['year'].max()

# Sorted election years per country, computed once instead of once per combination.
election_years = {
    country: sorted(country_rows['year'].unique())
    for country, country_rows in eurovotetemp.groupby('country')
}

# Forward-fill every (region, party) series across the years between
# elections, up to the year before the country's next election after the
# party's last appearance (or through the final data year if there is none).
# A single groupby pass replaces the per-combination scan of the whole frame,
# the frames are concatenated once at the end, and the per-iteration counter
# and debug prints are removed. Rows with a missing region or country are
# skipped by groupby.
filled_frames = []
group_keys = ['nuts2016', 'country', 'partyfacts_id']
for (_, country, _), temp_df in eurovotetemp.groupby(group_keys, sort=False):
    temp_df = temp_df.sort_values('year', ascending=True)

    list_years = election_years[country]
    next_position = list_years.index(temp_df['year'].max()) + 1
    if next_position == len(list_years):
        next_year = last_data_year + 1
    else:
        next_year = list_years[next_position]

    df2 = pd.DataFrame({'year': np.arange(temp_df['year'].min(), next_year)})

    # Left join on the full year range, then carry the last observation forward.
    filled_frames.append(pd.merge(df2, temp_df, on='year', how='left').ffill())

resultfilled_slow = pd.concat(filled_frames, ignore_index=True) if filled_frames else pd.DataFrame()

In [None]:
#eurovote[eurovote['nuts2016'] == 'FRL01'].sort_values('year', ascending = True)[30:50]

In [None]:
#eurocombo[eurocombo['nuts2016'] == 'FRL01'].sort_values('year', ascending = True)

In [None]:
#resultfilled_slow[resultfilled_slow['nuts2016'] == 'LT023'].sort_values('year', ascending = True)[55:75]

In [None]:
# Reminder of the columns available in the combined party/election table.
eurocombo.head()

In [None]:
limitedparty = distrib.copy()
# Distinct party ids (kept for inspection; the loop below iterates groups directly).
unique_combinations = limitedparty[['partyfacts_id']].drop_duplicates()

final_year = limitedparty['year'].max()

# For every party, extend its rows to every year from its first observation
# through the last year in the dataset, carrying the latest values forward.
# groupby visits each party's rows once, and the frames are concatenated a
# single time instead of growing a DataFrame inside the loop.
party_frames = []
for _, temp_df in limitedparty.groupby('partyfacts_id', sort=False):
    temp_df = temp_df.sort_values('year', ascending=True)

    df2 = pd.DataFrame({'year': np.arange(temp_df['year'].min(), final_year + 1)})

    # Left join on the full year range, then forward-fill the gaps.
    party_frames.append(pd.merge(df2, temp_df, on='year', how='left').ffill())

limitedpartyallyear = pd.concat(party_frames, ignore_index=True) if party_frames else pd.DataFrame()
limitedpartyallyear.head(5)

In [None]:
# Join the year-filled party scores with the year-filled regional election
# results, then keep only the columns the map needs.
eurocombomod_columns = [
    'nuts2016', 'regionname', 'v2paenname', 'partyfacts_id', 'party_abbreviation',
    'year', 'country_name', 'country_id', 'voteshare', 'partyvote', 'validvote',
    'totalvote', 'v2xpa_antiplural', 'v2paimmig_osp',
]
eurocombomod = limitedpartyallyear.merge(resultfilled_slow, on=['year', 'partyfacts_id'])
eurocombomod = eurocombomod[eurocombomod_columns]
eurocombomod.head()

In [None]:
alt.data_transformers.disable_max_rows()

topojson_data_nuts3 = 'NUTS_RG_03M_2016_3035_LEVL_3.json'
nuts3geo = alt.topo_feature(topojson_data_nuts3, 'NUTS_RG_03M_2016_3035')


euromap2 = eurocombomod.copy()

# Bin edges per party indicator, and the label prefix shown in the dropdown.
grandict = {#'v2xpa_antiplural':[0,0.2,0.4,0.6,0.8,1],
            'v2paimmig_osp':[0,1,2,3,4]
            }
labeldict = {#'v2xpa_antiplural':'Antidemocratic (0 least, 1 most): ',
             'v2paimmig_osp':'Immigration support (0 least, 4 most): '}
ouroptions = []

# For each bin, add a column holding the summed vote share per (region, year)
# of all parties whose indicator falls in that bin (0 where no party does).
for ke, edges in grandict.items():
    last_bin = len(edges) - 2
    for i in range(0, len(edges) - 1):
        lower, upper = edges[i], edges[i + 1]
        # Bins are [lower, upper); the final bin also includes its upper edge
        # so a score equal to the scale maximum is not silently dropped.
        below_upper = (euromap2[ke] <= upper) if i == last_bin else (euromap2[ke] < upper)
        test_df = euromap2[(euromap2[ke] >= lower) & below_upper]

        labelfull = labeldict[ke] + str(lower) + ' to ' + str(upper)
        ouroptions.append(labelfull)

        the_few = (
            test_df.groupby(['nuts2016', 'year'])['voteshare']
            .sum()
            .rename(labelfull)
            .reset_index()
        )

        euromap2 = pd.merge(euromap2, the_few, on=['nuts2016','year'], how='left')
        euromap2[labelfull] = euromap2[labelfull].fillna(0)

# Dropdown choosing which bin column the map colours by.
dropdown = alt.binding_select(
    options=ouroptions,
    name='Party indicator'
)
xcol_param = alt.param(
    value=ouroptions[0],
    bind=dropdown
)

# Region codes left off the map. The .copy() lets later cells add columns to
# euromap2 without a SettingWithCopyWarning.
notinclude = ['FRY10','FRY20','FRY30','FRY40','FRY50', 'PT200', 'PT300',
              'ES707', 'ES703', 'ES706', 'ES709', 'ES705', 'ES704', 'ES708']
euromap2 = euromap2[~euromap2['nuts2016'].isin(notinclude)].copy()

# Two-level antipluralism category (rows with a missing score fall into
# '0 to 0.5'). Working on an explicit copy avoids assigning into a slice of
# the frame filtered above.
euromap2 = euromap2.copy()
euromap2['antipluralcat'] = '0 to 0.5'
euromap2.loc[euromap2['v2xpa_antiplural'] > 0.5, 'antipluralcat'] = '0.5 to 1'
selectiondrop2 = alt.selection_point(name='selection',
                                    fields=['antipluralcat'],
                                     bind=alt.binding_select(options=['0 to 0.5', '0.5 to 1']),
                                     value=[{'antipluralcat': '0 to 0.5'}])

# Region dropdown. selection_point replaces the deprecated selection_single.
ref_area_selection = alt.selection_point(fields=['nuts2016'], bind=alt.binding_select(options=list(euromap2['nuts2016'].unique())))

# Year slider spanning the years present in the map data, starting at 2000.
slider_more_time = alt.binding_range(min=euromap2['year'].min(),
                            max=euromap2['year'].max(), step=1, name='Current Year: ')

yearselector = alt.selection_point(
    name="SelectorName",
    fields=['year'],
    bind=slider_more_time,
    value=[{'year': 2000}]
)

topojson_data_nuts3 = 'NUTS_RG_03M_2016_3035_LEVL_3.json'

# Grey background layer of all regions except the excluded ones. The earlier
# transform_lookup on this layer was keyed on 'REF_AREA', which is not among
# the columns selected into euromap2 (its region column is 'nuts2016'), and
# its result was never encoded, so it is removed.
base = alt.Chart(alt.topo_feature(topojson_data_nuts3, 'NUTS_RG_03M_2016_3035')).mark_geoshape(
    stroke='#aaa', strokeWidth=0.25, fill='lightgray'
).transform_filter(
    {'not':alt.FieldOneOfPredicate(field='id', oneOf=notinclude)}
).encode().project(
    type='identity',reflectY=True
).properties(
    width=900,
    height=500
)

# Data layer: attach each row's region geometry, then colour it by the bin
# column currently chosen in the dropdown parameter.
lets = alt.Chart(euromap2).transform_lookup(
    lookup='nuts2016', from_=alt.LookupData(data=nuts3geo, key='id'), as_='geom'
).transform_filter(
    'datum.geom !== null'  # drop rows without a matching geometry
).transform_calculate(
    x=f'datum[{xcol_param.name}]'
).add_params(
    xcol_param
).transform_calculate(
    geometry ='datum.geom.geometry',
    type= 'datum.geom.type'
).mark_geoshape(
).encode(
    alt.Color('x:Q',title=['Party(s) Share of Vote in','Geographic Region'],
              scale=alt.Scale(domain=(0, 1))),
    tooltip = [alt.Tooltip('x:Q',title='Share of vote',format='.1%'),
               alt.Tooltip('nuts2016:N',title='Region code'),
               alt.Tooltip('regionname:N',title='Region'),
               alt.Tooltip('country_name:N',title='Country'),
               ]
).project(
    type='identity',reflectY=True

).properties(
    width=900,
    height=500
)

# Background plus the data layer filtered to the year chosen on the slider.
baselets = base.mark_geoshape(fill='lightgray') + lets.add_params(yearselector).transform_filter(yearselector)

baselets = baselets.properties(title={
                    "text": ['Regional Percent Voting for Select European Political Parties, By Party Immigration Score'],
                    "subtitle": ['Only parties with both election and ideology score data shown; data not available for all years',
                                 'Data shown for latest election and latest party indicator. E.g. if a party\'s latest score was in 2013, 2015 will use that data',
                              'Voting shares for each indicator category may not sum to 100% since some parties have no ideology score, and are not counted',
                              'Note that party indicators can change every election, causing some parties to disappear from a specific political category range'],
                    })
#baselets.save('baselet.html')
baselets

In [None]:
# Year slider and country dropdown. selection_point / add_params replace the
# deprecated selection_single / add_selection, matching the other cells.
slider = alt.binding_range(min=1990, max=distrib['year'].max(), step=1)
select_year = alt.selection_point(name="Select", fields=['year'],
                                  bind=slider,
                                  )
dropdown = alt.binding_select(options=distrib['country_name'].unique())
select_country = alt.selection_point(fields=['country_name'], bind=dropdown, name="Country")

# Binned histogram of party antipluralism scores, filtered by both selections.
alt.Chart(distrib).mark_bar().encode(
    alt.X("v2xpa_antiplural:Q", bin=True),
    y='count()',
).add_params(
    select_year
).transform_filter(
    select_year
).add_params(
    select_country
).transform_filter(
    select_country
)