In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import pickle

In [2]:
with open('../data/country_codes_dict.pickle', 'rb') as f:
    codes = pickle.load(f)

In [3]:
df = pd.read_csv('../data/eurovision_merged_covariates_27Jan.csv')

In [4]:
# reverse the dictionary
codes = {v: k for k, v in codes.items()}

In [5]:
# variable names to lowercase and _ instead of spaces
df.columns = df.columns.str.lower().str.replace(' ', '_')
# add countries to codes in df with codes df
df['from_country_name'] = df['from_country'].map(codes)
df['country_name'] = df['country'].map(codes)

In [6]:
# how often did each country participate?
n_participations = df.groupby('country_name')['year'].nunique().sort_values(ascending=True)
# filter countries with fewer than 5 and turn into list
countries_to_remove = n_participations[n_participations < 3].index.tolist()
# remove from df
df = df[~df['country_name'].isin(countries_to_remove)]
df

Unnamed: 0,unnamed:_0,year,from_country,votes,country,own,english,other,has_border,from_country_name,country_name
0,3,1999,AT,0.0,AT,0.0,1.0,0.0,,austria,austria
1,6,1999,AT,12.0,BA,0.0,0.0,1.0,,austria,bosnia and herzegovina
2,7,1999,AT,0.0,BE,0.0,1.0,0.0,,austria,belgium
3,11,1999,AT,0.0,CY,1.0,0.0,0.0,,austria,cyprus
4,13,1999,AT,10.0,DE,0.0,0.0,1.0,1.0,austria,germany
...,...,...,...,...,...,...,...,...,...,...,...
19214,37619,2019,SM,1.0,NO,0.0,0.0,1.0,,san marino,norway
19215,37623,2019,SM,0.0,RS,0.0,0.0,1.0,,san marino,serbia
19216,37624,2019,SM,12.0,RU,0.0,1.0,0.0,,san marino,russia
19217,37625,2019,SM,0.0,SE,0.0,1.0,0.0,,san marino,sweden


Take average votes over the years for each country pair.

In [7]:
df2 = (df
    .groupby(['from_country', 'country', 'from_country_name', 'country_name'])
    #.agg({'votes': 'mean'})
    .agg(votes = ('votes', 'mean'), num_years = ('year', 'count'))
    .reset_index()
    .sort_values('votes', ascending=False)
)

In [8]:
df2

Unnamed: 0,from_country,country,from_country_name,country_name,votes,num_years
1959,TR,AZ,turkey,azerbaijan,12.000000,5
251,AZ,TR,azerbaijan,turkey,12.000000,4
1397,ME,RS,montenegro,serbia,12.000000,7
1690,RO,MD,romania,moldova,11.800000,10
488,CY,GR,cyprus,greece,11.733333,15
...,...,...,...,...,...,...
1313,MC,RU,monaco,russia,0.000000,3
1311,MC,PL,monaco,poland,0.000000,1
1911,SK,TR,slovakia,turkey,0.000000,3
1309,MC,NL,monaco,the netherlands,0.000000,1


In [9]:
# merge df2 with itself but with from_country and country swapped and from_country_name and country_name swapped
df3 = (df2
    .merge(df2.rename(columns={'from_country': 'country', 
                               'country': 'from_country', 
                               'from_country_name': 'country_name', 
                               'country_name': 'from_country_name'}), 
           on=['from_country', 'country', 'from_country_name', 'country_name', 'num_years'])
    # remove duplicates
    .drop_duplicates()
    # remove rows where from_country == country
    .query('from_country != country')
    # calculate the absolute difference in votes
    .assign(votes_diff = lambda x: abs(x['votes_x'] - x['votes_y']))
    # re-order num_years column before votes_x
    .reindex(columns=['from_country', 'country', 'from_country_name', 'country_name', 'num_years', 'votes_x', 'votes_y', 'votes_diff'])
)
df3

Unnamed: 0,from_country,country,from_country_name,country_name,num_years,votes_x,votes_y,votes_diff
0,BA,RS,bosnia and herzegovina,serbia,6,11.333333,10.166667,1.166667
1,RS,BA,serbia,bosnia and herzegovina,6,10.166667,11.333333,1.166667
2,RU,AZ,russia,azerbaijan,10,9.400000,7.900000,1.500000
3,CH,PT,switzerland,portugal,5,9.400000,2.800000,6.600000
4,MK,BG,f.y.r. macedonia,bulgaria,4,9.250000,6.250000,3.000000
...,...,...,...,...,...,...,...,...
210,AM,HU,armenia,hungary,8,0.000000,1.250000,1.250000
215,PT,PL,portugal,poland,8,0.000000,1.500000,1.500000
218,BE,MK,belgium,f.y.r. macedonia,8,0.000000,1.125000,1.125000
222,LT,AL,lithuania,albania,9,0.000000,0.111111,0.111111


In [10]:
# from_country and country contain same pairs, but in different orders
# make combined column with pairs in alphabetical order
df3['country_pair'] = df3[['from_country_name', 'country_name']].apply(lambda x: ' - '.join(sorted(x)), axis=1)
# remove duplicate country pairs
df3 = df3.drop_duplicates(subset=['country_pair'])
# remove country_pair column
df3 = df3.drop(columns=['country_pair'])

In [11]:
# high votes, low diff
top_lovers = (df3
    .query('votes_diff < 3')
    .sort_values('votes_x', ascending=False).head(5)
)
top_lovers

Unnamed: 0,from_country,country,from_country_name,country_name,num_years,votes_x,votes_y,votes_diff
0,BA,RS,bosnia and herzegovina,serbia,6,11.333333,10.166667,1.166667
2,RU,AZ,russia,azerbaijan,10,9.4,7.9,1.5
7,SE,AU,sweden,australia,5,8.2,7.8,0.4
10,BG,AT,bulgaria,austria,4,7.5,4.75,2.75
15,SI,MK,slovenia,f.y.r. macedonia,7,6.285714,4.0,2.285714


In [12]:
top_haters = (df3
    .query('votes_diff < 3')
    .sort_values('votes_x', ascending=True).head(5))
top_haters

Unnamed: 0,from_country,country,from_country_name,country_name,num_years,votes_x,votes_y,votes_diff
174,AL,LT,albania,lithuania,9,0.111111,0.0,0.111111
169,IS,GE,iceland,georgia,7,0.285714,0.0,0.285714
161,RS,TR,serbia,turkey,5,0.6,0.0,0.6
147,BY,SI,belarus,slovenia,6,0.833333,0.0,0.833333
138,LT,IL,lithuania,israel,13,1.0,0.769231,0.230769


In [13]:
top_unequals = (df3
            .sort_values('votes_diff', ascending=False).head(5))
top_unequals

Unnamed: 0,from_country,country,from_country_name,country_name,num_years,votes_x,votes_y,votes_diff
6,TR,AM,turkey,armenia,5,8.8,0.8,8.0
3,CH,PT,switzerland,portugal,5,9.4,2.8,6.6
11,ES,IT,spain,italy,9,7.444444,1.333333,6.111111
12,FR,IT,france,italy,9,7.222222,1.666667,5.555556
17,IT,UA,italy,ukraine,7,6.142857,0.857143,5.285714


In [14]:
# combine top_lovers, top_haters and top_unequals into one df with a new column for type of relationship
top_relationships = (pd.concat([top_lovers, top_haters, top_unequals]))
# add group column first five rows are lovers, next five are haters, next five are unequals
top_relationships['group'] = ['lovers'] * 5 + ['haters'] * 5 + ['unequals'] * 5
top_relationships

Unnamed: 0,from_country,country,from_country_name,country_name,num_years,votes_x,votes_y,votes_diff,group
0,BA,RS,bosnia and herzegovina,serbia,6,11.333333,10.166667,1.166667,lovers
2,RU,AZ,russia,azerbaijan,10,9.4,7.9,1.5,lovers
7,SE,AU,sweden,australia,5,8.2,7.8,0.4,lovers
10,BG,AT,bulgaria,austria,4,7.5,4.75,2.75,lovers
15,SI,MK,slovenia,f.y.r. macedonia,7,6.285714,4.0,2.285714,lovers
174,AL,LT,albania,lithuania,9,0.111111,0.0,0.111111,haters
169,IS,GE,iceland,georgia,7,0.285714,0.0,0.285714,haters
161,RS,TR,serbia,turkey,5,0.6,0.0,0.6,haters
147,BY,SI,belarus,slovenia,6,0.833333,0.0,0.833333,haters
138,LT,IL,lithuania,israel,13,1.0,0.769231,0.230769,haters


In [15]:
# save to csv
top_relationships.to_csv('../data/top_relationships.csv', index=False)

In [16]:
import plotly.express as px

In [17]:
df = px.data.iris()
fig = px.parallel_coordinates(df, color="species_id",
                              dimensions=['sepal_width', 'sepal_length', 'petal_width',
                                          'petal_length'],
                              color_continuous_scale=px.colors.diverging.Tealrose,
                              color_continuous_midpoint=2)

In [18]:
fig.show()


In [21]:
top_relationships

Unnamed: 0,from_country,country,from_country_name,country_name,num_years,votes_x,votes_y,votes_diff,group
0,BA,RS,bosnia and herzegovina,serbia,6,11.333333,10.166667,1.166667,lovers
2,RU,AZ,russia,azerbaijan,10,9.4,7.9,1.5,lovers
7,SE,AU,sweden,australia,5,8.2,7.8,0.4,lovers
10,BG,AT,bulgaria,austria,4,7.5,4.75,2.75,lovers
15,SI,MK,slovenia,f.y.r. macedonia,7,6.285714,4.0,2.285714,lovers
174,AL,LT,albania,lithuania,9,0.111111,0.0,0.111111,haters
169,IS,GE,iceland,georgia,7,0.285714,0.0,0.285714,haters
161,RS,TR,serbia,turkey,5,0.6,0.0,0.6,haters
147,BY,SI,belarus,slovenia,6,0.833333,0.0,0.833333,haters
138,LT,IL,lithuania,israel,13,1.0,0.769231,0.230769,haters


In [20]:
fig = px.parallel_coordinates(
    top_relationships, 
    color="group", 
    dimensions=["votes_x", "votes_y"],
    #labels={"species_id": "Species","sepal_width": "Sepal Width", "sepal_length": "Sepal Length", "petal_width": "Petal Width", "petal_length": "Petal Length", },
    #color_continuous_scale=px.colors.diverging.Tealrose,
    #color_continuous_midpoint=2
    )

ValueError: 
    Invalid element(s) received for the 'color' property of parcoords.line
        Invalid elements include: ['lovers', 'lovers', 'lovers', 'lovers', 'lovers', 'haters', 'haters', 'haters', 'haters', 'haters']

    The 'color' property is a color and may be specified as:
      - A hex string (e.g. '#ff0000')
      - An rgb/rgba string (e.g. 'rgb(255,0,0)')
      - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
      - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
      - A named CSS color:
            aliceblue, antiquewhite, aqua, aquamarine, azure,
            beige, bisque, black, blanchedalmond, blue,
            blueviolet, brown, burlywood, cadetblue,
            chartreuse, chocolate, coral, cornflowerblue,
            cornsilk, crimson, cyan, darkblue, darkcyan,
            darkgoldenrod, darkgray, darkgrey, darkgreen,
            darkkhaki, darkmagenta, darkolivegreen, darkorange,
            darkorchid, darkred, darksalmon, darkseagreen,
            darkslateblue, darkslategray, darkslategrey,
            darkturquoise, darkviolet, deeppink, deepskyblue,
            dimgray, dimgrey, dodgerblue, firebrick,
            floralwhite, forestgreen, fuchsia, gainsboro,
            ghostwhite, gold, goldenrod, gray, grey, green,
            greenyellow, honeydew, hotpink, indianred, indigo,
            ivory, khaki, lavender, lavenderblush, lawngreen,
            lemonchiffon, lightblue, lightcoral, lightcyan,
            lightgoldenrodyellow, lightgray, lightgrey,
            lightgreen, lightpink, lightsalmon, lightseagreen,
            lightskyblue, lightslategray, lightslategrey,
            lightsteelblue, lightyellow, lime, limegreen,
            linen, magenta, maroon, mediumaquamarine,
            mediumblue, mediumorchid, mediumpurple,
            mediumseagreen, mediumslateblue, mediumspringgreen,
            mediumturquoise, mediumvioletred, midnightblue,
            mintcream, mistyrose, moccasin, navajowhite, navy,
            oldlace, olive, olivedrab, orange, orangered,
            orchid, palegoldenrod, palegreen, paleturquoise,
            palevioletred, papayawhip, peachpuff, peru, pink,
            plum, powderblue, purple, red, rosybrown,
            royalblue, rebeccapurple, saddlebrown, salmon,
            sandybrown, seagreen, seashell, sienna, silver,
            skyblue, slateblue, slategray, slategrey, snow,
            springgreen, steelblue, tan, teal, thistle, tomato,
            turquoise, violet, wheat, white, whitesmoke,
            yellow, yellowgreen
      - A number that will be interpreted as a color
        according to parcoords.line.colorscale
      - A list or array of any of the above