In [3]:
from glob import glob
from os.path import exists

import pandas as pd
import altair as alt
import requests

In [4]:
# Thanks ChatGPT
COLORS = {
    'French': '#1f77b4',          # Blue
    'German': '#ff7f0e',          # Orange
    'Dutch': '#2ca02c',           # Green
    'Danish': '#d62728',          # Red
    'Italian': '#9467bd',         # Purple
    'English': '#8c564b',         # Brown
    'Swedish': '#e377c2',         # Pink
    'Luxembourgish': '#7f7f7f',   # Gray
    'Norwegian': '#bcbd22',       # Olive
    'Finnish': '#17becf',         # Cyan
    'Spanish': '#1f78b4',         # Blue (darker)
    'Serbo-Croatian': '#ff9896',  # Pink (lighter)
    'Portuguese': '#98df8a',      # Green (lighter)
    'Slovene': '#c5b0d5',         # Purple (lighter)
    'Viennese': '#c49c94',        # Brown (lighter)
    'Maltese': '#f7b6d2',         # Pink (lighter)
    'Irish': '#c7c7c7',           # Gray (lighter)
    'Hebrew': '#ffbb78',          # Orange (lighter)
    'Greek': '#2ca02c',           # Green (reused)
    'Turkish': '#d62728',         # Red (reused)
    'Arabic': '#8c564b',          # Brown (reused)
    'Northern Sami': '#17becf',   # Cyan (reused)
    'Icelandic': '#7f7f7f',       # Gray (reused)
    'Romansh': '#bcbd22',         # Olive (reused)
    'Neapolitan': '#1f77b4',      # Blue (reused)
    'Antillean Creole': '#ff7f0e',# Orange (reused)
    'Croatian': '#e377c2',        # Pink (reused)
    'Corsican': '#c49c94',        # Brown (reused)
    'Bosnian': '#f7b6d2',         # Pink (lighter)
    'Estonian': '#c7c7c7',        # Gray (lighter)
    'Hungarian': '#ff9896',       # Pink (lighter)
    'Lithuanian': '#98df8a',      # Green (lighter)
    'Polish': '#c5b0d5',          # Purple (lighter)
    'Romanian': '#ffbb78',        # Orange (lighter)
    'Russian': '#17becf',         # Cyan (reused)
    'Slovak': '#bcbd22',          # Olive (reused)
    'Vorarlbergish': '#1f77b4',   # Blue (reused)
    'Breton': '#ff7f0e',          # Orange (reused)
    'Macedonian': '#d62728',      # Red (reused)
    'Samogitian': '#9467bd',      # Purple (reused)
    'Serbian': '#8c564b',         # Brown (reused)
    'Imaginary': '#e377c2',       # Pink (reused)
    'Catalan': '#7f7f7f',         # Gray (reused)
    'Võro': '#bcbd22',            # Olive (reused)
    'Latvian': '#17becf',         # Cyan (reused)
    'Ukrainian': '#1f78b4',       # Blue (darker)
    'Montenegrin': '#ff9896',     # Pink (lighter)
    'Albanian': '#98df8a',        # Green (lighter)
    'Tahitian': '#c5b0d5',        # Purple (lighter)
    'Armenian': '#c49c94',        # Brown (lighter)
    'Bulgarian': '#f7b6d2',       # Pink (lighter)
    'Czech': '#c7c7c7',           # Gray (lighter)
    'English ("Franglais")': '#ffbb78', # Orange (lighter)
    'Romani': '#2ca02c',          # Green (reused)
    'Swahili': '#d62728',         # Red (reused)
    'Georgian': '#9467bd',        # Purple (reused)
    'Udmurt': '#8c564b',          # Brown (reused)
    'Crimean Tatar': '#e377c2',   # Pink (reused)
    'Belarusian': '#7f7f7f',      # Gray (reused)
    'Srnan Tongo': '#bcbd22',     # Olive (reused)
    'Latin': '#17becf'            # Cyan (reused)
}


### Functions to make charts

In [5]:
def get_languages(data):
    return data["Language"].unique()

In [6]:
def create_year_chart(year, data):
    filtered_data = data[data["Year"] ==  year]
    if filtered_data.empty:
        return f"No data for {year}"
    return alt.Chart(filtered_data).mark_bar().encode(
        x=alt.X('Country', title='Country', sort='-y'),
        y=alt.Y('Grand Final Points', title='Total Grand Final Points'),
        color=alt.Color('Language', scale=alt.Scale(domain=list(COLORS.keys()), range=list(COLORS.values()))).legend(None),
        tooltip=["Country", 'Language', 'Grand Final Points']
    ).properties(
        title=f'Total Grand Final Points in {year}',
        width=1000
    )


In [7]:
def make_count_chart(data):
    return alt.Chart(data).mark_bar().encode(
        y=alt.Y('count()', title='Number of Songs'),
        x=alt.X('Language', title='Language', sort="-y"),
        color=alt.Color('Language', scale=alt.Scale(domain=list(COLORS.keys()), range=list(COLORS.values()))).legend(None),
        tooltip=['Language', 'count()']
    ).properties(
        title="Total number of times a language is used in a song",
        width=1000
    )
 

In [8]:
def make_points_chart(data):
    return alt.Chart(data).mark_bar().encode(
    y=alt.Y("sum(Grand Final Points)", title="Sum of points won"),
    x=alt.X("Language", title="Language", sort="-y"),
    color=alt.Color('Language', scale=alt.Scale(domain=list(COLORS.keys()), range=list(COLORS.values()))).legend(None),
    tooltip=['Language', 'sum(Grand Final Points)']
).properties(
    title="Total points scored using x language",
    width=1000
)

In [9]:
def make_averages_chart(data):
    return alt.Chart(data).mark_bar().encode(
    y=alt.Y("median(Grand Final Points)"),
    x=alt.X("Language", sort="-y"),
    tooltip=['Language', 'median(Grand Final Points)'],
    color=alt.Color('Language', scale=alt.Scale(domain=list(COLORS.keys()), range=list(COLORS.values()))).legend(None),
).properties(
    title="Average points scored for songs in x language",
    width=1000
)

## Read Data

In [10]:
result_filepath = glob("*/*/every_eurovision_result_ever.csv")

In [13]:
results = pd.read_csv(result_filepath[0])

In [14]:
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1721 entries, 0 to 1720
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Song                1721 non-null   object
 1   Artist              1721 non-null   object
 2   Country             1721 non-null   object
 3   Year                1721 non-null   int64 
 4   Language            1721 non-null   object
 5   Grand Final Place   1667 non-null   object
 6   Grand Final Points  1666 non-null   object
 7   Semifinal           1674 non-null   object
 8   Semifinal Place     763 non-null    object
 9   Semifinal Points    763 non-null    object
dtypes: int64(1), object(9)
memory usage: 134.6+ KB


In [15]:
results["Country"].unique()

array(['Belgium', 'France', 'Germany', 'Italy', 'Luxembourg',
       'Netherlands', 'Switzerland', 'Austria', 'Denmark',
       'United Kingdom', 'Sweden', 'Monaco', 'Norway', 'Finland', 'Spain',
       'Yugoslavia', 'Portugal', 'Ireland', 'Malta', 'Israel', 'Greece',
       'Turkey', 'Morocco', 'Cyprus', 'Iceland', 'Bosnia and Herzegovina',
       'Croatia', 'Slovenia', 'Estonia', 'Hungary', 'Lithuania', 'Poland',
       'Romania', 'Russia', 'Slovakia', 'North Macedonia', 'Latvia',
       'Ukraine', 'Albania', 'Andorra', 'Belarus',
       'Serbia and Montenegro', 'Bulgaria', 'Moldova', 'Armenia',
       'Czech Republic', 'Georgia', 'Montenegro', 'Serbia', 'Azerbaijan',
       'San Marino', 'Australia'], dtype=object)

## Transform Data

In [16]:
results_with_scores = results[~results["Grand Final Place"].isna()] # 1956 was the first year of competition and the votes were lost. 2020 was cancelled due to COVID

In [17]:
# Convert the 'Language' column to lists
results_with_scores['Language'] = results_with_scores['Language'].apply(lambda x: x.split(', '))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_with_scores['Language'] = results_with_scores['Language'].apply(lambda x: x.split(', '))


In [18]:
results_with_scores['Language'] = results_with_scores['Language'].replace(r'\[\w+\]', '', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_with_scores['Language'] = results_with_scores['Language'].replace(r'\[\w+\]', '', regex=True)


In [19]:
results_with_scores["Grand Final Points"].fillna(0, inplace=True)
results_with_scores["Grand Final Points"].replace({"NQ": 0}, inplace=True)
results_with_scores["Grand Final Points"] = results_with_scores["Grand Final Points"].astype(int)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  results_with_scores["Grand Final Points"].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_with_scores["Grand Final Points"].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_with_scores["Grand Final Points"].replace({"NQ": 0}, inplace

In [20]:
results_with_scores_exploded = results_with_scores.explode(column="Language")

In [21]:
results_with_scores_exploded.groupby(by="Language").nunique().sort_values(by="Song", ascending=False).head(60)

Unnamed: 0_level_0,Song,Artist,Country,Year,Grand Final Place,Grand Final Points,Semifinal,Semifinal Place,Semifinal Points
Language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
English,752,762,47,65,28,240,5,27,210
French,173,160,14,65,27,92,5,10,11
German,95,84,6,50,25,60,4,3,3
Italian,68,59,9,53,21,54,5,9,9
Spanish,66,65,5,61,27,48,5,4,4
Dutch,57,54,2,41,21,40,3,2,2
Portuguese,53,50,1,53,21,35,5,13,14
Greek,50,47,2,34,19,39,3,7,8
Hebrew,38,36,2,37,19,29,5,9,11
Swedish,36,33,2,35,19,28,2,1,1


In [22]:
filtered_exploded_results = results_with_scores_exploded.groupby('Language').filter(lambda x: len(x) > 6)

In [23]:
filtered_exploded_results.groupby(by="Language").nunique().head(60)

Unnamed: 0_level_0,Song,Artist,Country,Year,Grand Final Place,Grand Final Points,Semifinal,Semifinal Place,Semifinal Points
Language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Albanian,10,10,1,10,6,7,3,8,9
Bosnian,12,12,1,12,11,12,4,6,6
Bulgarian,7,5,2,7,4,4,3,6,6
Croatian,22,22,2,21,13,15,5,8,11
Danish,29,27,1,29,17,22,2,1,1
Dutch,57,54,2,41,21,40,3,2,2
English,752,762,47,65,28,240,5,27,210
Estonian,7,7,1,7,6,7,3,3,3
Finnish,35,34,2,34,18,23,2,5,5
French,173,160,14,65,27,92,5,10,11


In [24]:
results_with_scores_exploded.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1819 entries, 12 to 1720
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Song                1819 non-null   object
 1   Artist              1819 non-null   object
 2   Country             1819 non-null   object
 3   Year                1819 non-null   int64 
 4   Language            1819 non-null   object
 5   Grand Final Place   1819 non-null   object
 6   Grand Final Points  1819 non-null   int64 
 7   Semifinal           1812 non-null   object
 8   Semifinal Place     876 non-null    object
 9   Semifinal Points    876 non-null    object
dtypes: int64(2), object(8)
memory usage: 156.3+ KB


## Visualise Data

In [25]:
count_chart = make_count_chart(filtered_exploded_results)
count_chart.show()


In [26]:
make_points_chart(filtered_exploded_results).show()

In [27]:
make_averages_chart(filtered_exploded_results).show()


In [28]:
charts = [create_year_chart(year, results_with_scores_exploded) for year in range(2000, 2024)]

In [29]:
for chart in charts:
    chart.show() if type(chart) != str else print(chart)

No data for 2020


## Save Charts

In [30]:
if not exists("visualisations/eurovision/count_chart.png"):
    count_chart.save("visualisations/eurovision/count_chart.png")