In [None]:
from random import choice
from os.path import exists, join
from os import makedirs

import pandas as pd
import altair as alt

In [None]:
def get_indicator_line_chart(filtered_data: pd.DataFrame, randomly_chosen_column: str, country: str) -> alt.Chart:
    """Build a line chart of one indicator column against ``Year``.

    Args:
        filtered_data: Rows to plot. The encodings reference its ``Year``,
            ``Country Name`` and ``randomly_chosen_column`` columns.
        randomly_chosen_column: Name of the indicator column drawn on the y axis.
        country: Country name used in the chart title.

    Returns:
        The configured Altair line chart (width 1000).
    """
    # Altair's shorthand uses ":" to separate the field from its type code, so a
    # literal colon in the column name has to be escaped wherever the name is
    # passed as shorthand: the y channel *and* the tooltip list.
    escaped_column = randomly_chosen_column.replace(":", r"\:")
    return (
        alt.Chart(filtered_data)
        .mark_line()
        .encode(
            x='Year:O',
            # Keep the unescaped name as the human-readable axis title.
            y=alt.Y(escaped_column, title=f'{randomly_chosen_column}'),
            color=alt.Color('Country Name:N', scale=alt.Scale(scheme='purples')),
            tooltip=['Country Name', 'Year', escaped_column],
        )
        .properties(
            title=f'{randomly_chosen_column} by Year in {country}',
            width=1000,
        )
    )

In [None]:
def get_eurovision_line_chart(filtered_data: pd.DataFrame, country: str) -> alt.Chart:
    """Build a line chart of ``Grand Final Place`` against ``Year``.

    Args:
        filtered_data: Rows to plot. The encodings reference its ``Year``,
            ``Country Name`` and ``Grand Final Place`` columns.
        country: Country name used in the chart title.

    Returns:
        The configured Altair line chart (width 1000).
    """
    place_field = "Grand Final Place"
    # The y scale is reversed, so smaller place numbers are drawn higher up.
    place_axis = alt.Y(place_field, scale=alt.Scale(reverse=True))
    country_colour = alt.Color('Country Name:N', scale=alt.Scale(scheme='magma'))

    line = alt.Chart(filtered_data).mark_line()
    encoded = line.encode(
        x="Year:O",
        y=place_axis,
        color=country_colour,
        tooltip=['Country Name', 'Year', place_field],
    )
    return encoded.properties(title=f"{country} Eurovision Final Place", width=1000)

In [None]:
def get_layered_chart(chart_one: alt.Chart, chart_two: alt.Chart, randomly_chosen_column: str, country: str) -> alt.Chart:
    """Overlay two charts into one layered chart.

    Args:
        chart_one: First layer.
        chart_two: Second layer, drawn after ``chart_one``.
        randomly_chosen_column: Indicator name used in the combined title.
        country: Country name used in the combined title.

    Returns:
        The result of ``alt.layer`` with independent y and colour scales,
        the combined title and width 1000.
    """
    combined_title = f'{randomly_chosen_column} and Eurovision Place by Year in {country}'
    layered = alt.layer(chart_one, chart_two)
    # Each layer keeps its own y axis and colour scale instead of sharing one.
    independent = layered.resolve_scale(y="independent", color="independent")
    return independent.properties(title=combined_title, width=1000)

In [None]:
def filter_by_country(merged_df: pd.DataFrame, country_name: str) -> pd.DataFrame:
    """Return the rows of ``merged_df`` whose ``Country Name`` is ``country_name``.

    The original index labels and all columns are kept.
    """
    is_requested_country = merged_df["Country Name"].isin([country_name])
    return merged_df.loc[is_requested_country]

In [None]:
def make_layer_chart(full_dataset: pd.DataFrame, country: str, randomly_chosen_column):
    """Build the layered indicator + Eurovision chart for a single country.

    Args:
        full_dataset: Frame that is filtered down to ``country`` before plotting.
        country: Country to plot.
        randomly_chosen_column: Indicator column for the first layer.

    Returns:
        The layered chart produced by ``get_layered_chart``.
    """
    rows_for_country = filter_by_country(full_dataset, country)
    return get_layered_chart(
        get_indicator_line_chart(rows_for_country, randomly_chosen_column, country),
        get_eurovision_line_chart(rows_for_country, country),
        randomly_chosen_column,
        country,
    )

In [None]:
# Load the two input datasets from paths relative to the working directory.
# Presumably World Bank indicators and cleansed Eurovision results, going by the paths.
world_data = pd.read_csv("data/world_bank/combined_world_data.csv")
eurovision_data = pd.read_csv("data/eurovision/cleansed_eurovision_data.csv")

In [None]:
# Preview ten randomly sampled rows (unseeded, so the rows differ on every run).
world_data.sample(10)

In [None]:
# Preview ten randomly sampled rows (unseeded, so the rows differ on every run).
eurovision_data.sample(10)

In [None]:
# Sorted lists of the distinct country names found in each dataset.
eurovision_countries = list(eurovision_data["Country"].unique())
eurovision_countries.sort()
world_countries = list(world_data["Country Name"].unique())
world_countries.sort()

In [None]:
# List Eurovision countries whose name has no exact match in the World Bank data.
# The set of World Bank names is built once instead of recomputing
# `.unique()` on every loop iteration.
world_country_names = set(world_data["Country Name"].unique())
for country in eurovision_countries:
    if country not in world_country_names:
        print(country)

In [None]:
# List World Bank countries that are absent from the Eurovision country list.
missing_from_eurovision = [name for name in world_countries if name not in eurovision_countries]
for country in missing_from_eurovision:
    print(country)

In [None]:
# Map Eurovision spellings onto the names used in the World Bank data so the
# two frames can be merged on country name.
# The result is assigned back instead of calling `replace(..., inplace=True)` on
# the selected column: that chained in-place form warns in recent pandas and
# leaves the DataFrame unchanged under Copy-on-Write.
eurovision_data["Country"] = eurovision_data["Country"].replace(
    {
        "Czech Republic": "Czechia",
        "Russia": "Russian Federation",
        "Slovakia": "Slovak Republic",
        "Turkey": "Turkiye",
    }
)

In [None]:
# Rename the Eurovision "Country" column to "Country Name", the key used when merging below.
# NOTE(review): this mutates the frame in place, so re-running earlier cells that
# read eurovision_data["Country"] will fail until the data is reloaded.
eurovision_data.rename(columns={"Country": "Country Name"}, inplace=True)

In [None]:
# Keep only (country, year) pairs present in both datasets.
# A column-on-column merge already returns a fresh 0..n-1 index, so no
# `reset_index()` is needed. Calling it without `drop=True` added a spurious
# "index" column that was later treated as an indicator column.
merged_df = pd.merge(eurovision_data, world_data, on=['Country Name', 'Year'], how='inner')


In [None]:
# Identifying and Eurovision columns go first; every other column follows in
# its existing order.
first_cols = [
    'Country Name',
    'Country Code',
    'Year',
    "Song",
    "Artist",
    "Language",
    "Grand Final Place",
    "Grand Final Points",
    "Semifinal",
    "Semifinal Place",
    "Semifinal Points",
]
remaining_cols = [name for name in merged_df.columns if name not in first_cols]
new_col_order = [*first_cols, *remaining_cols]

In [None]:
# Apply the new column order (all rows, columns in `new_col_order`).
merged_df = merged_df.loc[:, new_col_order]

In [None]:
# Inspect the last ten rows of the merged frame.
merged_df.tail(10)

In [None]:
# Summarise the merged frame's columns, dtypes and non-null counts.
merged_df.info()

In [None]:
# Persist the merged dataset.
# NOTE(review): to_csv writes the DataFrame index as an unnamed first column by
# default; pass index=False if downstream readers do not expect it.
merged_df.to_csv("data/combined_data.csv")

## Visualisations

In [None]:
# Pick one indicator at random so a single chart is easy to read.
# Candidates are all columns outside the fixed leading columns.
indicator_columns = [name for name in merged_df.columns if name not in first_cols]
randomly_chosen_column = choice(indicator_columns)
randomly_chosen_column

In [None]:
# Pick one of the countries present in the merged data at random.
country_names = merged_df["Country Name"].unique()
random_country = choice(country_names)
random_country


In [None]:
# Draw the randomly chosen indicator against the Eurovision placing for the
# randomly chosen country.
layered_chart = make_layer_chart(
    full_dataset=merged_df,
    country=random_country,
    randomly_chosen_column=randomly_chosen_column,
)
layered_chart.show()


In [None]:
# Optionally render the chosen indicator for every country in the merged data.
full_results_prompt = f"Do you want to see the full results for {randomly_chosen_column}? Y/N"
active = input(full_results_prompt).strip().lower()
while active not in ["y", "n"]:
    # Re-ask the *same* question; the retry text previously asked about saving.
    active = input(f"Invalid Input. {full_results_prompt}").strip().lower()
if active == "y":
    for country in merged_df["Country Name"].unique():
        layer = make_layer_chart(merged_df, country, randomly_chosen_column)
        layer.show()


In [None]:
# Optionally save one PNG per country for the chosen indicator.
active = input("Do you want to save this? Y/N").strip().lower()
while active not in ["y", "n"]:
    active = input("Invalid Input. Do you want to save this? Y/N").strip().lower()
if active == "y":
    # File-name suffix shared by every country (same scheme as before, so
    # previously saved files are still detected by the `exists` check).
    # NOTE(review): indicator names containing path separators would still
    # produce an invalid file name; sanitise here if such columns exist.
    column_slug = randomly_chosen_column.replace(" ", "_").lower()
    for country in merged_df["Country Name"].unique():
        country_dir = join("visualisations", "combined", country)
        filepath = join(country_dir, country + column_slug + ".png")
        if not exists(filepath):
            # Create the per-country folder on demand; saving fails otherwise.
            makedirs(country_dir, exist_ok=True)
            # Chart the same indicator the file is named after, rather than a
            # hard-coded column.
            make_layer_chart(merged_df, country, randomly_chosen_column).save(filepath)