# Eurovision

## Webscrape Data from Wikipedia

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
URL = "https://de.wikipedia.org/wiki/Eurovision_Song_Contest#L%C3%A4nderstatistik"

# Fail fast on network trouble: without a timeout the request can block
# indefinitely, and without raise_for_status() an HTTP error page would be
# parsed as if it were the article.
response = requests.get(URL, timeout=30)
response.raise_for_status()
page = response.text
soup = BeautifulSoup(page, 'html.parser')

# Collect one record per <td> of every table row that contains data cells.
# Each record carries the text of the row's first cell, the cell's colspan
# attribute ('' when absent) and the value of its inline `background:` style
# (None when the cell has no such style).
records = []
for tr in soup.find_all('tr'):
    tds = tr.find_all('td')
    if not tds:
        continue
    first_td_value = tds[0].get_text(strip=True)
    for td in tds:
        style = td.get('style', '')
        if 'background:' in style:
            # Keep only the value between 'background:' and the next ';'.
            bg_color = style.split('background:')[1].split(';')[0].strip()
        else:
            bg_color = None
        records.append({
            'First TD Value': first_td_value,
            'Colspan': td.get('colspan', ''),
            'Background Color': bg_color,
        })

# Naming the columns explicitly keeps the filters below working even when the
# scrape produced no records at all.
df = pd.DataFrame(
    records,
    columns=['First TD Value', 'Colspan', 'Background Color'],
)

# Background colour -> label. Keys are upper-case hex codes.
# 'Abgesagt' is German for "cancelled", 'Semifinale' for "semi-final".
# NOTE(review): the intent of the 'Delete' label is not visible here -- confirm.
color_meanings = {
    '#808080': 'Final',
    '#FFFF40': 'Won',
    '#FFCBCB': 'Abgesagt',
    '#B3B7FF': 'Semifinale',
    '#C0C0C0': 'Covid',
    '#B9FFC5': 'Delete',
    '#EAECF0': 'Did not participate'
}

# Keep only cells that have both a colspan and a background colour, and drop
# rows whose first cell is one of the two listed labels.
# NOTE(review): 'Teil-nehmer' / '50er' are presumably table header cells --
# verify against the page.
df = df.dropna(subset=['Colspan', 'Background Color'])
has_colspan = df['Colspan'].astype(str).str.strip() != ''
has_color = df['Background Color'].astype(str).str.strip() != ''
is_excluded_label = df['First TD Value'].isin(['Teil-nehmer', '50er'])
# .copy() so the column assignment below writes to an independent frame
# instead of a filtered view (avoids SettingWithCopyWarning).
df = df[has_colspan & has_color & ~is_excluded_label].copy()

# CSS hex colours are case-insensitive, so look them up in upper case while
# leaving the scraped value in 'Background Color' untouched.
df['Meaning'] = df['Background Color'].str.upper().map(color_meanings)

# NOTE(review): index 544 is a positional label from the current page layout;
# it will point at a different cell (or nothing) if the article changes.
df = df.drop(index=544, errors='ignore')
print(df)

     First TD Value Colspan Background Color     Meaning
458       BelgienBE       4          #808080       Final
459       BelgienBE      10          #808080       Final
460       BelgienBE      10          #808080       Final
461       BelgienBE       6          #808080       Final
462       BelgienBE       1          #FFFF40         Won
...             ...     ...              ...         ...
1942   AustralienAU       5          #808080       Final
1943   AustralienAU       1          #C0C0C0       Covid
1944   AustralienAU       1          #B3B7FF  Semifinale
1945   AustralienAU       2          #808080       Final
1946   AustralienAU       1          #B3B7FF  Semifinale

[1030 rows x 4 columns]


In [None]:
# Year assigned to the first expanded cell of every country.
start_year = 1956

# One dict per (country, year) pair, filled in below.
expanded_data = []

# Walk the cells country by country; within a country the cells are consumed
# in their existing row order and each one covers `Colspan` consecutive years.
for country, group in df.groupby('First TD Value'):
    next_year = start_year  # every country starts over at start_year

    cells = zip(
        group['First TD Value'],
        group['Colspan'],
        group['Background Color'],
        group['Meaning'],
    )
    for name, raw_span, color, meaning in cells:
        try:
            span = int(raw_span)
        except ValueError:
            # Non-numeric colspan: the cell is skipped and no years are used.
            continue

        if span <= 0:
            # Nothing to emit and the year counter must not move.
            continue

        # Emit one record for each year the cell spans.
        expanded_data.extend(
            {
                'Country': name,
                'Year': year,
                'Background Color': color,
                'Meaning': meaning,
            }
            for year in range(next_year, next_year + span)
        )
        next_year += span

# Build the per-year table and show it.
expanded_df = pd.DataFrame(expanded_data)

print(expanded_df)