This is the **fourth script to run** in the workflow.  

# Build Media Salience Dataset from Media Cloud Articles

This script processes country-level CSVs of climate-related articles downloaded from **Media Cloud**.  
It creates yearly aggregates of **climate salience**, **climate article counts**, and **total article counts** for each EU member state.  


In [None]:
import pandas as pd
import os

# === 1. COUNTRY MAP ===
# Map lowercase country names (from file names) to ISO country codes.
countries = {
    # Keys are the lowercase country names expected at the end of each
    # input file name; values are ISO 3166-1 alpha-3 codes (EU-27).
    "austria": "AUT",
    "belgium": "BEL",
    "bulgaria": "BGR",
    "croatia": "HRV",
    "cyprus": "CYP",
    "czechia": "CZE",
    "denmark": "DNK",
    "estonia": "EST",
    "finland": "FIN",
    "france": "FRA",
    "germany": "DEU",
    "greece": "GRC",
    "hungary": "HUN",
    "ireland": "IRL",
    "italy": "ITA",
    "latvia": "LVA",
    "lithuania": "LTU",
    "luxembourg": "LUX",
    "malta": "MLT",
    "netherlands": "NLD",
    "poland": "POL",
    "portugal": "PRT",
    "romania": "ROU",
    "slovakia": "SVK",
    "slovenia": "SVN",
    "spain": "ESP",
    "sweden": "SWE",
}

# === 2. PROCESS SINGLE FILE ===
def process_file(file_path, country_code, country_name, save_dir):
    """Aggregate one country's Media Cloud CSV to yearly values and save it.

    The input CSV must contain the columns ``date``, ``count`` and
    ``total_count``. For each year the function computes:

    * "Media salience (MC)"   -- mean of ``count`` over the year's rows
    * "Climate articles (MC)" -- sum of ``count``
    * "Total articles (MC)"   -- sum of ``total_count``

    For 2025 only rows dated January through June are used.

    Args:
        file_path: Path of the input CSV.
        country_code: Value written to the "Country Code" column.
        country_name: Value written to the "Country" column; also used in
            the output file name ``MediaSalience_<country_name>.csv``.
        save_dir: Folder the output CSV is written to. It is created if it
            does not exist yet.

    Returns:
        The aggregated DataFrame (one row per year) that was written to disk.
    """
    # Load file and derive the calendar year of every row
    df = pd.read_csv(file_path)
    dates = pd.to_datetime(df["date"])
    df["year"] = dates.dt.year

    # 2025 is only covered for its first half. Select those rows by calendar
    # month rather than by position, so the result does not depend on the
    # row order or on the time granularity of the export.
    in_scope = (df["year"] != 2025) | (dates.dt.month <= 6)
    df = df.loc[in_scope, ["year", "count", "total_count"]]

    # Yearly aggregation
    group = df.groupby("year").agg({
        "count": ["mean", "sum"],
        "total_count": "sum"
    }).reset_index()
    group.columns = ["year", "Media salience (MC)", "Climate articles (MC)", "Total articles (MC)"]

    # Add country info
    group["Country Code"] = country_code
    group["Country"] = country_name

    # Reorder columns
    final = group[[
        "Country Code", "Country", "year",
        "Media salience (MC)", "Climate articles (MC)", "Total articles (MC)"
    ]]

    # Save processed file; make sure the target folder exists first
    # (an empty save_dir means "current directory" and needs no creation).
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    file_name = f"MediaSalience_{country_name}.csv"
    full_path = os.path.join(save_dir, file_name)
    final.to_csv(full_path, index=False)
    print(f"Saved: {file_name}")

    return final


# === 3. MAIN SCRIPT ===
if __name__ == "__main__":
    # Batch driver: run process_file on every country CSV found in base_path
    # and write one MediaSalience_<Country>.csv per country into save_path.

    # Define paths
    base_path = 'insert/your/path/Media Cloud articles'
    save_path = 'insert/your/path/Salience MediaCloud'

    # List all CSV files in folder. The extension check is case-insensitive
    # and the list is sorted so runs are reproducible (os.listdir returns
    # entries in arbitrary order).
    all_files = os.listdir(base_path)
    csv_files = sorted(f for f in all_files if f.lower().endswith(".csv"))

    # Create the output folder once up front; otherwise a missing folder
    # makes every single file fail with only a printed error.
    os.makedirs(save_path, exist_ok=True)

    # Process each file
    for file in csv_files:
        # Extract country from filename (assumes the part after the last
        # underscore, without the extension, is the country name)
        stem = os.path.splitext(file)[0]
        country_part = stem.split("_")[-1].strip().lower()

        if country_part not in countries:
            print(f"Country not found in map for file: {file}")
            continue

        country_name = country_part.capitalize()
        country_code = countries[country_part]
        file_path = os.path.join(base_path, file)

        # Best-effort batch: report a failing file and carry on with the rest
        try:
            process_file(file_path, country_code, country_name, save_path)
        except Exception as e:
            print(f"Error processing file {file}: {e}")

✅ Salvato: MediaSalience_Finland.csv
✅ Salvato: MediaSalience_Greece.csv
✅ Salvato: MediaSalience_Belgium.csv
✅ Salvato: MediaSalience_Czechia.csv
✅ Salvato: MediaSalience_Slovenia.csv
✅ Salvato: MediaSalience_Netherlands.csv
✅ Salvato: MediaSalience_Lithuania.csv
✅ Salvato: MediaSalience_Croatia.csv
✅ Salvato: MediaSalience_Portugal.csv
✅ Salvato: MediaSalience_Italy.csv
✅ Salvato: MediaSalience_Sweden.csv
✅ Salvato: MediaSalience_Austria.csv
✅ Salvato: MediaSalience_Denmark.csv
✅ Salvato: MediaSalience_Spain.csv
✅ Salvato: MediaSalience_Bulgaria.csv
✅ Salvato: MediaSalience_France.csv
✅ Salvato: MediaSalience_Luxembourg.csv
✅ Salvato: MediaSalience_Ireland.csv
✅ Salvato: MediaSalience_Latvia.csv
✅ Salvato: MediaSalience_Malta.csv
✅ Salvato: MediaSalience_Hungary.csv
✅ Salvato: MediaSalience_Estonia.csv
✅ Salvato: MediaSalience_Slovakia.csv
✅ Salvato: MediaSalience_Romania.csv
✅ Salvato: MediaSalience_Poland.csv
✅ Salvato: MediaSalience_Cyprus.csv
✅ Salvato: MediaSalience_Germany.csv


This is the **fifth script to run** in the workflow.  

# Build Relative Climate Salience (RCM) Dataset from Google Trends

This script processes **Google Trends data** on searches for *climate* and *economy* to build an indicator of **Relative Climate Salience (RCM)** for each EU member state.  
RCM is defined, for each country and year, as the ratio of the annual mean Google Trends interest in *climate* to the annual mean interest in *economy*.


In [None]:
import pandas as pd
import os

# === 1. PROCESS SINGLE FILE ===
def get_rcm_from_csv(percorso_csv, country_code, country_name, save_dir):
    """Build yearly Relative Climate salience (RCM) from one Google Trends CSV.

    The file is read as three columns (period, climate, economy). Values are
    averaged per calendar year, and RCM is the yearly climate mean divided by
    the yearly economy mean. Years whose economy mean is zero get a missing
    RCM instead of an infinite one.

    Args:
        percorso_csv: Path of the Google Trends CSV to read.
        country_code: Value written to the "Country Code" column.
        country_name: Value written to the "Country" column; also used in
            the output file name ``RCM_<country_name>.csv``.
        save_dir: Folder the output CSV is written to. It is created if it
            does not exist yet.

    Returns:
        The yearly DataFrame that was written to disk.

    Raises:
        ValueError: If the file does not have exactly three columns, or the
            first four characters of a period value are not an integer year.
    """
    # Load CSV, skipping the first 3 lines (Google Trends preamble).
    # NOTE(review): the line right after the skipped ones is consumed as the
    # header row and then overwritten by the rename below, so it must not be
    # a data row -- confirm against a raw export.
    df = pd.read_csv(percorso_csv, skiprows=3)
    df.columns = ["month", "climate", "economy"]

    # Extract year from the first four characters of the period column
    df["year"] = df["month"].str.slice(0, 4).astype(int)

    # Convert to numeric (non-numeric cells become NaN).
    # NOTE(review): cells such as "<1" also become NaN and are therefore
    # left out of the yearly mean -- confirm this is the intended treatment.
    df["climate"] = pd.to_numeric(df["climate"], errors="coerce")
    df["economy"] = pd.to_numeric(df["economy"], errors="coerce")

    # Annual averages
    annual = df.groupby("year")[["climate", "economy"]].mean().reset_index()

    # Relative Climate salience = climate / economy.
    # A zero denominator is masked to NaN so the ratio is missing rather
    # than +/-inf in the saved file.
    economy_nonzero = annual["economy"].where(annual["economy"] != 0)
    annual["RCM"] = annual["climate"] / economy_nonzero

    # Add metadata
    annual["Country Code"] = country_code
    annual["Country"] = country_name

    # Rename columns
    annual = annual.rename(columns={
        "climate": "Climate salience (GT)",
        "economy": "Economy salience (GT)",
        "RCM": "Relative Climate salience (RCM)"
    })

    # Reorder columns
    result = annual[[
        "Country Code",
        "Country",
        "year",
        "Climate salience (GT)",
        "Economy salience (GT)",
        "Relative Climate salience (RCM)"
    ]]

    # Save processed file; make sure the target folder exists first
    # (an empty save_dir means "current directory" and needs no creation).
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    file_name = f"RCM_{country_name}.csv"
    full_path = os.path.join(save_dir, file_name)
    result.to_csv(full_path, index=False)
    print(f"Saved: {full_path}")

    return result


# === 2. MAIN SCRIPT ===
if __name__ == "__main__":
    # Batch driver: for each EU-27 country, look for its Google Trends export
    # in base_path and, when present, build its RCM file in save_path.

    # ISO codes keyed by country name
    countries = {
        "Austria": "AUT",
        "Belgium": "BEL",
        "Bulgaria": "BGR",
        "Croatia": "HRV",
        "Cyprus": "CYP",
        "Czechia": "CZE",
        "Denmark": "DNK",
        "Estonia": "EST",
        "Finland": "FIN",
        "France": "FRA",
        "Germany": "DEU",
        "Greece": "GRC",
        "Hungary": "HUN",
        "Ireland": "IRL",
        "Italy": "ITA",
        "Latvia": "LVA",
        "Lithuania": "LTU",
        "Luxembourg": "LUX",
        "Malta": "MLT",
        "Netherlands": "NLD",
        "Poland": "POL",
        "Portugal": "PRT",
        "Romania": "ROU",
        "Slovakia": "SVK",
        "Slovenia": "SVN",
        "Spain": "ESP",
        "Sweden": "SWE",
    }

    # Input and output folders
    base_path = "insert/your/path/G-trends RCM"
    save_path = "insert/your/path/RCM by country"

    for country, code in countries.items():
        # Expected input name: multiTimeline_<lowercase country>.csv
        file_name = f"multiTimeline_{country.lower()}.csv"
        file_path = os.path.join(base_path, file_name)

        # Report countries without an export and move on
        if not os.path.exists(file_path):
            print(f"Missing file: {file_name}")
            continue

        # Best-effort batch: a failing country must not stop the others
        try:
            get_rcm_from_csv(file_path, code, country, save_path)
        except Exception as e:
            print(f"Error for {country}: {e}")