In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Paths (machine-specific absolute locations; adjust when running elsewhere)
# Folder scanned for the per-year impact CSV files.
data_folder = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/results/EU28_impacts"
# Folder that receives the ranking chart and the rankings CSV.
output_folder = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations"
# CSV with per-sector price volatility, merged onto every impact file.
price_shock_file = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/price_data/II_PI_volatility.csv"

# Step 1: Process data
def process_csv_files(folder, price_shock_file):
    """Load the yearly impact CSVs in *folder* and attach price shock data.

    Every ``*.csv`` directory entry whose name ends in ``_<year>.csv``
    (e.g. ``eu28_impacts_2010.csv``) is read, tagged with a ``Year`` column
    and left-merged with the price shock table on ``Sector``. Files whose
    name carries no integer year are reported and skipped.

    Args:
        folder: Directory containing the yearly impact CSV files.
        price_shock_file: Path of the price shock CSV. A relative path is
            resolved against *folder*; an absolute path is used as given.
            Its ``sector`` / ``price_volatility`` columns are renamed to
            ``Sector`` / ``Price Shock`` before merging.

    Returns:
        One DataFrame with the rows of all yearly files, concatenated in
        sorted filename order with a fresh index.

    Raises:
        ValueError: If no CSV file with a year in its name was found.
    """
    # os.path.join returns the second argument unchanged when it is absolute,
    # so both relative names and absolute paths are supported here.
    price_shock_path = os.path.join(folder, price_shock_file)
    price_shock_data = pd.read_csv(price_shock_path)

    # Rename columns to ensure consistency with the impact files
    price_shock_data.rename(columns={'sector': 'Sector', 'price_volatility': 'Price Shock'}, inplace=True)

    def _canonical(path):
        # Normalised absolute form so the same file compares equal no matter
        # how it was spelled (relative vs. absolute, slash style, case on Windows).
        return os.path.normcase(os.path.abspath(path))

    price_shock_key = _canonical(price_shock_path)

    all_data = []
    # Sorted so the concatenated row order does not depend on the
    # arbitrary order in which os.listdir reports directory entries.
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".csv"):
            continue

        file_path = os.path.join(folder, file)

        # Never treat the price shock file itself as an impact file. Comparing
        # full paths also works when the caller passed an absolute path.
        if _canonical(file_path) == price_shock_key:
            continue

        # Extract year from filenames like "eu28_impacts_2010.csv"
        try:
            year = int(file.split('_')[-1].split('.')[0])
        except ValueError:
            print(f"Skipping file: {file} (No valid year in the filename)")
            continue

        # Read CSV file and add a 'Year' column
        data = pd.read_csv(file_path)
        data.rename(columns=lambda x: x.strip(), inplace=True)  # Ensure no leading/trailing spaces in column names
        data['Year'] = year

        # Left merge keeps every impact row; sectors without a price shock
        # entry end up with a missing 'Price Shock' value.
        merged_data = pd.merge(data, price_shock_data, on='Sector', how='left')

        all_data.append(merged_data)

    if not all_data:
        raise ValueError("No valid CSV files with years found in the specified folder.")

    return pd.concat(all_data, ignore_index=True)

# Step 2: Calculate rankings
def calculate_rankings(data):
    """Rank sectors by total impact within each year.

    The total impact of a row is ``Direct Impact + Indirect Impact``. Within
    each year the rows are ranked in descending order of total impact using
    dense ranking, so ties share a rank and no rank numbers are skipped.

    Args:
        data: DataFrame with ``Sector``, ``Year``, ``Direct Impact`` and
            ``Indirect Impact`` columns. It is not modified.

    Returns:
        DataFrame with columns ``Sector``, ``Total Impact``, ``Year`` and
        ``Rank`` (integer, 1 = largest total impact), grouped by year in
        order of first appearance and keeping the original row index.
        Empty input yields an empty frame with the same columns.
    """
    # Work on a narrow copy so the caller's frame does not silently gain a
    # 'Total Impact' column.
    totals = data[['Sector', 'Year']].copy()
    totals['Total Impact'] = data['Direct Impact'] + data['Indirect Impact']
    totals = totals[['Sector', 'Total Impact', 'Year']]

    rankings = []
    for year in totals['Year'].unique():
        year_data = totals[totals['Year'] == year].copy()
        year_data['Rank'] = year_data['Total Impact'].rank(ascending=False, method='dense').astype(int)
        rankings.append(year_data)

    if not rankings:
        # pd.concat raises on an empty list; return an empty result instead.
        return totals.assign(Rank=pd.Series(dtype=int))

    return pd.concat(rankings)

# Step 3: Generate a static ranking chart
def plot_ranking_chart(rankings, output_path):
    """Draw one rank-over-time line per sector and save it as a PNG.

    Args:
        rankings: DataFrame with ``Sector``, ``Year`` and ``Rank`` columns,
            holding at most one row per (sector, year) pair since it is
            pivoted on those two columns.
        output_path: Existing directory; the chart is written there as
            ``ranking_chart.png``.
    """
    # Pivot data for plotting: one row per sector, one column per year
    pivot_data = rankings.pivot(index='Sector', columns='Year', values='Rank')
    pivot_data = pivot_data.dropna(how='all')  # Drop sectors with no data

    # Order sectors by their rank in the first year column, which fixes the
    # order of the lines and legend entries.
    sorted_sectors = pivot_data[pivot_data.columns[0]].sort_values().index
    pivot_data = pivot_data.loc[sorted_sectors]

    # Plot the rankings
    fig, ax = plt.subplots(figsize=(12, 8))
    for sector in pivot_data.index:
        ax.plot(
            pivot_data.columns,
            pivot_data.loc[sector],
            marker='o',
            label=sector
        )

    # Aesthetics
    ax.set_title("Ranking of Sectors by Total Impact Over Time", fontsize=16)
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel("Ranking", fontsize=12)
    # Pin ticks to the actual years so the axis never shows fractional years.
    ax.set_xticks(list(pivot_data.columns))
    ax.invert_yaxis()  # Rank 1 should be at the top
    ax.legend(loc='upper left', bbox_to_anchor=(1, 1), title="Sector")

    # Save the chart. The legend sits outside the axes, which tight_layout
    # cannot always accommodate; bbox_inches='tight' instead grows the saved
    # canvas so the legend is included rather than clipped.
    output_file = os.path.join(output_path, "ranking_chart.png")
    fig.savefig(output_file, bbox_inches='tight')
    plt.close(fig)
    print(f"Ranking chart saved at {output_file}")

# Main Execution: load the impact files, rank sectors per year, then write
# the chart and the rankings table to the output folder.
if __name__ == "__main__":
    # Ensure output folder exists (no error if it is already there)
    os.makedirs(output_folder, exist_ok=True)

    # Step 1: Load all yearly files and merge the price shock data onto them
    data = process_csv_files(data_folder, price_shock_file)

    # Step 2: Calculate rankings
    rankings = calculate_rankings(data)

    # Step 3: Plot ranking chart (saved as a PNG inside output_folder)
    plot_ranking_chart(rankings, output_folder)

    # Save rankings as a CSV for reference
    rankings_output_file = os.path.join(output_folder, "sector_rankings.csv")
    rankings.to_csv(rankings_output_file, index=False)
    print(f"Rankings saved at {rankings_output_file}")


  plt.tight_layout()


Ranking chart saved at C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\ranking_chart.png
Rankings saved at C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\sector_rankings.csv
