In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Paths (absolute, machine-specific locations; adjust them before running elsewhere)
# data_folder: directory scanned by process_csv_files for the per-year impact CSVs
# output_folder: directory that receives the generated PNG charts
# price_shock_file: CSV whose 'sector' / 'price_volatility' columns are merged onto every year's data
data_folder = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/results/EU28_impacts"
output_folder = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations"
price_shock_file = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/price_data/II_PI_volatility.csv"

# Step 1: Process data
def process_csv_files(folder, price_shock_file):
    """Load every per-year impact CSV in *folder* and attach price-shock data.

    Args:
        folder: Directory containing CSV files whose names end in a year,
            e.g. "eu28_impacts_2010.csv".
        price_shock_file: Path to the price-shock CSV. A relative path is
            resolved against *folder*; an absolute path is used as given.
            Its 'sector' and 'price_volatility' columns are renamed to
            'Sector' and 'Price Shock' before merging.

    Returns:
        One DataFrame holding the rows of all yearly files, with an added
        'Year' column and the price-shock columns left-merged on 'Sector'.

    Raises:
        ValueError: If no CSV file with a year in its name was found.
    """
    def _canonical(path):
        # Normalise so the same file compares equal however it was spelled
        return os.path.normcase(os.path.abspath(path))

    # os.path.join returns its second argument unchanged when it is absolute
    price_shock_path = os.path.join(folder, price_shock_file)
    price_shock_data = pd.read_csv(price_shock_path).rename(
        columns={'sector': 'Sector', 'price_volatility': 'Price Shock'}
    )
    price_shock_key = _canonical(price_shock_path)

    all_data = []
    # Sorted so the row order of the result does not depend on the file system
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".csv"):
            continue

        file_path = os.path.join(folder, file)
        # Compare full paths: a bare directory entry never equals an absolute
        # price-shock path, so a name-only comparison would not exclude it.
        if _canonical(file_path) == price_shock_key:
            continue

        # Extract year from filenames like "eu28_impacts_2010.csv"
        try:
            year = int(file.split('_')[-1].split('.')[0])
        except ValueError:
            print(f"Skipping file: {file} (No valid year in the filename)")
            continue

        # Read CSV file, strip stray spaces from column names, add 'Year'
        data = pd.read_csv(file_path)
        data = data.rename(columns=lambda x: x.strip())
        data['Year'] = year

        # Left merge keeps every impact row even without a price-shock match
        all_data.append(pd.merge(data, price_shock_data, on='Sector', how='left'))

    if not all_data:
        raise ValueError("No valid CSV files with years found in the specified folder.")

    return pd.concat(all_data, ignore_index=True)

# Step 2: Plot horizontal bar chart
def plot_top_sectors(data, year, output_path):
    """Save a stacked horizontal bar chart of the 15 highest-impact sectors.

    Each bar stacks 'Direct Impact' and 'Indirect Impact'; the sector's
    'Price Shock' is overlaid as a scatter point on a secondary x-axis.

    Args:
        data: DataFrame with 'Sector', 'Direct Impact', 'Indirect Impact'
            and 'Price Shock' columns. It is not modified.
        year: Value used in the chart title and in the output file name.
        output_path: Directory that receives "top_sectors_<year>.png".
    """
    # assign() builds a new frame, so a filtered slice passed by the caller is
    # neither mutated nor the cause of a SettingWithCopyWarning.
    scored = data.assign(
        **{'Total Impact': data['Direct Impact'] + data['Indirect Impact']}
    )
    top_15 = scored.nlargest(15, 'Total Impact')

    # Sort top 15 by Total Impact in descending order for plotting
    top_15 = top_15.sort_values(by='Total Impact', ascending=False)

    fig, ax1 = plt.subplots(figsize=(12, 6))
    try:
        ax2 = ax1.twiny()  # Secondary x-axis (shared y) for the price shock scale

        # Stacked bars: indirect impact starts where direct impact ends
        ax1.barh(top_15['Sector'], top_15['Direct Impact'], label="Direct Impact", color='gold')
        ax1.barh(top_15['Sector'], top_15['Indirect Impact'], left=top_15['Direct Impact'],
                 label="Indirect Impact", color='purple')

        # Scatter plot for price shock
        ax2.scatter(top_15['Price Shock'], top_15['Sector'], color='green', label="Price Shock", zorder=5)

        # barh draws the first row at the bottom; invert so the largest is on top
        ax1.invert_yaxis()

        # Aesthetics
        ax1.set_title(f"Top 15 Sectors in {year} by Total Impact", fontsize=14)
        ax1.set_xlabel("CPI Inflation Impact (%)", fontsize=12)

        # Collect handles from both axes so the scatter's label is listed too
        bar_handles, bar_labels = ax1.get_legend_handles_labels()
        shock_handles, shock_labels = ax2.get_legend_handles_labels()
        ax1.legend(bar_handles + shock_handles, bar_labels + shock_labels, loc="lower right")

        # Only fix the scale when there is a usable maximum; an all-NaN column
        # (no merge match) would otherwise make set_xlim raise.
        shock_max = top_15['Price Shock'].max()
        if pd.notna(shock_max) and shock_max > 0:
            ax2.set_xlim(0, shock_max * 1.1)
        ax2.set_xlabel("Price Shock Scale", fontsize=12)

        fig.tight_layout()

        # Save plot
        output_file = os.path.join(output_path, f"top_sectors_{year}.png")
        fig.savefig(output_file)
    finally:
        # Release the figure even if plotting or saving failed
        plt.close(fig)

    # Print confirmation
    print(f"PNG for year {year} saved at: {output_file}")

# Step 3: Generate ranking chart for top 15
def plot_ranking_chart(data, output_path):
    """Save a line chart of each sector's yearly rank among the top 15.

    For every year the 15 rows with the largest total impact
    ('Direct Impact' + 'Indirect Impact') are dense-ranked, rank 1 being the
    largest. One line per sector is drawn across the years.

    Args:
        data: DataFrame with 'Sector', 'Year', 'Direct Impact' and
            'Indirect Impact' columns. It is not modified.
        output_path: Directory that receives "ranking_chart.png".
    """
    # assign() returns a new frame, leaving the caller's DataFrame untouched
    scored = data.assign(
        **{'Total Impact': data['Direct Impact'] + data['Indirect Impact']}
    )

    # Calculate rankings
    rankings = []
    for year in scored['Year'].unique():
        top_15 = scored[scored['Year'] == year].nlargest(15, 'Total Impact')  # Top 15 only
        top_15 = top_15.assign(
            Rank=top_15['Total Impact'].rank(ascending=False, method='dense').astype(int)
        )
        rankings.append(top_15[['Sector', 'Year', 'Rank']])
    rankings = pd.concat(rankings)

    # Pivot for plotting: one row per sector, one column per year
    pivot_data = rankings.pivot(index='Sector', columns='Year', values='Rank')

    # Plot the rankings
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        for sector in pivot_data.index:
            ax.plot(
                pivot_data.columns,
                pivot_data.loc[sector],
                marker='o',
                label=sector
            )

        # Aesthetics
        ax.set_title("Top 15 Sectors by Ranking Over Time", fontsize=16)
        ax.set_xlabel("Year", fontsize=12)
        ax.set_ylabel("Ranking", fontsize=12)
        ax.invert_yaxis()  # Rank 1 at the top
        ax.legend(loc='upper left', bbox_to_anchor=(1, 1), title="Sector")
        fig.tight_layout()

        # Save the ranking chart
        fig.savefig(os.path.join(output_path, "ranking_chart.png"))
    finally:
        # Release the figure even if plotting or saving failed
        plt.close(fig)

# Main Execution
if __name__ == "__main__":
    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Step 1: Process all files (include the price shock file)
    data = process_csv_files(data_folder, price_shock_file)

    # Step 2: Generate bar charts for each year
    for year in data['Year'].unique():
        # Explicit copy: the plotting function may add columns to the frame it
        # receives, which on a filtered slice raises SettingWithCopyWarning.
        year_data = data[data['Year'] == year].copy()
        plot_top_sectors(year_data, year, output_folder)

    # Step 3: Generate ranking chart
    plot_ranking_chart(data, output_folder)


In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Paths (absolute, machine-specific locations; adjust them before running elsewhere)
# data_folder: directory scanned by process_csv_files for the per-year impact CSVs
# output_folder: directory that receives the generated PNG charts
# price_shock_file: CSV whose 'sector' / 'price_volatility' columns are merged onto every year's data
data_folder = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/results/EU28_impacts"
output_folder = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations"
price_shock_file = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/price_data/II_PI_volatility.csv"

# Step 1: Process data
def process_csv_files(folder, price_shock_file):
    """Load every per-year impact CSV in *folder* and attach price-shock data.

    Args:
        folder: Directory containing CSV files whose names end in a year,
            e.g. "eu28_impacts_2010.csv".
        price_shock_file: Path to the price-shock CSV. A relative path is
            resolved against *folder*; an absolute path is used as given.
            Its 'sector' and 'price_volatility' columns are renamed to
            'Sector' and 'Price Shock' before merging.

    Returns:
        One DataFrame holding the rows of all yearly files, with an added
        'Year' column and the price-shock columns left-merged on 'Sector'.

    Raises:
        ValueError: If no CSV file with a year in its name was found.
    """
    def _canonical(path):
        # Normalise so the same file compares equal however it was spelled
        return os.path.normcase(os.path.abspath(path))

    # os.path.join returns its second argument unchanged when it is absolute
    price_shock_path = os.path.join(folder, price_shock_file)
    price_shock_data = pd.read_csv(price_shock_path).rename(
        columns={'sector': 'Sector', 'price_volatility': 'Price Shock'}
    )
    price_shock_key = _canonical(price_shock_path)

    all_data = []
    # Sorted so the row order of the result does not depend on the file system
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".csv"):
            continue

        file_path = os.path.join(folder, file)
        # Compare full paths: a bare directory entry never equals an absolute
        # price-shock path, so a name-only comparison would not exclude it.
        if _canonical(file_path) == price_shock_key:
            continue

        # Extract year from filenames like "eu28_impacts_2010.csv"
        try:
            year = int(file.split('_')[-1].split('.')[0])
        except ValueError:
            print(f"Skipping file: {file} (No valid year in the filename)")
            continue

        # Read CSV file, strip stray spaces from column names, add 'Year'
        data = pd.read_csv(file_path)
        data = data.rename(columns=lambda x: x.strip())
        data['Year'] = year

        # Left merge keeps every impact row even without a price-shock match
        all_data.append(pd.merge(data, price_shock_data, on='Sector', how='left'))

    if not all_data:
        raise ValueError("No valid CSV files with years found in the specified folder.")

    return pd.concat(all_data, ignore_index=True)

# Step 2: Plot horizontal bar chart
def plot_top_sectors(data, year, output_path):
    """Save a stacked horizontal bar chart of the 15 highest-impact sectors.

    Each bar stacks 'Direct Impact' and 'Indirect Impact'; the sector's
    'Price Shock' is overlaid as a scatter point on a secondary x-axis.

    Args:
        data: DataFrame with 'Sector', 'Direct Impact', 'Indirect Impact'
            and 'Price Shock' columns. It is not modified.
        year: Value used in the chart title and in the output file name.
        output_path: Directory that receives "top_sectors_<year>.png".
    """
    # assign() builds a new frame, so a filtered slice passed by the caller is
    # neither mutated nor the cause of a SettingWithCopyWarning.
    scored = data.assign(
        **{'Total Impact': data['Direct Impact'] + data['Indirect Impact']}
    )
    top_15 = scored.nlargest(15, 'Total Impact')

    # Sort top 15 by Total Impact in descending order for plotting
    top_15 = top_15.sort_values(by='Total Impact', ascending=False)

    fig, ax1 = plt.subplots(figsize=(12, 6))
    try:
        ax2 = ax1.twiny()  # Secondary x-axis (shared y) for the price shock scale

        # Stacked bars: indirect impact starts where direct impact ends
        ax1.barh(top_15['Sector'], top_15['Direct Impact'], label="Direct Impact", color='gold')
        ax1.barh(top_15['Sector'], top_15['Indirect Impact'], left=top_15['Direct Impact'],
                 label="Indirect Impact", color='purple')

        # Scatter plot for price shock
        ax2.scatter(top_15['Price Shock'], top_15['Sector'], color='green', label="Price Shock", zorder=5)

        # Reverse the order of the y-axis to match descending order of Total Impact
        ax1.invert_yaxis()

        # Aesthetics
        ax1.set_title(f"Top 15 Sectors in {year} by Total Impact", fontsize=14)
        ax1.set_xlabel("CPI Inflation Impact (%)", fontsize=12)

        # Collect handles from both axes so the scatter's label is listed too
        bar_handles, bar_labels = ax1.get_legend_handles_labels()
        shock_handles, shock_labels = ax2.get_legend_handles_labels()
        ax1.legend(bar_handles + shock_handles, bar_labels + shock_labels, loc="lower right")

        # Only fix the scale when there is a usable maximum; an all-NaN column
        # (no merge match) would otherwise make set_xlim raise.
        shock_max = top_15['Price Shock'].max()
        if pd.notna(shock_max) and shock_max > 0:
            ax2.set_xlim(0, shock_max * 1.1)
        ax2.set_xlabel("Price Shock Scale", fontsize=12)

        fig.tight_layout()

        # Save plot
        output_file = os.path.join(output_path, f"top_sectors_{year}.png")
        fig.savefig(output_file)
    finally:
        # Release the figure even if plotting or saving failed
        plt.close(fig)

    # Print confirmation
    print(f"PNG for year {year} saved at: {output_file}")

# Step 3: Generate ranking chart for top 15
def plot_ranking_chart(data, output_path):
    """Save a line chart of each sector's yearly rank among the top 15.

    For every year the 15 rows with the largest total impact
    ('Direct Impact' + 'Indirect Impact') are dense-ranked, rank 1 being the
    largest. One line per sector is drawn across the years.

    Args:
        data: DataFrame with 'Sector', 'Year', 'Direct Impact' and
            'Indirect Impact' columns. It is not modified.
        output_path: Directory that receives "ranking_chart.png".
    """
    # assign() returns a new frame, leaving the caller's DataFrame untouched
    scored = data.assign(
        **{'Total Impact': data['Direct Impact'] + data['Indirect Impact']}
    )

    # Calculate rankings
    rankings = []
    for year in scored['Year'].unique():
        top_15 = scored[scored['Year'] == year].nlargest(15, 'Total Impact')  # Top 15 only
        top_15 = top_15.assign(
            Rank=top_15['Total Impact'].rank(ascending=False, method='dense').astype(int)
        )
        rankings.append(top_15[['Sector', 'Year', 'Rank']])
    rankings = pd.concat(rankings)

    # Pivot for plotting: one row per sector, one column per year
    pivot_data = rankings.pivot(index='Sector', columns='Year', values='Rank')

    # Plot the rankings
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        for sector in pivot_data.index:
            ax.plot(
                pivot_data.columns,
                pivot_data.loc[sector],
                marker='o',
                label=sector
            )

        # Aesthetics
        ax.set_title("Top 15 Sectors by Ranking Over Time", fontsize=16)
        ax.set_xlabel("Year", fontsize=12)
        ax.set_ylabel("Ranking", fontsize=12)
        ax.invert_yaxis()  # Rank 1 at the top
        ax.legend(loc='upper left', bbox_to_anchor=(1, 1), title="Sector")
        fig.tight_layout()

        # Save the ranking chart
        output_file = os.path.join(output_path, "ranking_chart.png")
        fig.savefig(output_file)
    finally:
        # Release the figure even if plotting or saving failed
        plt.close(fig)

    # Print confirmation
    print(f"Ranking chart saved at: {output_file}")

# Main Execution
if __name__ == "__main__":
    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Step 1: Process all files (include the price shock file)
    data = process_csv_files(data_folder, price_shock_file)

    # Step 2: Generate bar charts for each year
    for year in data['Year'].unique():
        # Explicit copy: the plotting function may add columns to the frame it
        # receives, which on a filtered slice raises SettingWithCopyWarning.
        year_data = data[data['Year'] == year].copy()
        plot_top_sectors(year_data, year, output_folder)

    # Step 3: Generate ranking chart
    plot_ranking_chart(data, output_folder)


PNG for year 2010 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\top_sectors_2010.png
PNG for year 2011 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\top_sectors_2011.png
PNG for year 2012 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\top_sectors_2012.png
PNG for year 2013 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\top_sectors_2013.png
PNG for year 2014 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\top_sectors_2014.png
PNG for year 2015 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\top_sectors_2015.png
PNG for year 2016 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\top_sectors_2016.png
PNG for year 2017 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizations\top_sectors_2017.png
PNG for year 2018 saved at: C:/Users/danie/Nextcloud/Coding/Masterthesis/data/visualizat