# In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

def load_excel(filepath):
    """Read the Excel file at ``filepath`` and return it as a DataFrame."""
    frame = pd.read_excel(filepath)
    return frame

def clean_data(df):
    """Return a cleaned copy of ``df``.

    Steps, in order:
      1. Strip leading/trailing whitespace from string values in every
         object-dtype column.
      2. Drop rows in which every value is missing.
      3. Drop exact duplicate rows.

    The input frame is left untouched; the original index labels of the
    surviving rows are kept (no re-indexing).

    Args:
        df: The raw DataFrame to clean.

    Returns:
        A new DataFrame with the steps above applied.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    cleaned = df.copy()

    # Strip whitespace from all string columns. Only real ``str`` values are
    # touched: ``Series.str.strip()`` would turn every non-string entry of a
    # mixed-type object column (e.g. numbers) into NaN.
    for col in cleaned.select_dtypes(include=['object']).columns:
        cleaned[col] = cleaned[col].map(
            lambda value: value.strip() if isinstance(value, str) else value
        )

    # Remove rows that are entirely empty.
    cleaned = cleaned.dropna(how='all')

    # Remove duplicate rows.
    cleaned = cleaned.drop_duplicates()

    return cleaned


def _plot_overview(df):
    """Draw figure 1: top/bottom rankings, a scatter plot and a histogram."""
    plt.figure(figsize=(20, 15))

    # Plot 1: the ten rows with the largest 'Total'
    plt.subplot(2, 2, 1)
    top_10 = df.nlargest(10, 'Total')
    sns.barplot(data=top_10, x='Total', y='Country', palette='rocket')
    plt.title('Top 10 Most Fragile States 2023')

    # Plot 2: the ten rows with the smallest 'Total'
    plt.subplot(2, 2, 2)
    bottom_10 = df.nsmallest(10, 'Total')
    sns.barplot(data=bottom_10, x='Total', y='Country', palette='rocket_r')
    plt.title('Top 10 Most Stable States 2023')

    # Plot 3: two indicators against each other, coloured and sized by 'Total'
    plt.subplot(2, 2, 3)
    sns.scatterplot(data=df, x='P3: Human Rights', y='P1: State Legitimacy',
                    hue='Total', size='Total', sizes=(20, 200))
    plt.title('Human Rights vs State Legitimacy')

    # Plot 4: histogram of 'Total' with a KDE overlay
    plt.subplot(2, 2, 4)
    sns.histplot(data=df, x='Total', bins=30, kde=True)
    plt.title('Distribution of Fragile States Index Scores')

    plt.tight_layout()
    plt.show()


def _plot_indicator_details(df, indicators, main_indicators, correlation):
    """Draw figure 2: correlation heatmap, box plots, per-indicator top 5, scatter."""
    plt.figure(figsize=(20, 15))

    # Plot 5: pairwise correlation of the selected indicators
    plt.subplot(2, 2, 1)
    sns.heatmap(correlation, annot=True, cmap='coolwarm', center=0)
    plt.title('Correlation Between Indicators')

    # Plot 6: one box per indicator (long form via melt)
    plt.subplot(2, 2, 2)
    data_melted = df.melt(value_vars=indicators)
    sns.boxplot(data=data_melted, x='variable', y='value')
    plt.xticks(rotation=45)
    plt.title('Distribution of Different Indicators')

    # Plot 7: top 5 countries for each main indicator.
    # Build a long-form frame (Country / Score / Indicator). Concatenating the
    # per-indicator frames column-wise and plotting only the second column
    # would leave the 2nd and 3rd indicators as NaN and never draw them.
    plt.subplot(2, 2, 3)
    top_5_each = pd.concat(
        [
            df.nlargest(5, ind)[['Country', ind]]
            .rename(columns={ind: 'Score'})
            .assign(Indicator=ind)
            for ind in main_indicators
        ],
        ignore_index=True,
    )
    sns.barplot(data=top_5_each, x='Country', y='Score', hue='Indicator')
    plt.xticks(rotation=45)
    plt.title('Top 5 Countries for Each Main Indicator')

    # Plot 8: Total vs Human Rights.
    # Regions are only shown when the data actually carries a 'Region'
    # column; otherwise every country is plotted once without a made-up
    # regional legend.
    plt.subplot(2, 2, 4)
    if 'Region' in df.columns:
        sns.scatterplot(data=df, x='Total', y='P3: Human Rights',
                        hue='Region', alpha=0.6)
        plt.title('Regional Distribution of Total Score vs Human Rights')
    else:
        plt.scatter(df['Total'], df['P3: Human Rights'], alpha=0.6)
        plt.xlabel('Total')
        plt.ylabel('P3: Human Rights')
        plt.title('Total Score vs Human Rights')

    plt.tight_layout()
    plt.show()


def _print_insights(df, correlation):
    """Print mean/std of 'Total' and the three highest-correlated indicator pairs."""
    print("\nStatistical Insights:")
    print(f"Global average FSI score: {df['Total'].mean():.2f}")
    print(f"Standard deviation: {df['Total'].std():.2f}")
    print("\nTop 3 most correlated indicators:")
    # Keep only the strict upper triangle so the diagonal is excluded and
    # each unordered pair is listed once instead of as (a, b) and (b, a).
    upper = np.triu(np.ones(correlation.shape, dtype=bool), k=1)
    top_corr = (
        correlation.where(upper)
        .stack()
        .dropna()
        .sort_values(ascending=False)
        .head(3)
    )
    for idx, val in top_corr.items():
        print(f"{idx[0]} & {idx[1]}: {val:.2f}")


def visualize_data(df):
    """Show two 2x2 figures for ``df`` and print summary statistics.

    Figure 1: bar charts of the 10 largest and 10 smallest 'Total' rows, a
    'P3: Human Rights' vs 'P1: State Legitimacy' scatter, and a histogram of
    'Total'. Figure 2: indicator correlation heatmap, indicator box plots,
    top-5 countries per main indicator, and a 'Total' vs 'P3: Human Rights'
    scatter (coloured by 'Region' only if that column exists). Afterwards the
    mean and standard deviation of 'Total', the three most correlated
    indicator pairs and a text banner are printed.

    Args:
        df: DataFrame containing 'Country', 'Total' and the indicator
            columns listed in ``indicators`` below.

    Returns:
        None. The function only draws figures and prints to stdout.

    Raises:
        KeyError: If any required column is missing from ``df``.
    """
    indicators = ['S1: Demographic Pressures', 'S2: Refugees and IDPs',
                  'E1: Economy', 'P1: State Legitimacy', 'P3: Human Rights',
                  'C1: Security Apparatus', 'C2: Factionalized Elites']
    main_indicators = ['S1: Demographic Pressures', 'E1: Economy',
                       'P3: Human Rights']

    # Fail before any figure is shown rather than halfway through plotting.
    required = ['Country', 'Total', *indicators]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"visualize_data: missing required column(s): {missing}")

    correlation = df[indicators].corr()

    # First set of visualizations
    _plot_overview(df)

    # Second set of visualizations
    _plot_indicator_details(df, indicators, main_indicators, correlation)

    _print_insights(df, correlation)

    print("""
    =====================================
         BOSS BATTLE BEGUN
    =====================================
                        /\\__/\\ 
      Hunter1  Hunter2    |ಠ ಠ|  
       \\[T]/   \\[+]/     \\▼▼/   
        |=|     |†|      /^^^^^\\
       / \\     / \\     <<|||||>>
                         
    
    =====================================
         BOSS BATTLE BEGUN
    =====================================
    """)