In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Function to merge CSV files in a directory
def merge_csv_files(folder_path):
    """Read every ``.csv`` file directly inside ``folder_path`` into one DataFrame.

    Parameters
    ----------
    folder_path : str
        Directory to scan (not recursive). Only entries whose name ends in
        ".csv" are read; everything else is ignored.

    Returns
    -------
    pandas.DataFrame
        The rows of all CSV files stacked on top of each other with a fresh
        0..n-1 index, or an empty DataFrame when the folder has no CSV files.
    """
    # sorted() makes the row order reproducible; os.listdir order is arbitrary.
    csv_names = sorted(name for name in os.listdir(folder_path) if name.endswith(".csv"))
    frames = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_names]
    if not frames:
        # pd.concat raises on an empty list, so keep the empty-frame result.
        return pd.DataFrame()
    # One concat at the end instead of re-copying the accumulated frame per file.
    return pd.concat(frames, ignore_index=True)

# Path to the main directory
main_directory = "All_outputs/"

# Collect one merged frame per country folder and concatenate once at the end,
# instead of re-copying the growing result on every iteration.
country_frames = []

# Loop through each country folder (sorted so the row order is reproducible)
for country_folder in sorted(os.listdir(main_directory)):
    country_folder_path = os.path.join(main_directory, country_folder)
    if not os.path.isdir(country_folder_path):
        continue  # plain files next to the country folders are ignored
    # Merge CSV files in the country folder
    country_data = merge_csv_files(country_folder_path)
    if country_data.empty:
        continue  # a folder without CSV rows contributes nothing
    # Tag every row with the name of the folder it was read from
    country_data['Country'] = country_folder
    country_frames.append(country_data)

# pd.concat raises on an empty list, so fall back to an empty frame in that case
if country_frames:
    Final_DF_All_SSPs_All_Crossings_All_Countries = pd.concat(country_frames, ignore_index=True)
else:
    Final_DF_All_SSPs_All_Crossings_All_Countries = pd.DataFrame()

# Persist the combined table (written before any later renaming of countries)
file_path = "Final_DF_All_SSPs_All_Crossings_All_Countries.csv"
Final_DF_All_SSPs_All_Crossings_All_Countries.to_csv(file_path, index=False)

# Last expression of the cell: display the resulting column index
Final_DF_All_SSPs_All_Crossings_All_Countries.columns


: 

In [None]:
# Rename 'El_Salvador' to 'El Salvador' in the DataFrame
Final_DF_All_SSPs_All_Crossings_All_Countries['Country'] = Final_DF_All_SSPs_All_Crossings_All_Countries['Country'].replace({'El_Salvador': 'El Salvador'})

# Define the country codes
country_codes = {
    'Mexico': 'MEX',
    'El Salvador': 'ELSAL',
    'Honduras': 'HON',
    'Guatemala': 'GUA'
}

# Country values that are not keys of country_codes map to NaN.
country_code_values = Final_DF_All_SSPs_All_Crossings_All_Countries['Country'].map(country_codes)

# Place 'Country_Code' right after 'Country'.
# DataFrame.insert raises ValueError when the column already exists, which made
# this cell fail on a second run; overwrite in place in that case instead.
column_index = Final_DF_All_SSPs_All_Crossings_All_Countries.columns.get_loc('Country') + 1
if 'Country_Code' in Final_DF_All_SSPs_All_Crossings_All_Countries.columns:
    Final_DF_All_SSPs_All_Crossings_All_Countries['Country_Code'] = country_code_values
else:
    Final_DF_All_SSPs_All_Crossings_All_Countries.insert(column_index, 'Country_Code', country_code_values)
# Final_DF_All_SSPs_All_Crossings_All_Countries


: 

# Sum of Immigrants Per Crossing

# Total Immigrants by Country for Each Scenario

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

# Reshape the DataFrame using melt: every column that is not an id column
# becomes one (Year, Total Immigrants) row.
melted_df = Final_DF_All_SSPs_All_Crossings_All_Countries.melt(
    id_vars=['Scenario', 'Crossing', 'Country', 'Country_Code'],
    var_name='Year',
    value_name='Total Immigrants'
)

# Convert 'Year' to datetime (the former column names are parsed as 4-digit years)
melted_df['Year'] = pd.to_datetime(melted_df['Year'], format='%Y')

# Filter the data to include only every tenth year, up to and including 2100
year_numbers = melted_df['Year'].dt.year
filtered_df = melted_df[(year_numbers % 10 == 0) & (year_numbers <= 2100)]

# Create a custom color palette for countries using 'viridis' palette
custom_palette = 'viridis'

# Group by scenario and plot
scenarios = melted_df['Scenario'].unique()
num_scenarios = len(scenarios)

# Two panels per row; keep the 2x2 layout for up to four scenarios and add rows
# beyond that, so indexing axes[i] cannot run past the grid.
n_cols = 2
n_rows = max(2, -(-num_scenarios // n_cols))  # ceiling division
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(16, 6 * n_rows))

# Flatten the axes array for easier iteration
axes = axes.flatten()

for i, scenario in enumerate(scenarios):
    scenario_df = filtered_df[filtered_df['Scenario'] == scenario]

    # Select the current subplot
    ax = axes[i]

    # Use Seaborn's barplot with custom palette.
    # NOTE(review): barplot aggregates with its default estimator (mean) over the
    # rows sharing a year and country code, so each bar is an average across
    # crossings rather than a sum - confirm this matches the "Total" labels.
    sns.barplot(data=scenario_df, x=scenario_df['Year'].dt.year, y='Total Immigrants', hue='Country_Code', palette=custom_palette, ax=ax)

    # Customize the plot
    ax.set_title(f'Total Immigrants by Country for Scenario: {scenario}\n(95% Confidence Interval)')
    ax.set_xlabel('Year')
    ax.set_ylabel('Total Immigrants (100,000)')
    ax.grid(True)

    # Change background color
    ax.set_facecolor('#f0f0f0')

    # Annotate each bar with its corresponding value
    for p in ax.patches:
        bar_height = p.get_height()
        if pd.isna(bar_height):
            continue  # int(NaN) would raise; bars without data get no label
        ax.annotate(f'{int(bar_height):,}', (p.get_x() + p.get_width() / 2., bar_height),
                    ha='center', va='bottom', fontsize=10, fontweight='bold', rotation=90, xytext=(0, 5),
                    textcoords='offset points')

# Hide grid cells that did not receive a scenario
for unused_ax in axes[num_scenarios:]:
    unused_ax.set_visible(False)

# Adjust layout
plt.tight_layout()

# Save the plots.
# folder_path was only present as a commented-out assignment, so savefig raised
# NameError on a fresh kernel; define it here and make sure the target exists.
folder_path = "Script_Output_Graphs_Dataframes/Crawler_Output_Graphs"
os.makedirs(os.path.join(folder_path, "Final_Graphs"), exist_ok=True)
plt.savefig(f"{folder_path}/Final_Graphs/immigrants_by_country_scenario.png")

# Show the plots
plt.show()


: 

# Total Immigrants by Crossing

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Group by scenario and plot: one figure per scenario, one panel per country
for scenario in melted_df['Scenario'].unique():
    scenario_df = filtered_df[filtered_df['Scenario'] == scenario]
    unique_countries = scenario_df['Country'].unique()

    # Drop 'Crawler_Outputs' BEFORE assigning grid positions; skipping it inside
    # the enumerate loop left a hole and could push a later country off the grid.
    plot_countries = [country for country in unique_countries if country != 'Crawler_Outputs']

    # Create subplots for each scenario: 2 columns, at least 2 rows, more if needed
    n_cols = 2
    n_rows = max(2, -(-len(plot_countries) // n_cols))  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 5 * n_rows))
    fig.suptitle(f'Total Immigrants by Crossing for {scenario}', fontsize=16)

    # Calculate min and max values for each country, scenario, and crossing
    min_max_values = scenario_df.groupby(['Country', 'Scenario', 'Crossing'])['Total Immigrants'].agg(['min', 'max']).reset_index()

    # Plot for each country
    for i, country in enumerate(plot_countries):
        row, col = divmod(i, n_cols)
        country_df = scenario_df[scenario_df['Country'] == country]

        # Check if the country has any data for the scenario
        if country_df.empty:
            continue

        # NOTE(review): errorbar=('ci', 0) presumably exists only to hide the error
        # bars; errorbar=None may achieve that without bootstrapping - confirm.
        ax = sns.barplot(data=country_df, x=country_df['Year'].dt.year, y='Total Immigrants', hue='Crossing', ax=axes[row, col], palette='viridis', errorbar=('ci', 0))
        ax.set_title(country)
        ax.set_xlabel('Year')
        ax.set_ylabel('Total Immigrants')
        ax.grid(True)
        ax.legend(loc='upper right')

        # Downward shift for labels on negative bars; reset for every panel so a
        # value from a previous country (or no value at all) is never used.
        negative_label_shift = 0.0

        # Set y-axis limits based on min and max values per country, scenario, and crossing
        country_min_max = min_max_values[(min_max_values['Country'] == country) & (min_max_values['Scenario'] == scenario)]
        if not country_min_max.empty:
            lowest = country_min_max['min'].min()
            highest = country_min_max['max'].max()
            min_value = lowest - 0.1 * abs(lowest)  # Add space before min value
            max_value = highest + 0.1 * abs(highest)  # Add space after max value

            if pd.notna(min_value) and pd.notna(max_value):
                negative_label_shift = 0.02 * abs(max_value)

                # A zero-height range (e.g. all values 0) cannot be used as limits
                if max_value > min_value:
                    ax.set_ylim(min_value, max_value)

                    # Calculate the step size based on the range of values.
                    # Clamp to 1: range() raises ValueError for a step of 0.
                    value_range = max_value - min_value
                    step_size = max(1, int(round(value_range / 5)))  # Dividing into 5 steps

                    # Set y-axis tick labels using the step size
                    ax.set_yticks(range(int(min_value), int(max_value) + 1, step_size))

        # Annotate each bar with the total number of immigrants
        for p in ax.patches:
            bar_height = p.get_height()
            # Skip zero bars (no label wanted) and NaN bars (int(NaN) would raise)
            if pd.isna(bar_height) or bar_height == 0:
                continue
            if bar_height > 0:
                annotation_height = bar_height
            else:
                annotation_height = bar_height - negative_label_shift  # Adjust for negative bars
            ax.annotate(f'{int(bar_height):,}', (p.get_x() + p.get_width() / 2., annotation_height),
                        ha='center', va='bottom', fontsize=8, rotation=90, xytext=(0, 5),
                        textcoords='offset points')

    # Hide panels that ended up without any plotted data
    for spare_ax in axes.flat:
        if not spare_ax.has_data():
            spare_ax.set_visible(False)

    # Adjust layout
    plt.tight_layout(pad=2)

    # Show the plot, then release the figure so figures do not pile up in the loop
    plt.show()
    plt.close(fig)

: 

: 