In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math as math
import seaborn as sns
import plotly as plotly
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# GD Data
df_gdp = pd.read_csv("GDP/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_9865.csv")


In [3]:
Bad_lst = ["Africa Eastern and Southern", "Africa Western and Central", "Arab World", "Central Europe and the Baltics",
           "Caribbean small states", "East Asia & Pacific (excluding high income)",
             "Early-demographic dividend", "East Asia & Pacific", "Europe & Central Asia (excluding high income)",
            "Europe & Central Asia", "European Union", "Fragile and conflict affected situations",
            "High income", "Heavily indebted poor countries (HIPC)", "IBRD only", 
            "IDA & IBRD total", "IDA total", "IDA blend", "IDA only", "Not classified",
              "Latin America & Caribbean (excluding high income)", "Latin America & Caribbean",
              "Least developed countries: UN classification", "Low income", "Lower middle income",
              "Low & middle income", "Late-demographic dividend", "Middle East & North Africa", 
              "Middle income", "Middle East & North Africa (excluding high income)",
              "North America", "OECD members", "Other small states", "Pre-demographic dividend",
              "Pacific island small states", "Post-demographic dividend", "Sub-Saharan Africa (excluding high income)",
              "Small states", "East Asia & Pacific (IDA & IBRD countries)", "Europe & Central Asia (IDA & IBRD countries)",
              "Latin America & the Caribbean (IDA & IBRD countries)", "Middle East & North Africa (IDA & IBRD countries)", 
              "South Asia (IDA & IBRD)", "Sub-Saharan Africa (IDA & IBRD countries)", "Upper middle income",
              "World", "South Asia"]

In [4]:
df_gdp = df_gdp.drop(df_gdp[df_gdp["Country Name"].isin(Bad_lst)].index)

In [6]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


# Reshape the dataframe to have 'Year' and 'GDP (current US$)' columns
df_gdp_melted = df_gdp.melt(id_vars=["Country Name", "Country Code"], 
                            var_name="Year", 
                            value_name="GDP (current US$)", 
                            value_vars=df_gdp.columns[4:68])

# Convert the 'Year' column to numeric
df_gdp_melted["Year"] = pd.to_numeric(df_gdp_melted["Year"])

# Separate the United States data
df_us = df_gdp_melted[df_gdp_melted["Country Name"] == "United States"]
df_rest = df_gdp_melted[df_gdp_melted["Country Name"] != "United States"]

# Create a figure
fig = go.Figure()

# Add traces for each year
for year in df_gdp_melted["Year"].unique():
    df_year = df_gdp_melted[df_gdp_melted["Year"] == year]
    df_us_year = df_us[df_us["Year"] == year]
    df_rest_year = df_rest[df_rest["Year"] == year]
    
    max_value = df_year["GDP (current US$)"].max()
    
    # Add choropleth trace for the rest of the countries
    fig.add_trace(go.Choropleth(
        locations=df_rest_year["Country Code"],
        z=df_rest_year["GDP (current US$)"],
        text=df_rest_year["Country Name"],
        colorscale=px.colors.sequential.Plasma,
        zmin=0,
        zmax=max_value,
        colorbar=dict(title="GDP (current US$)"),
        showscale=False,
        name=f"Rest of the World ({year})"
    ))
    
    # Add scatter trace for the United States
    fig.add_trace(go.Scattergeo(
        locationmode='ISO-3',
        locations=df_us_year["Country Code"],
        text=df_us_year["Country Name"],
        marker=dict(
            size=10,
            color=df_us_year["GDP (current US$)"],
            colorscale=px.colors.sequential.Plasma,
            cmin=0,
            cmax=max_value,
            colorbar=dict(
                title="GDP (current US$)",
                tickvals=[0, max_value],
                ticktext=["Low", "High"]
            )
        ),
        name=f"United States ({year})",
        showlegend=True
    ))

# Update layout to display the legend horizontally
fig.update_layout(
    title="World GDP Over Time",
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

fig.show()

In [7]:
import plotly.graph_objects as go

# Function to create the dynamic world map with year-specific color scales
def create_dynamic_world_map(df, year_column, value_column, title):
    # Extract unique years in sorted order
    unique_years = sorted(df[year_column].unique())

    # Create the figure
    fig = go.Figure()

    # Loop over each year to create a separate trace with its own color scale
    for year in unique_years:
        year_frame = df[df[year_column] == year]
        min_gdp = year_frame[value_column].min()
        max_gdp = year_frame[value_column].max()

        # Create a choropleth trace for each year with year-specific zmin and zmax
        fig.add_trace(go.Choropleth(
            locations=year_frame["Country Code"],
            z=year_frame[value_column],
            locationmode='ISO-3',
            text=year_frame["Country Name"],
            colorscale="Plasma",
            zmin=min_gdp,
            zmax=max_gdp,
            colorbar_title="GDP (current US$)",
            visible=False  # Initially set all traces to invisible
        ))

    # Set the first trace (earliest year) as visible
    fig.data[0].visible = True

    # Create slider steps to update the visible trace
    steps = []
    for i, year in enumerate(unique_years):
        step = dict(
            method="update",
            args=[{"visible": [False] * len(unique_years)},  # Hide all traces
                  {"title": f"{title} - {year}"}],  # Update title to reflect year
        )
        step["args"][0]["visible"][i] = True  # Only make the current year trace visible
        steps.append(step)

    # Add the slider to the figure layout
    fig.update_layout(
        sliders=[dict(
            active=0,
            currentvalue={"prefix": "Year: "},
            pad={"t": 50},
            steps=steps
        )],
        width=1500,
        height=1000,
        title=title
    )

    fig.show()

# Generate the dynamic world map using the cleaned GDP dataframe
create_dynamic_world_map(df_gdp_melted_filtered, 'Year', 'GDP (current US$)', 'World GDP Over Time')


NameError: name 'df_gdp_melted_filtered' is not defined

In [8]:
#Migration Data
df_mig = pd.read_csv("Migration/API_SM.POP.NETM_DS2_en_csv_v2_10087.csv")

In [9]:
df_mig = df_mig.drop(df_mig[df_mig["Country Name"].isin(Bad_lst)].index)

In [10]:
# # Find the ten largest net migration values by country
# top_10_net_migration = df_mig.nlargest(10, 'Net Migration')
# print(top_10_net_migration)

In [11]:
df_mig_melt = df_mig.melt(
    id_vars=["Country Name", "Country Code"], 
    var_name="Year", 
    value_name="Net Migration", 
    value_vars=[str(year) for year in range(1960, 2024)]  # Adjust range to cover 1960-2023
)

# Convert 'Year' column to numeric
df_mig_melt["Year"] = pd.to_numeric(df_mig_melt["Year"])

# Create the animated choropleth map
def create_dynamic_world_map(df, year_column, value_column, title):
    fig = px.choropleth(
        df,
        locations="Country Code",
        color=value_column,
        hover_name="Country Name",
        animation_frame=year_column,
        title=title,
        color_continuous_scale=px.colors.sequential.Plasma,
        range_color=(df[value_column].min(), df[value_column].max())
    )
    fig.update_layout(width=1500, height=1000, coloraxis_colorbar=dict(orientation='h', y=-0.05))
    fig.show()

# Generate the map
create_dynamic_world_map(df_mig_melt, 'Year', 'Net Migration', 'World Net Migration Over Time')

Normalized Numbers

In [12]:
import numpy as np
import plotly.express as px

# Apply log transformation to the 'Net Migration' column to normalize values
df_mig_melt['Net Migration (log)'] = np.log1p(df_mig_melt['Net Migration'].abs()) * np.sign(df_mig_melt['Net Migration'])

# Function to create the dynamic world map with normalized (log-transformed) migration values
def create_normalized_world_map(df, year_column, value_column, title):
    fig = px.choropleth(
        df,
        locations="Country Code",
        color=value_column,
        hover_name="Country Name",
        animation_frame=year_column,
        title=title,
        color_continuous_scale=px.colors.sequential.Plasma,
        range_color=(df[value_column].min(), df[value_column].max())
    )
    fig.update_layout(width=1500, height=1000, coloraxis_colorbar=dict(orientation='h', y=-0.05))
    fig.show()

# Generate the normalized dynamic map using the cleaned and log-transformed data
create_normalized_world_map(df_mig_melt, 'Year', 'Net Migration (log)', 'World Net Migration Over Time (Log Scale)')


ChatGPT Trend Line

In [13]:
import plotly.express as px

# Function to plot migration trends over years for selected or all countries
def plot_migration_trends(df, country_filter=None):
    """
    Plots migration trends over the years.
    
    Parameters:
    - df: DataFrame containing 'Country Name', 'Year', and 'Net Migration' columns.
    - country_filter: List of country names to filter and plot. If None, plots for all countries.
    """
    # Filter data for selected countries if provided
    if country_filter:
        df = df[df["Country Name"].isin(country_filter)]
    
    # Create line plot
    fig = px.line(
        df,
        x="Year",
        y="Net Migration",
        color="Country Name",
        title="Migration Trends Over the Years",
        labels={"Net Migration": "Net Migration (people)", "Year": "Year"}
    )
    
    # Update layout for better readability
    fig.update_layout(width=1000, height=600)
    fig.show()

# Example usage:
# Plot migration trends for all countries
plot_migration_trends(df_mig_melt)

# Or, to plot trends for specific countries (e.g., "Afghanistan", "India", "United States")
# plot_migration_trends(df_mig_melted, country_filter=["Afghanistan", "India", "United States"])


In [14]:
#Migration Data
df_mig = pd.read_csv("Migration/API_SM.POP.NETM_DS2_en_csv_v2_10087.csv")

In [15]:
df_mig = df_mig.drop(df_mig[df_mig["Country Name"].isin(Bad_lst)].index)

In [16]:
df_pop = pd.read_csv("Population/Population_data.csv")

In [17]:
df_pop = df_pop.drop(df_pop[df_pop["Country Name"].isin(Bad_lst)].index)

In [18]:
# GD Data
df_gdp = pd.read_csv("GDP/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_9865.csv")

In [19]:
df_gdp = df_gdp.drop(df_gdp[df_gdp["Country Name"].isin(Bad_lst)].index)

In [20]:
# Reshape the dataframe to have 'Year' and 'GDP (current US$)' columns
df_gdp_melted = df_gdp.melt(id_vars=["Country Name", "Country Code"], 
                            var_name="Year", 
                            value_name="GDP (current US$)", 
                            value_vars=df_gdp.columns[4:68])

# Convert the 'Year' column to numeric
df_gdp_melted["Year"] = pd.to_numeric(df_gdp_melted["Year"])

In [21]:
# # Reshape the dataframe to have 'Year' and 'Net Migration' columns
df_mig_melted = df_mig.melt(id_vars=["Country Name", "Country Code"], 
                            var_name="Year", 
                            value_name="Net Migration", 
                            value_vars=df_mig.columns[4:68])

# Convert the 'Year' column to numeric
df_mig_melted["Year"] = pd.to_numeric(df_mig_melted["Year"])


In [22]:
# Reshape the dataframe to have 'Year' and 'World Populations' columns
df_pop_melted = df_pop.melt(id_vars=["Country Name", "Country Code"], 
                            var_name="Year", 
                            value_name="Current Population", 
                            value_vars=df_gdp.columns[4:68])

# Convert the 'Year' column to numeric
df_pop_melted["Year"] = pd.to_numeric(df_pop_melted["Year"])


In [25]:
import pandas as pd
import plotly.express as px

# Assuming df_gdp_melted, df_mig_melted, and df_pop_melted are already defined and loaded

# Step 1: Merge the dataframes on Country Code and Year
df_merged = pd.merge(df_gdp_melted, df_mig_melted, on=["Country Code", "Year"], suffixes=('_gdp', '_mig'))
df_merged = pd.merge(df_merged, df_pop_melted, on=["Country Code", "Year"])

# Step 1: Get unique country names
country_names = df_merged['Country Name'].unique()

# Step 2: Loop through each unique pair of countries and plot the correlation matrix
for i, country1 in enumerate(country_names):
    for country2 in country_names[i+1:]:  # Start the inner loop from the next country to avoid duplicate pairs
        print(f"Comparing {country1} vs {country2}")
        plot_correlation_matrix(country1, country2)

# Step 2: Create a dictionary to store correlation matrices for each country
country_corr_matrices = {}

for country in df_merged['Country Name'].unique():
    country_data = df_merged[df_merged['Country Name'] == country]
    corr_matrix = country_data[['GDP (current US$)', 'Net Migration', 'Current Population']].corr()
    country_corr_matrices[country] = corr_matrix

# Function to update the plot based on selected countries
def plot_correlation_matrix(country1, country2):
    fig = px.imshow(
        [country_corr_matrices[country1].values, country_corr_matrices[country2].values],
        labels=dict(x="Indicators", y="Indicators", color="Correlation"),
        x=country_corr_matrices[country1].columns,
        y=country_corr_matrices[country1].index,
        facet_col=[0, 1],  # Split the two countries' matrices side by side
        color_continuous_scale="Viridis",
    )
    fig.update_layout(
        title_text=f"Correlation Matrix Comparison: {country1} vs {country2}",
        coloraxis_colorbar=dict(title="Correlation"),
    )
    fig.show()

# Choose two countries to visualize their comparison
# Update country1 and country2 based on user preference
country1 = "CountryA"
country2 = "CountryB"
plot_correlation_matrix(country1, country2)


Comparing Aruba vs Afghanistan


AttributeError: 'list' object has no attribute 'shape'

In [23]:
import pandas as pd
import plotly.graph_objects as go

# Assuming df_gdp_melted, df_mig_melted, and df_pop_melted are already defined and loaded

# Step 1: Merge the dataframes on Country Code and Year
df_merged = pd.merge(df_gdp_melted, df_mig_melted, on=["Country Code", "Year"], suffixes=('_gdp', '_mig'))
df_merged = pd.merge(df_merged, df_pop_melted, on=["Country Code", "Year"])

# Step 2: Create a dictionary to store correlation matrices for each country
country_corr_matrices = {}

for country in df_merged['Country Name'].unique():
    country_data = df_merged[df_merged['Country Name'] == country]
    corr_matrix = country_data[['GDP (current US$)', 'Net Migration', 'Current Population']].corr()
    country_corr_matrices[country] = corr_matrix

# Step 3: Initialize the figure with two placeholder heatmaps
fig = go.Figure()

# Add two initial heatmaps for displaying two selected countries' matrices side by side
fig.add_trace(
    go.Heatmap(
        z=country_corr_matrices[list(country_corr_matrices.keys())[0]].values,
        x=country_corr_matrices[list(country_corr_matrices.keys())[0]].columns,
        y=country_corr_matrices[list(country_corr_matrices.keys())[0]].columns,
        coloraxis="coloraxis",
        name="Country 1",
        showscale=False  # Scale only on the second heatmap for clarity
    )
)

fig.add_trace(
    go.Heatmap(
        z=country_corr_matrices[list(country_corr_matrices.keys())[1]].values,
        x=country_corr_matrices[list(country_corr_matrices.keys())[1]].columns,
        y=country_corr_matrices[list(country_corr_matrices.keys())[1]].columns,
        coloraxis="coloraxis",
        name="Country 2",
        showscale=True
    )
)

# Step 4: Create dropdown options to update heatmaps for selected countries
dropdown_buttons = []
for country1 in country_corr_matrices.keys():
    for country2 in country_corr_matrices.keys():
        if country1 != country2:
            dropdown_buttons.append(
                {
                    "method": "update",
                    "label": f"{country1} vs {country2}",
                    "args": [
                        {"z": [country_corr_matrices[country1].values, country_corr_matrices[country2].values]},
                        {"title": f"Correlation Comparison: {country1} vs {country2}"}
                    ]
                }
            )

# Step 5: Update layout with dropdown menu
fig.update_layout(
    updatemenus=[
        {
            "buttons": dropdown_buttons,
            "direction": "down",
            "showactive": True,
        }
    ],
    title="Correlation Matrix Comparison Between Two Countries",
    coloraxis={'colorscale': 'Viridis'},
)

# Show the figure
fig.show()



KeyboardInterrupt: 