In [None]:
%pip install dash
%pip install pyngrok
%pip install plotly
%pip install dash-bootstrap-components

Collecting dash
  Downloading dash-3.0.2-py3-none-any.whl.metadata (10 kB)
Collecting Flask<3.1,>=1.0.4 (from dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-3.0.2-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading flask-3.0.3-py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading werkzeug-3.0.6-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m228.0/228.0 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: Werkzeug, retryi

In [None]:
import dash
import dash.dcc as dcc
import dash.html as html
import plotly.graph_objs as go
from dash.dependencies import Input, Output
import pandas as pd

# Load the data from CSV files
# Every export read here is ';'-separated, has one leading line that is
# skipped, and is given the same five column names. Naming the columns via
# read_csv(names=...) replaces the earlier "insert the header as a data row,
# re-sort, then promote it" sequence, which turned every column (year and
# value included) into object dtype and was copy-pasted once per file.
new_header = ["country", "variable", "percentile", "year", "value"]
WID_DATA_URL = "https://raw.githubusercontent.com/AkashGupta-26/IDS-2025-WritingAssignment/main/data/"


def load_wid_csv(file_name, label):
    """Read one CSV export and tag all of its rows with ``label``.

    Args:
        file_name: File name appended to WID_DATA_URL.
        label: Value written to the 'variable' column of every row,
            overwriting whatever the file stored in that column.

    Returns:
        DataFrame with the columns listed in ``new_header``.
    """
    frame = pd.read_csv(
        WID_DATA_URL + file_name,
        sep=';',
        engine='python',
        skiprows=1,
        header=None,
        names=new_header,
    )
    frame['variable'] = label
    return frame


# One frame per percentile group; the names df/df2/df3 are read by later code.
df = load_wid_csv("WID_Data_12042025-230756.csv", 'Top 1%')
df2 = load_wid_csv("WID_Data_12042025-233335.csv", 'Top 10%')
df3 = load_wid_csv("WID_Data_12042025-233501.csv", 'Below 50%')

# Stacked view of all three groups, plus the country list for the dropdown.
combined_df = pd.concat([df, df2, df3])
unique_countries = combined_df['country'].unique()

# Dash application for the percentile income-share explorer.
app = dash.Dash(__name__)

# One dropdown entry per country found in the combined data.
country_options = [{'label': name, 'value': name} for name in unique_countries]

# The three percentile groups; label and value are the same text.
percentile_options = [
    {'label': group, 'value': group}
    for group in ('Top 1%', 'Top 10%', 'Below 50%')
]

# Multi-select country picker.
country_selector = dcc.Dropdown(
    id='country-dropdown',
    options=country_options,
    multi=True,
    value=["Afghanistan"],
    placeholder="Select countries",
)

# Single-select percentile picker.
percentile_selector = dcc.Dropdown(
    id='percentile-dropdown',
    options=percentile_options,
    value='Top 1%',
    placeholder="Select percentile",
)

# Page: heading, the two pickers, then the graph filled in by the callback.
app.layout = html.Div([
    html.H1("Country Data Plot for Various Percentiles"),
    country_selector,
    percentile_selector,
    dcc.Graph(id='line-plot'),
])

@app.callback(
    Output('line-plot', 'figure'),
    [Input('country-dropdown', 'value'),
     Input('percentile-dropdown', 'value')]
)
def update_graph(selected_countries, selected_percentile):
    """Build the line chart for the chosen countries and percentile group.

    Args:
        selected_countries: Value of the multi-select country dropdown; may be
            None or empty when nothing is selected.
        selected_percentile: Value of the percentile dropdown ('Top 1%',
            'Top 10%' or 'Below 50%'); may be None when the dropdown is cleared.

    Returns:
        A plotly Figure with one lines+markers trace per selected country, or
        an empty figure with a prompt title when a selection is missing.
    """
    fig = go.Figure()

    # A missing selection previously crashed the callback: filtered_df was
    # never assigned for an unmatched percentile, and isin()/iteration fail
    # on None. Return a placeholder figure instead.
    if not selected_countries or not selected_percentile:
        fig.update_layout(
            title="Select at least one country and a percentile.",
            xaxis_title="Year",
            yaxis_title="Income Share",
        )
        return fig

    # Filter the combined frame by its 'variable' label rather than reading
    # the global `df`: that name is rebound later in the notebook, which would
    # silently change what this callback sees while the server is running.
    matches_group = combined_df['variable'] == selected_percentile
    matches_country = combined_df['country'].isin(selected_countries)
    filtered_df = combined_df[matches_group & matches_country]

    for country in selected_countries:
        country_data = filtered_df[filtered_df['country'] == country]
        fig.add_trace(go.Scatter(
            x=country_data['year'],
            y=country_data['value'],
            mode='lines+markers',
            name=country
        ))

    fig.update_layout(
        title=f"Income Share for {selected_percentile} Percentile",
        xaxis_title="Year",
        yaxis_title="Income Share",
        hovermode="closest"
    )

    return fig

# Run the server
# Starts the Dash app bound to `app` with debug mode enabled; the guard limits
# this to execution as the main module (not when the code is imported).
if __name__ == '__main__':
    app.run(debug=True)


<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
import plotly.express as px
import numpy as np

def plot_income_share(df, category, selected_df=None, threshold=100):
    """Show an animated choropleth of income share by country, binned in 2% steps.

    Args:
        df: Frame with 'country', 'year' and 'value' columns. Used as the data
            source only when ``selected_df`` is None.
        category: Label used in the figure title and in the warning message.
        selected_df: Optional frame (same columns) to plot instead of ``df``.
        threshold: A year qualifies as the starting year only if it has more
            than this many rows; earlier years are dropped. Defaults to 100.

    Returns:
        None. The figure is displayed with ``fig.show()``. If no year exceeds
        ``threshold`` rows, a warning is printed and nothing is plotted.
    """
    source = df if selected_df is None else selected_df

    # Work on a copy so the caller's frame is never modified, and make sure
    # year/value are numeric even if the frame was loaded with object columns.
    data = source.copy()
    data['year'] = pd.to_numeric(data['year'], errors='coerce')
    data['value'] = pd.to_numeric(data['value'], errors='coerce')

    year_counts = data['year'].value_counts().sort_index()
    valid_years = year_counts[year_counts > threshold]

    if valid_years.empty:
        print(f"Warning: No valid years found for category: {category}. Skipping plot for this category.")
        return

    # Keep everything from the first sufficiently populated year onwards.
    start_year = valid_years.index[0]
    df_clean = data[data['year'] >= start_year].copy()
    df_clean['year'] = df_clean['year'].astype(int)
    df_clean = df_clean.sort_values(by='year')

    bin_edges = np.arange(0, df_clean['value'].max() + 0.02, 0.02)
    # Round (not truncate) when converting an edge to a percentage: e.g.
    # 0.58 * 100 evaluates to 57.99999999999999, which int() would turn into
    # 57 and yield a wrong "56–57%" label.
    bin_labels = [
        f"{int(round(float(left) * 100))}–{int(round(float(right) * 100))}%"
        for left, right in zip(bin_edges[:-1], bin_edges[1:])
    ]
    df_clean['bin'] = pd.cut(df_clean['value'], bins=bin_edges, labels=bin_labels, include_lowest=True)
    # The animation frame is keyed on the year as text.
    df_clean['year'] = df_clean['year'].astype(str)

    # Assign one palette colour per bin, cycling if there are more bins than colours.
    color_list = px.colors.qualitative.Safe * 3
    unique_bins = df_clean['bin'].unique().tolist()
    color_map = {label: color_list[i % len(color_list)] for i, label in enumerate(unique_bins)}

    fig = px.choropleth(
        df_clean,
        locations="country",
        locationmode="country names",
        color="bin",
        hover_name="country",
        animation_frame="year",
        color_discrete_map=color_map,
        title=f"{category} Income Share by Country (Distinct 2% Steps)"
    )

    fig.update_layout(
        geo=dict(
            showframe=False,
            showcoastlines=False,
            projection_type="natural earth",
            bgcolor='black'
        ),
        paper_bgcolor='black',
        plot_bgcolor='black',
        font_color='white',
        title_font_size=20,
        legend_title_text="Income Share",
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        height=600
    )

    fig.show()

def dynamic_category_plot():
    """Prompt for one of three income-share categories and plot the matching data.

    Prints the numbered category list, reads a number from standard input and
    calls ``plot_income_share`` with the frame for that category. A non-numeric
    entry or a number outside 1-3 prints an error message and returns without
    plotting. Reads the module-level frames ``df``, ``df2`` and ``df3``.
    """
    categories = ['Top 1%', 'Top 10%', 'Bottom 50%']

    print("Available categories:")
    for number, label in enumerate(categories, start=1):
        print(f"{number}. {label}")

    try:
        choice = int(input("Enter the number (1-3) for the category you want to plot: "))
        if not 1 <= choice <= 3:
            raise ValueError("Invalid choice")
    except ValueError as err:
        # Covers both unparsable text and an out-of-range number.
        print(f"Error: {err}")
        return

    selected_category = categories[choice - 1]

    # 'Top 1%' plots df itself (no override); the other two override the data.
    if choice == 1:
        override = None
    elif choice == 2:
        override = df2
    else:
        override = df3

    plot_income_share(df, selected_category, selected_df=override)

dynamic_category_plot()


Available categories:
1. Top 1%
2. Top 10%
3. Bottom 50%
Enter the number (1-3) for the category you want to plot: 1


In [None]:
import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px

# Column names applied to the file (its own first line is skipped).
new_header = ["country", "variable", "percentile", "year", "value"]

# Name the columns directly in read_csv instead of inserting the header as a
# data row and promoting it afterwards, which forced every column to object
# dtype before the numeric conversion below.
f = pd.read_csv(
    "https://raw.githubusercontent.com/AkashGupta-26/IDS-2025-WritingAssignment/main/data/female_data.csv",
    sep=';',
    engine='python',
    skiprows=1,
    header=None,
    names=new_header,
)

# Unparsable entries become NaN rather than raising.
f["year"] = pd.to_numeric(f["year"], errors='coerce')
f["value"] = pd.to_numeric(f["value"], errors='coerce')

# Dash application for the female income-share chart.
app = dash.Dash(__name__)

# Alphabetically sorted country choices taken from the loaded frame.
country_choices = [
    {'label': name, 'value': name}
    for name in sorted(f['country'].unique())
]

# Multi-select country picker.
country_picker = dcc.Dropdown(
    id='country-dropdown',
    options=country_choices,
    value=['Afghanistan'],
    multi=True,
    placeholder="Select countries"
)

# Page: heading, country picker, then the graph filled in by the callback.
app.layout = html.Div([
    html.H1("Female Income Share Over Time"),
    country_picker,
    dcc.Graph(id='line-plot')
])

# Redraw the line plot whenever the country selection changes.
@app.callback(
    Output('line-plot', 'figure'),
    [Input('country-dropdown', 'value')]
)
def update_graph(selected_countries):
    """Return a dark-themed line chart of value over year for the chosen countries.

    Args:
        selected_countries: Value of the multi-select country dropdown.

    Returns:
        A plotly express line figure with one coloured line per country, or an
        empty figure titled "No countries selected." when the selection is
        empty or None.
    """
    nothing_selected = not selected_countries
    if nothing_selected:
        return px.line(title="No countries selected.")

    # Rows of the module-level frame `f` belonging to the selection.
    is_selected = f['country'].isin(selected_countries)
    chart = px.line(
        f[is_selected],
        x='year',
        y='value',
        color='country',
        title='Income Share Over Years',
        markers=True,
    )
    chart.update_layout(template='plotly_dark')
    return chart

# Run app
# Starts the Dash app bound to `app` with debug mode enabled; the guard limits
# this to execution as the main module (not when the code is imported).
if __name__ == '__main__':
    app.run(debug=True)


<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
import plotly.express as px

# Column names applied to the file (its own first line is skipped).
new_header = ["country", "variable", "percentile", "year", "value"]

# Load, clean and rank in one non-mutating chain. Naming the columns in
# read_csv replaces inserting the header as a data row and promoting it,
# which forced every column to object dtype.
gdp = (
    pd.read_csv(
        "https://raw.githubusercontent.com/AkashGupta-26/IDS-2025-WritingAssignment/main/data/GDP_data.csv",
        sep=';',
        engine='python',
        skiprows=1,
        header=None,
        names=new_header,
    )
    # Unparsable entries become NaN and are dropped in the next step.
    .assign(
        year=lambda d: pd.to_numeric(d['year'], errors='coerce'),
        value=lambda d: pd.to_numeric(d['value'], errors='coerce'),
    )
    .dropna(subset=['year', 'value'])
    # Rank within each year, 1 = largest value; ties keep file order.
    .assign(
        rank=lambda d: d.groupby('year')['value']
        .rank(method='first', ascending=False)
        .astype(int)
    )
    .sort_values(by=['year', 'rank'])
)

# Animated bubble chart: x is the within-year rank, y and bubble size are the
# value, with one animation frame per year and a text label per country.
fig = px.scatter(
    gdp,
    x="rank",
    y="value",
    size="value",
    size_max=60,
    color="country",
    text="country",
    hover_name="country",
    animation_frame="year",
    title=" per adult GDP of Country Over Time",
)

# Place each country label above its bubble.
fig.update_traces(textposition="top center")

# Reversed x axis, dark theme, no legend (labels are drawn on the bubbles).
fig.update_layout(
    xaxis_title="Rank (1 = Highest GDP)",
    xaxis_autorange="reversed",
    yaxis_title="GDP Value",
    template="plotly_dark",
    height=600,
    showlegend=False,
    margin={"t": 50, "b": 40, "l": 10, "r": 10},
)

fig.show()


In [None]:
import pandas as pd
import plotly.express as px

# Column names applied to the file (its own first line is skipped).
new_header = ["country", "variable", "percentile", "year", "value"]

# Load, clean and rank in one non-mutating chain. Naming the columns in
# read_csv replaces inserting the header as a data row and promoting it,
# which forced every column to object dtype.
wealth = (
    pd.read_csv(
        "https://raw.githubusercontent.com/AkashGupta-26/IDS-2025-WritingAssignment/main/data/wealth_income.csv",
        sep=';',
        engine='python',
        skiprows=1,
        header=None,
        names=new_header,
    )
    # Unparsable entries become NaN and are dropped in the next step.
    .assign(
        year=lambda d: pd.to_numeric(d['year'], errors='coerce'),
        value=lambda d: pd.to_numeric(d['value'], errors='coerce'),
    )
    .dropna(subset=['year', 'value'])
    # Rank within each year, 1 = largest value; ties keep file order.
    .assign(
        rank=lambda d: d.groupby('year')['value']
        .rank(method='first', ascending=False)
        .astype(int)
    )
    .sort_values(by=['year', 'rank'])
)

# Fixed y-axis ceiling: 20% above the largest value across all years.
y_max = wealth['value'].max() * 1.2

# Animated bubble chart: x is the within-year rank, y and bubble size are the
# value, with one animation frame per year and a text label per country.
fig = px.scatter(
    wealth,
    x="rank",
    y="value",
    size="value",
    size_max=60,
    color="country",
    text="country",
    hover_name="country",
    animation_frame="year",
    title="Wealth/Income ratio Share by Country Over Time",
)

# Place each country label above its bubble.
fig.update_traces(textposition="top center")

# Reversed x axis and a fixed y range from 0 to y_max.
fig.update_layout(
    xaxis_title="Rank (1 = Highest Value)",
    xaxis_autorange="reversed",
    yaxis_title="Wealth/Income Value",
    yaxis_range=[0, y_max],
    template="plotly_dark",
    height=600,
    showlegend=False,
    margin={"t": 50, "b": 40, "l": 10, "r": 10},
)

fig.show()


In [None]:
import pandas as pd
import plotly.express as px

# Column names applied to the file (its own first line is skipped).
new_header = ["country", "variable", "percentile", "year", "value"]

# Load and clean in one non-mutating chain. Naming the columns in read_csv
# replaces inserting the header as a data row and promoting it, which forced
# every column to object dtype.
co2 = (
    pd.read_csv(
        "https://raw.githubusercontent.com/AkashGupta-26/IDS-2025-WritingAssignment/main/data/co2_emmision.csv",
        sep=';',
        engine='python',
        skiprows=1,
        header=None,
        names=new_header,
    )
    # Unparsable entries become NaN and are dropped in the next step.
    .assign(
        year=lambda d: pd.to_numeric(d['year'], errors='coerce'),
        value=lambda d: pd.to_numeric(d['value'], errors='coerce'),
    )
    .dropna(subset=['year', 'value'])
)

# Area chart of value over year, one coloured area per country.
axis_labels = {"value": "CO₂ Emissions", "year": "Year"}
fig = px.area(
    co2,
    x="year",
    y="value",
    color="country",
    labels=axis_labels,
    title=" CO₂ Emissions Area Plot by Country Over Time",
)

# Dark theme with tight side margins.
fig.update_layout(
    template="plotly_dark",
    height=600,
    margin={"t": 50, "b": 40, "l": 10, "r": 10},
)

# Render the figure.
fig.show()


In [None]:
import pandas as pd
import plotly.express as px

# Column names applied to each file (the file's own first line is skipped).
header = ["country", "variable", "percentile", "year", "value"]
SHARE_DATA_URL = "https://raw.githubusercontent.com/AkashGupta-26/IDS-2025-WritingAssignment/main/data/"


def _read_share_file(file_name, category):
    """Read one ';'-separated share file and tag its rows with ``category``.

    Columns are named through read_csv(names=...) instead of inserting the
    header as a data row and promoting it, which forced object dtype on every
    column and was repeated for each file.
    """
    frame = pd.read_csv(
        SHARE_DATA_URL + file_name,
        sep=';',
        engine='python',
        skiprows=1,
        header=None,
        names=header,
    )
    frame['category'] = category
    return frame


top1 = _read_share_file("top1_data.csv", 'Top 1%')
top10 = _read_share_file("top10_data.csv", 'Top 10%')
bottom50 = _read_share_file("bottom50_data.csv", 'Bottom 50%')

# NOTE: this rebinds the module-level name `df`, which earlier code in the
# notebook also assigns.
df = pd.concat([top1, top10, bottom50], ignore_index=True)
df['year'] = pd.to_numeric(df['year'], errors='coerce')
df['value'] = pd.to_numeric(df['value'], errors='coerce')

# Drop incomplete rows without mutating in place.
df = df.dropna(subset=['country', 'year', 'category', 'value'])

# Keep only (country, year) pairs that have a row in all three categories.
expected_categories = 3
grouped = df.groupby(['country', 'year'])['category'].nunique().reset_index()
complete_country_years = grouped[grouped['category'] == expected_categories]

df_complete = df.merge(complete_country_years[['country', 'year']], on=['country', 'year'])

# Keep only years in which every remaining country is present, so each
# animation frame shows the same set of countries.
countries_per_year = df_complete.groupby('year')['country'].nunique()
total_countries = df_complete['country'].nunique()
valid_years = countries_per_year[countries_per_year == total_countries].index

df_final = df_complete[df_complete['year'].isin(valid_years)]


# Grouped bars per country, coloured by category, one animation frame per year.
fig = px.bar(
    df_final,
    x="country",
    y="value",
    color="category",
    barmode="group",
    animation_frame="year",
    labels={"value": "Wealth Value", "year": "Year"},
    title="Wealth Distribution by Country and Category Over Time (Only Complete Years)",
)

# Play/Stop controls for the animation.
play_button = dict(
    label="Play",
    method="animate",
    args=[None, {"frame": {"duration": 500, "redraw": True}, "fromcurrent": True}],
)
stop_button = dict(
    label="Stop",
    method="animate",
    args=[[None], {"mode": "immediate",
                   "frame": {"duration": 0, "redraw": True},
                   "transition": {"duration": 0}}],
)
animation_menu = dict(
    type="buttons",
    direction="left",
    showactive=False,
    x=0.1,
    xanchor="left",
    y=-0.2,
    yanchor="bottom",
    buttons=[play_button, stop_button],
)

# Dark theme, generous bottom margin for the angled country tick labels.
fig.update_layout(
    template="plotly_dark",
    height=700,
    margin={"t": 100, "b": 200, "l": 50, "r": 50},
    legend_title="Category",
    xaxis={
        "title": "Country",
        "tickangle": 45,
        "tickfont": {"size": 10},
        "automargin": True,
    },
    yaxis={
        "title": "Wealth Value",
        "showgrid": True,
    },
    updatemenus=[animation_menu],
)

# Year slider appearance and transition settings.
slider_settings = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 14},
        "prefix": "Year=",
        "visible": True,
        "xanchor": "right",
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 60},
}
fig.update_layout(sliders=[slider_settings])

fig.show()


Testing commit and push