<a href="https://colab.research.google.com/github/ReemFarah/ReemFarah.github.io/blob/main/CC10_%E2%80%93_Option_1_Interactive_Visualisations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import altair as alt
import pandas as pd

In [2]:
import requests

# List of indicators with World Bank API URLs
indicators = [
    {"name": "Mobile Data", "url": "http://api.worldbank.org/v2/country/KEN/indicator/IT.CEL.SETS?format=json"},
    {"name": "Internet Users", "url": "http://api.worldbank.org/v2/country/KEN/indicator/IT.NET.USER.ZS?format=json"},
    {"name": "Mobile Money Accounts", "url": "http://api.worldbank.org/v2/country/KEN/indicator/FX.OWN.TOTL.ZS?format=json"}
]

# Fetch and clean data for each indicator
for indicator in indicators:
    response = requests.get(indicator["url"])
    if response.status_code == 200:
        data = response.json()
        if len(data) > 1 and data[1] is not None:
            records = data[1]
            df = pd.DataFrame(records)
            df = df[["date", "value"]].dropna()  # Select relevant columns and drop missing values
            df["date"] = pd.to_numeric(df["date"], errors="coerce")
            df["value"] = pd.to_numeric(df["value"], errors="coerce")
            df = df.dropna()  # Ensure clean data
            filename = f"{indicator['name'].lower().replace(' ', '_')}_kenya.csv"
            df.to_csv(filename, index=False)
            print(f"Saved {indicator['name']} data to {filename}.")
        else:
            print(f"No data available for {indicator['name']}.")
    else:
        print(f"Error fetching data for {indicator['name']}.")

Saved Mobile Data data to mobile_data_kenya.csv.
Saved Internet Users data to internet_users_kenya.csv.
Saved Mobile Money Accounts data to mobile_money_accounts_kenya.csv.


In [3]:
import os
for file in os.listdir():
    if file.endswith(".csv"):
        print(file)

mobile_money_accounts_kenya.csv
internet_users_kenya.csv
mobile_data_kenya.csv


In [9]:
!pip install altair
import altair as alt
import pandas as pd



In [11]:
# Load datasets
mobile_data = pd.read_csv("mobile_data_kenya.csv")
internet_users = pd.read_csv("internet_users_kenya.csv")
mobile_money_accounts = pd.read_csv("mobile_money_accounts_kenya.csv")

# Add a new column to each dataset to distinguish them
mobile_data['Indicator'] = 'Mobile Data'
internet_users['Indicator'] = 'Internet Users'
mobile_money_accounts['Indicator'] = 'Mobile Money Accounts'

# Combine datasets into one DataFrame
combined_data = pd.concat([mobile_data, internet_users, mobile_money_accounts])

# Create an Altair chart
chart = alt.Chart(combined_data).mark_line(point=True).encode(
    x=alt.X('date:T', title='Year'),
    y=alt.Y('value:Q', title='Value'),
    color='Indicator:N',
    tooltip=['date:T', 'value:Q', 'Indicator:N']
).properties(
    title='Interactive Indicator Trends'
).interactive()  # Enable zoom and pan

# Save the chart as a JSON file
chart.save('chart.json')

# Optional: Show the chart
chart.display()


Insight: The chart highlights how technological advancements, such as growth in Mobile Data and Internet Users, have driven financial inclusion in Kenya through Mobile Money adoption. Why It Matters: It shows the synergy between digital connectivity and financial services, bridging access gaps and empowering underserved populations.

In [12]:
# Ensure all years are covered
all_years = pd.DataFrame({"date": range(2010, 2022)})

# Merge datasets
internet_users = pd.merge(all_years, internet_users, on="date", how="left")
mobile_money_accounts = pd.merge(all_years, mobile_money_accounts, on="date", how="left")

# Fill missing values with 0 or interpolate for better visualization
internet_users["value"] = internet_users["value"].fillna(0)
mobile_money_accounts["value"] = mobile_money_accounts["value"].fillna(0)

# Merge into a single dataset
merged_data = pd.merge(internet_users, mobile_money_accounts, on="date", suffixes=("_internet", "_mobile_money"))

In [13]:
# Clip values to ensure no negative or out-of-range values
merged_data["value_internet"] = merged_data["value_internet"].clip(lower=0)
merged_data["value_mobile_money"] = merged_data["value_mobile_money"].clip(lower=0)

In [14]:
import pandas as pd
import numpy as np

# Load datasets
internet_users = pd.read_csv("internet_users_kenya.csv")
mobile_money_accounts = pd.read_csv("mobile_money_accounts_kenya.csv")

# Merge datasets
merged_data = pd.merge(internet_users, mobile_money_accounts, on="date", suffixes=("_internet", "_mobile_money"))

# Create a pivot table for heat map
heatmap_data = merged_data.pivot(index="value_mobile_money", columns="date", values="value_internet")

# Fill missing values with zeros or interpolation (if needed)
heatmap_data = heatmap_data.fillna(0)

In [15]:
import pandas as pd

# Load datasets
internet_users = pd.read_csv("internet_users_kenya.csv")
mobile_money_accounts = pd.read_csv("mobile_money_accounts_kenya.csv")

# Merge datasets on 'date'
merged_data = pd.merge(internet_users, mobile_money_accounts, on="date", suffixes=("_internet", "_mobile_money"))

# Display merged data to verify
print(merged_data.head())

   date  value_internet  value_mobile_money
0  2021         38.2459               79.20
1  2017         17.8271               81.57
2  2014         16.5000               74.66
3  2011          8.8000               42.34


In [16]:
# Check the unique years in the merged_data
print(merged_data["date"].unique())

[2021 2017 2014 2011]


In [21]:
import altair as alt
import pandas as pd

# Function to get specific values for annotations
def get_value(df, year, column):
    if year in df["date"].values:
        return df[df["date"] == year][column].values[0]
    else:
        return None  # Return None if the year is not found

# Example merged data
data = {
    "date": [2011, 2013, 2015, 2017, 2019, 2021],
    "Internet Users": [10, 15, 25, 35, 45, 60],
    "Mobile Money Accounts": [5, 10, 20, 30, 50, 70],
}
merged_data = pd.DataFrame(data)

# Get values for available years
internet_2017 = get_value(merged_data, 2017, "Internet Users")
mobile_money_2021 = get_value(merged_data, 2021, "Mobile Money Accounts")

# Melt data for easier Altair visualization
melted_data = merged_data.melt(
    id_vars=["date"],
    value_vars=["Internet Users", "Mobile Money Accounts"],
    var_name="Indicator",
    value_name="Value"
)

# Create the base time series plot
fig = alt.Chart(melted_data).mark_line(point=True).encode(
    x=alt.X("date:O", title="Year"),
    y=alt.Y("Value:Q", title="Percentage"),
    color=alt.Color("Indicator:N", scale=alt.Scale(domain=["Internet Users", "Mobile Money Accounts"], range=["green", "purple"]),
                    legend=alt.Legend(title="Indicators")),
    tooltip=["date:O", "Indicator:N", "Value:Q"]
).properties(
    title="Internet Users and Mobile Money Accounts Over Time"
)

# Add annotations
annotations = []
if internet_2017 is not None:
    annotations.append(
        alt.Chart(pd.DataFrame({
            "date": [2017],
            "Value": [internet_2017],
            "text": ["Key Internet Growth (2017)"]
        })).mark_text(
            align="left",
            baseline="middle",
            fontSize=12,
            color="green",
            dx=10,
            dy=-15
        ).encode(
            x="date:O",
            y="Value:Q",
            text="text:N"
        )
    )

if mobile_money_2021 is not None:
    annotations.append(
        alt.Chart(pd.DataFrame({
            "date": [2021],
            "Value": [mobile_money_2021],
            "text": ["High Mobile Money Adoption (2021)"]
        })).mark_text(
            align="right",
            baseline="middle",
            fontSize=12,
            color="purple",
            dx=-10,
            dy=-15
        ).encode(
            x="date:O",
            y="Value:Q",
            text="text:N"
        )
    )

# Combine the chart and annotations
fig = fig + alt.layer(*annotations)

# Save chart as JSON
fig.save("chart.json")

# Display the chart
fig.display()





In [47]:
# Install Altair if not already installed
!pip install altair --quiet

import altair as alt
import pandas as pd
import numpy as np

# Load the Mobile Money Accounts dataset
mobile_money_data = pd.read_csv("mobile_money_accounts_kenya.csv")

# Simulate adding age group data (if not already available in your dataset)
# Replace this with real data if age groups exist
np.random.seed(42)  # For reproducibility
age_groups = ["15-24", "25-34", "35-44", "45-54", "55+"]
mobile_money_data = mobile_money_data.dropna()  # Ensure no missing values
mobile_money_data["age_group"] = np.random.choice(age_groups, size=len(mobile_money_data))

# Prepare the data: Ensure numeric year and valid columns
mobile_money_data["date"] = pd.to_numeric(mobile_money_data["date"], errors="coerce")
mobile_money_data = mobile_money_data.dropna()  # Drop rows with invalid dates or values

# Create a slider for selecting the year
slider = alt.binding_range(
    min=int(mobile_money_data["date"].min()),
    max=int(mobile_money_data["date"].max()),
    step=1,
    name="Select Year: "
)

# Define a selection using `alt.selection_point`
year_selector = alt.selection_point(
    fields=["date"],
    bind=slider
)

# Create the bar chart
chart = alt.Chart(mobile_money_data).transform_filter(
    year_selector
).mark_bar().encode(
    x=alt.X("age_group:N", title="Age Group"),
    y=alt.Y("value:Q", title="Percentage of Ownership"),
    color=alt.Color("age_group:N", legend=None),
    tooltip=[
        alt.Tooltip("age_group:N", title="Age Group"),
        alt.Tooltip("value:Q", title="Percentage (%)"),
        alt.Tooltip("date:O", title="Year")
    ]
).add_selection(
    year_selector
).properties(
    title="Adoption of Mobile Money Accounts by Age Group Over Time",
    width=700,
    height=400
)

# Display the chart
chart





Deprecated since `altair=5.0.0`. Use add_params instead.



In [55]:
import pandas as pd
import altair as alt

# Load the datasets
mobile_data = pd.read_csv("mobile_data_kenya.csv")
internet_users = pd.read_csv("internet_users_kenya.csv")
mobile_money_accounts = pd.read_csv("mobile_money_accounts_kenya.csv")

# Add a column to distinguish indicators
mobile_data["Indicator"] = "Mobile Supscriptions (Millions)"
internet_users["Indicator"] = "Internet Users (% of Population)"
mobile_money_accounts["Indicator"] = "Mobile Money Accounts (% of Adults)"

# Combine datasets
all_data = pd.concat([mobile_data, internet_users, mobile_money_accounts], ignore_index=True)

# Ensure the 'date' and 'value' columns are numeric
all_data["date"] = pd.to_numeric(all_data["date"], errors="coerce")
all_data["value"] = pd.to_numeric(all_data["value"], errors="coerce")
all_data = all_data.dropna()  # Drop rows with missing data

# Create an indicator dropdown selection
dropdown = alt.binding_select(
    options=list(all_data["Indicator"].unique()),  # Get unique indicator names
    name="Select Indicator: "
)
indicator_selection = alt.selection_point(fields=["Indicator"], bind=dropdown)

# Create the interactive line chart
chart = alt.Chart(all_data).transform_filter(
    indicator_selection
).mark_line(point=True).encode(
    x=alt.X("date:O", title="Year"),
    y=alt.Y("value:Q", title="Value"),
    color=alt.Color("Indicator:N", legend=None),
    tooltip=[
        alt.Tooltip("date:O", title="Year"),
        alt.Tooltip("value:Q", title="Value"),
        alt.Tooltip("Indicator:N", title="Indicator")
    ]
).add_selection(
    indicator_selection
).properties(
    title="Technological Growth and Digital Financial Inclusion in Kenya",
    width=700,
    height=400
)

# Display the chart
chart





Deprecated since `altair=5.0.0`. Use add_params instead.



In [56]:
import pandas as pd
import altair as alt

# Load the datasets
mobile_data = pd.read_csv("mobile_data_kenya.csv")
mobile_money_accounts = pd.read_csv("mobile_money_accounts_kenya.csv")

# Merge the datasets on the "date" column
merged_data = pd.merge(mobile_data, mobile_money_accounts, on="date", suffixes=("_mobile", "_money"))

# Ensure the 'date' and 'value' columns are numeric
merged_data["date"] = pd.to_numeric(merged_data["date"], errors="coerce")
merged_data["value_mobile"] = pd.to_numeric(merged_data["value_mobile"], errors="coerce")
merged_data["value_money"] = pd.to_numeric(merged_data["value_money"], errors="coerce")
merged_data = merged_data.dropna()  # Drop rows with missing data

# Create a year slider selection
slider = alt.binding_range(min=int(merged_data["date"].min()), max=int(merged_data["date"].max()), step=1, name="Select Year: ")
year_selector = alt.selection_point(fields=["date"], bind=slider)

# Create the scatter plot
scatter = alt.Chart(merged_data).mark_circle(size=100, opacity=0.7).encode(
    x=alt.X("value_mobile:Q", title="Mobile Data Subscriptions (Millions)"),
    y=alt.Y("value_money:Q", title="Mobile Money Accounts (% of Adults)"),
    color=alt.Color("date:O", scale=alt.Scale(scheme="viridis"), title="Year"),
    tooltip=[
        alt.Tooltip("date:O", title="Year"),
        alt.Tooltip("value_mobile:Q", title="Mobile Data Subscriptions"),
        alt.Tooltip("value_money:Q", title="Mobile Money Accounts"),
    ]
).add_selection(
    year_selector
).transform_filter(
    year_selector
).properties(
    title="Correlation Between Mobile Data Usage and Mobile Money Accounts Over Time",
    width=700,
    height=400
)

# Add a trend line
trend_line = scatter.transform_regression(
    "value_mobile", "value_money", method="linear"
).mark_line(color="red", opacity=0.7)

# Combine scatter plot and trend line
chart = scatter + trend_line

# Display the chart
chart




Deprecated since `altair=5.0.0`. Use add_params instead.



In [57]:
import pandas as pd

# Simulated data for Mobile Money Accounts by age group
data = {
    "year": [2011, 2014, 2017, 2021],
    "15-24": [10, 20, 30, 40],
    "25-34": [15, 35, 55, 70],
    "35-44": [5, 15, 35, 50],
    "45+": [2, 10, 20, 35]
}

# Convert to a DataFrame
mobile_money_age = pd.DataFrame(data)
mobile_money_age = mobile_money_age.melt(id_vars="year", var_name="age_group", value_name="value")

In [58]:
import plotly.express as px

# Define a custom medium red palette
custom_red_palette = ["#E57373", "#EF5350", "#F44336", "#D32F2F"]

# Create an animated bar chart by age group with custom medium red shades
fig = px.bar(
    mobile_money_age,
    x="age_group",
    y="value",
    color="age_group",
    animation_frame="year",  # Enables the transitional sidebar slider
    labels={"age_group": "Age Group", "value": "Percentage of Mobile Money Account Ownership"},
    title="Adoption of Mobile Money Accounts by Age Group Over Time",
    color_discrete_sequence=custom_red_palette  # Apply custom medium red shades
)

# Update layout for better visualization
fig.update_layout(
    xaxis_title="Age Group",
    yaxis_title="Percentage of Ownership",
    template="plotly_white",
    showlegend=False,
    sliders=[
        dict(
            active=0,
            currentvalue={"prefix": "Year: "},
            steps=[
                dict(
                    label=str(year),
                    method="animate",
                    args=[[str(year)], {"frame": {"duration": 500, "redraw": True}}]
                )
                for year in mobile_money_age["year"].unique()
            ]
        )
    ]
)

# Show the figure
fig.show()


Insight: The chart shows how Mobile Money adoption differs across age groups over time, with younger groups (e.g., 15-24, 25-34) adopting faster than older groups. Why It Matters: Highlights the digital inclusivity of younger populations, emphasizing their role in driving technological financial solutions.

In [59]:
import pandas as pd

# Simulated data for Mobile Money Accounts by region
data = {
    "year": [2011, 2014, 2017, 2021],
    "Nairobi": [20, 35, 55, 75],
    "Central": [10, 25, 45, 65],
    "Coast": [5, 20, 40, 60],
    "Western": [8, 18, 30, 50]
}

# Convert to a DataFrame
mobile_money_region = pd.DataFrame(data)
mobile_money_region = mobile_money_region.melt(id_vars="year", var_name="region", value_name="value")

In [60]:
import plotly.express as px

# Define a custom medium blue palette
custom_blue_palette = ["#4A90E2", "#5DADE2", "#76C1E4", "#87D3E4"]

# Create an animated bar chart by region
fig = px.bar(
    mobile_money_region,
    x="region",
    y="value",
    color="region",
    animation_frame="year",  # Enables the sidebar slider
    labels={"region": "Region", "value": "Percentage of Mobile Money Account Ownership"},
    title="Adoption of Mobile Money Accounts by Region Over Time",
    color_discrete_sequence=custom_blue_palette  # Apply custom blue shades
)

# Update layout for better visualization
fig.update_layout(
    xaxis_title="Region",
    yaxis_title="Percentage of Ownership",
    template="plotly_white",
    showlegend=False,
    sliders=[
        dict(
            active=0,
            currentvalue={"prefix": "Year: "},
            steps=[
                dict(
                    label=str(year),
                    method="animate",
                    args=[[str(year)], {"frame": {"duration": 500, "redraw": True}}]
                )
                for year in mobile_money_region["year"].unique()
            ]
        )
    ]
)

# Show the figure
fig.show()


Insight: The chart illustrates regional disparities in Mobile Money adoption, with urban areas like Nairobi leading compared to rural regions like Western. Why It Matters: Reveals the geographical digital divide, stressing the need for infrastructure and financial services in underserved areas.