# Exploratory Data Analysis: SDG Goal 9 (Rural Access Index)
This notebook analyzes the Rural Access Index dataset using Python libraries like Pandas, Plotly, and Matplotlib.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt

# Load the dataset
# NOTE: the URL below is a placeholder. Point it at the real CSV location
# (a remote URL, or the local path of the downloaded file) before running.
data_url = (
    'https://example-dataset-url.com/rural_access_index.csv'
)
df = pd.read_csv(filepath_or_buffer=data_url)

# Inspect the dataset: preview rows, column dtypes / non-null counts,
# and the number of missing values per column.
print('First five rows of the dataset:')
print(df.head())
print('\nDataset Info:')
# DataFrame.info() writes its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which would also print "None").
df.info()
print('\nMissing Values:')
print(df.isnull().sum())

# Data Cleaning: keep only fully populated rows.
# Any row containing at least one missing value is discarded; this is a
# deliberately simple strategy for demonstration (imputation is the alternative).
df_cleaned = df.dropna(axis='index', how='any')

# Descriptive Statistics: summary statistics of the cleaned data
# (count, mean, std, min, quartiles, max for numeric columns).
print('\nDescriptive Statistics:')
print(df_cleaned.describe())

# Visualization: histogram showing how Rural Access Index values are
# distributed across the cleaned dataset (20 bins).
fig_hist = px.histogram(
    data_frame=df_cleaned,
    x='Rural Access Index',
    title='Distribution of Rural Access Index',
    nbins=20,
)
fig_hist.show()

# Visualization: box plot of Rural Access Index, used to spot outliers.
fig_box = px.box(
    data_frame=df_cleaned,
    y='Rural Access Index',
    title='Box Plot of Rural Access Index',
)
fig_box.show()

# Visualization: world choropleth coloured by Rural Access Index.
# Countries are matched through the 'Country Code' column (ISO country codes)
# and labelled on hover with 'Country Name'.
viridis_scale = px.colors.sequential.Viridis
fig_map = px.choropleth(
    data_frame=df_cleaned,
    locations='Country Code',
    hover_name='Country Name',
    color='Rural Access Index',
    color_continuous_scale=viridis_scale,
    title='Global Distribution of Rural Access Index',
)
fig_map.show()

# Correlation Analysis: scatter plot of Rural Access Index against GDP per capita.
# The plot is drawn only when the dataset actually has a 'GDP per Capita' column.
if 'GDP per Capita' in df_cleaned.columns:
    # Bubble sizing is optional: size by population only when that column exists.
    # Passing a column name that is not in the frame makes px.scatter raise,
    # so fall back to None (the default: uniform marker size).
    size_column = 'Population' if 'Population' in df_cleaned.columns else None
    fig_scatter = px.scatter(
        df_cleaned,
        x='GDP per Capita',
        y='Rural Access Index',
        size=size_column,
        hover_name='Country Name',
        title='Rural Access Index vs GDP per Capita'
    )
    fig_scatter.show()

# Insights
# The reported figures are computed from df_cleaned so the printed summary
# reflects the loaded data instead of hard-coded placeholder text.
rai_values = df_cleaned['Rural Access Index']
print('\nKey Insights:')
print(f'1. The median Rural Access Index is {rai_values.median():.1f}%.')
print('2. Some countries have significantly lower access, as seen in the box plot.')
if 'GDP per Capita' in df_cleaned.columns:
    # Pearson correlation between GDP per capita and the index; NaN when it
    # cannot be computed (e.g. too few rows or a constant column).
    gdp_rai_corr = df_cleaned['GDP per Capita'].corr(rai_values)
    if pd.isna(gdp_rai_corr):
        print('3. The correlation between GDP per capita and Rural Access Index could not be computed.')
    else:
        if gdp_rai_corr > 0:
            direction = 'positive'
        elif gdp_rai_corr < 0:
            direction = 'negative'
        else:
            direction = 'zero'
        print(f'3. A {direction} correlation (r = {gdp_rai_corr:.2f}) is observed between GDP per capita and Rural Access Index.')
else:
    # Without the GDP column no scatter plot was drawn, so no claim is made.
    print('3. GDP per capita is not available in this dataset, so its relationship with Rural Access Index was not assessed.')
