Importing Libraries and Configurations for Data Analysis and Visualization

In [None]:
import warnings

import folium
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns
from folium.plugins import HeatMap
warnings.filterwarnings("ignore")


: 

Loading and Importing Project Datasets

In [None]:
# Directory that holds the exported CSV result sets. Keeping it in one place
# means the notebook can be pointed at another folder by editing a single line.
DATA_DIR = "/content"


def load_csv(filename):
    """Read one CSV from DATA_DIR and return it as a DataFrame."""
    return pd.read_csv(f"{DATA_DIR}/{filename}")


avg_distance = load_csv("Average distance of hospitals to National Highways and State Highways.csv")
bottom5 = load_csv("Bottom 5 districts with lowest number of hospitals.csv")
nh_sh_counts = load_csv("Count of hospitals near NH vs SH.csv")
district_rank_hosp = load_csv("Districts ranked by hospital count.csv")
district_rank_pop = load_csv("Districts ranked by hospitals per 100,000 population.csv")
outliers = load_csv("Districts with population over 1 million but less than 10 hospitals(outlier).csv")
district_facility_counts = load_csv("District-wise hospital,facility counts.csv")
pop_ratio = load_csv("District-wise Hospitals & Population Ratio.csv")
hospital_coords = load_csv("Extract hospital coordinates (longitude, latitude) for heatmap visualization.csv")
hosp_access = load_csv("People-to-Hospital Ratios Categorized by Access Level.csv")
hosp_density = load_csv("Hospital density per square kilometer by district (choropleth preparation).csv")
emg_fac = load_csv("Facilities with emergency services by district.csv")

Visualizing District-wise Hospital Counts Using a Bar Plot

In [None]:
# Bar chart of hospital_count for every district in district_facility_counts.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    x="district",
    y="hospital_count",
    data=district_facility_counts,
    palette="viridis",
    ax=ax,
)
# District names are long, so turn the tick labels vertical.
ax.tick_params(axis="x", labelrotation=90)
ax.set_title("District-wise Hospital Counts in Rajasthan")
plt.show()


Identifying and Visualizing Top 10 Underserved Districts by People per Hospital

In [None]:
# Keep the ten rows with the largest people_per_hospital value.
top10 = pop_ratio.sort_values(by="people_per_hospital", ascending=False).iloc[:10]

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x="district", y="people_per_hospital", data=top10, palette="magma", ax=ax)
ax.tick_params(axis="x", labelrotation=90)
ax.set_title("Top 10 Underserved Districts (People per Hospital)")

# Write each bar's (truncated) height just above it, centred horizontally.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    ax.text(bar_centre, bar_height + 100, int(bar_height),
            ha="center", va="bottom", fontsize=10)

plt.show()


Comparing Hospital Counts Near National and State Highways

In [None]:
# One bar per road category (roadcatego) showing its hospital_count.
ax = sns.barplot(x="roadcatego", y="hospital_count", data=nh_sh_counts, palette="Set2")
ax.set_title("Hospitals near National vs State Highways (within 5 km)")

# Label every bar with its integer height, placed slightly above the bar top.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    ax.text(bar_centre, bar_height + 0.5, int(bar_height),
            ha="center", va="bottom", fontsize=10)

plt.show()


Creating a Hospital Location Heatmap Using Folium

In [None]:
# Base map: initial view at latitude 26.9, longitude 75.8, zoom level 6.
m = folium.Map(location=[26.9, 75.8], zoom_start=6, tiles="CartoDB positron")

# HeatMap raises on NaN coordinates, so rows missing either value are dropped
# before the [latitude, longitude] pairs are handed over.
heat_data = (
    hospital_coords[["latitude", "longitude"]]
    .dropna()
    .values
    .tolist()
)
HeatMap(heat_data, radius=7, blur=4).add_to(m)

# Persist a standalone HTML copy, then show the map inline as the cell output.
m.save("hospital_heatmap.html")
m


Visualizing Hospitals vs Population by District Using Scatter Plot

In [None]:
# Bubble chart: population on x, hospital count on y, marker size driven by
# hospitals_per_lakh, one colour per district.
scatter_options = dict(
    x="total_population",
    y="hospital_count",
    size="hospitals_per_lakh",
    color="district",
    hover_name="district",
    title="Hospitals vs Population by District",
)
fig = px.scatter(pop_ratio, **scatter_options)
fig.show()


Highlighting Bottom 5 Districts by Hospital Count

In [None]:
# Bar chart of the districts listed in bottom5 with their hospital counts.
fig, ax = plt.subplots(figsize=(8, 5))
bars = ax.bar(bottom5["district"], bottom5["hospital_count"],
              color="skyblue", edgecolor="black")

# Print each bar's height in bold one unit above the bar, centred on it.
for bar in bars:
    yval = bar.get_height()
    x_centre = bar.get_x() + bar.get_width() / 2
    ax.text(x_centre, yval + 1, str(yval),
            ha="center", va="bottom", fontsize=10, fontweight="bold")

ax.set_title("Bottom 5 Districts by Hospital Count")
ax.set_xlabel("District")
ax.set_ylabel("Number of Hospitals")
plt.show()

Hospitals per Lakh Population by District

In [None]:
# Sort once by rank_by_ratio and use this same frame for BOTH the bars and the
# value labels. The labels are placed by row position, so drawing the bars from
# a differently ordered frame would attach values to the wrong districts.
ranked_districts = district_rank_pop.sort_values("rank_by_ratio")

fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=ranked_districts, x="hospitals_per_lakh", y="distname",
            palette="viridis", ax=ax)

# Horizontal bars sit at y = 0, 1, 2, ... in row order; write each value just
# to the right of the bar end.
for position, value in enumerate(ranked_districts["hospitals_per_lakh"]):
    ax.text(value + 0.1, position, str(value), va="center", fontsize=9)

ax.set_title("Hospitals per Lakh Population by District (Rajasthan)", fontsize=14, weight="bold")
ax.set_xlabel("Hospitals per Lakh Population")
ax.set_ylabel("District")
fig.tight_layout()
plt.show()

People per Hospital by District

In [None]:
# All districts, ordered from the highest people_per_hospital to the lowest.
districts_by_load = pop_ratio.sort_values(by="people_per_hospital", ascending=False)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x="district", y="people_per_hospital", data=districts_by_load,
            palette="magma", ax=ax)
ax.tick_params(axis="x", labelrotation=90)
ax.set_title("People per Hospital (Higher = Worse)")
ax.set_ylabel("People per Hospital")
plt.show()


District-wise Healthcare Accessibility Levels (Pie Chart)

In [None]:
# Colour for each access level, looked up by label. value_counts() orders the
# levels by frequency, so a positional colour list could paint e.g. "Good" red.
ACCESS_LEVEL_COLORS = {"Poor": "#e74c3c", "Average": "#f1c40f", "Good": "#2ecc71"}

access_counts = hosp_access["access_level"].value_counts()
# Any level not in the mapping falls back to a neutral grey instead of failing.
wedge_colors = [ACCESS_LEVEL_COLORS.get(level, "lightgrey") for level in access_counts.index]

fig, ax = plt.subplots(figsize=(6, 6))
access_counts.plot.pie(ax=ax, autopct="%1.1f%%", colors=wedge_colors)
ax.set_title("Healthcare Accessibility Levels (District-wise)")
# Remove the y-label that pandas adds by default to pie charts.
ax.set_ylabel("")
plt.show()

Best vs Worst Districts in Healthcare Access

In [None]:
# Five districts with the fewest and five with the most people per hospital.
top_bottom = pd.concat([
    hosp_access.nsmallest(5, "people_per_hospital"),
    hosp_access.nlargest(5, "people_per_hospital"),
])
# With fewer than ten rows the two selections overlap; keep each row once so
# every district maps to exactly one bar.
top_bottom = top_bottom[~top_bottom.index.duplicated()]

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=top_bottom, x="distname", y="people_per_hospital", hue="access_level",
    dodge=False, palette={"Poor": "#e74c3c", "Average": "#f1c40f", "Good": "#2ecc71"},
    ax=ax,
)

ax.tick_params(axis="x", labelrotation=45)
ax.set_title("Best vs Worst Districts in Healthcare Access", fontsize=14)
ax.set_ylabel("People per Hospital")
ax.set_xlabel("District")

# Label bars from the data rather than from ax.patches: with `hue`, seaborn can
# add zero-size legend rectangles to the axes (observed with 0.13), which would
# otherwise show up as stray "0" labels. With dodge=False the bars are centred
# on x = 0, 1, 2, ... in row order.
for position, people in enumerate(top_bottom["people_per_hospital"]):
    ax.text(
        position,
        people + 1000,
        int(people),
        ha="center", va="bottom", fontsize=9, color="black",
    )

fig.tight_layout()
plt.show()


Analysis of Hospitals Near National Highways

In [None]:
# pandas (pd) and plotly.express (px) come from the notebook's import cell.
# The frame gets a descriptive name because other cells also use `df`.
nh_hospitals = pd.read_csv("Hospitals within 10 km of National Highways (NH).csv")

# Number of rows (hospitals) per district, as a two-column frame for plotting.
district_counts = (
    nh_hospitals["district"]
    .value_counts()
    .rename_axis("District")
    .reset_index(name="Number of Hospitals")
)

fig1 = px.bar(district_counts, x="District", y="Number of Hospitals",
              title="Number of Hospitals per District")
fig1.show()

# Distribution of the distance_meters column across all rows.
fig2 = px.histogram(nh_hospitals, x="distance_meters", nbins=40,
                    title="Distribution of Hospital Distances from NH")
fig2.show()

# Number of rows (hospitals) per value of nearest_nh.
nh_counts = (
    nh_hospitals["nearest_nh"]
    .value_counts()
    .rename_axis("Nearest National Highway")
    .reset_index(name="Number of Hospitals")
)

fig3 = px.bar(nh_counts, x="Nearest National Highway", y="Number of Hospitals",
              title="Hospitals per National Highway")
fig3.show()

# One point per hospital: distance on x, district on y, name shown on hover.
fig4 = px.scatter(nh_hospitals, x="distance_meters", y="district",
                  title="Hospital Distance vs District", hover_data=["hospital_name"])
fig4.show()


Analysis of Hospitals Near State Highways

In [None]:
# pandas (pd) and plotly.express (px) come from the notebook's import cell.
# The frame gets a descriptive name because other cells also use `df`.
sh_hospitals = pd.read_csv("Hospitals within 10 km of State Highways (SH).csv")

# Ten districts with the most rows (hospitals). Named columns are used so the
# chart axes get readable titles instead of a generic "y".
top_districts = (
    sh_hospitals["district"]
    .value_counts()
    .nlargest(10)
    .rename_axis("District")
    .reset_index(name="Number of Hospitals")
)
fig1 = px.bar(top_districts, x="District", y="Number of Hospitals",
              title="Top 10 Districts by Hospitals")
fig1.show()

# Distribution of the distance_meters column across all rows.
fig2 = px.histogram(sh_hospitals, x="distance_meters", nbins=10,
                    title="Hospital Distance Distribution")
fig2.show()

# Ten nearest_sh values with the most rows (hospitals).
top_highways = (
    sh_hospitals["nearest_sh"]
    .value_counts()
    .nlargest(10)
    .rename_axis("Nearest State Highway")
    .reset_index(name="Number of Hospitals")
)
fig3 = px.bar(top_highways, x="Nearest State Highway", y="Number of Hospitals",
              title="Top 10 Highways by Hospitals")
fig3.show()


Distance Analysis to Nearest Hospital by District

In [None]:
# Load the centroid-to-hospital table and discard rows with no nearest_hospital.
df = pd.read_csv("Nearest hospital to each district centroid.csv").dropna(subset=["nearest_hospital"])

distances_km = df["distance_km"]
mean_distance = distances_km.mean()
median_distance = distances_km.median()

fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(distances_km, bins=10, kde=True, color="#3498db", edgecolor="black", ax=ax)

# Dashed vertical markers for the two summary statistics, labelled in the legend.
ax.axvline(mean_distance, color="red", linestyle="--", linewidth=2,
           label=f"Mean = {mean_distance:.2f} km")
ax.axvline(median_distance, color="green", linestyle="--", linewidth=2,
           label=f"Median = {median_distance:.2f} km")

ax.set_title("Distribution of Distances to Nearest Hospital by District (Rajasthan)",
             fontsize=14, weight="bold")
ax.set_xlabel("Distance to Nearest Hospital (km)", fontsize=12)
ax.set_ylabel("Number of Districts", fontsize=12)
ax.legend()
ax.grid(axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
plt.show()


Top 10 Districts by Hospital Density

In [None]:
# Density = hospitals divided by district area; stored as a column on hosp_density.
hosp_density["hospitals_per_sqkm"] = hosp_density["hospital_count"].div(
    hosp_density["district_area_sqkm"]
)

# Ten rows with the highest density, highest first.
df_sorted = hosp_density.sort_values(by="hospitals_per_sqkm", ascending=False).iloc[:10]

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x="district", y="hospitals_per_sqkm", data=df_sorted, palette="viridis", ax=ax)

# Bars are at x = 0, 1, 2, ... in row order; print each value to three decimals.
for bar_index, density in enumerate(df_sorted["hospitals_per_sqkm"]):
    ax.text(bar_index, density + 0.001, f"{density:.3f}",
            ha="center", va="bottom", fontsize=9)

ax.set_title("Top 10 Districts by Hospital Density (Hospitals per Sq.Km)",
             fontsize=14, weight="bold")
ax.set_xlabel("District")
ax.set_ylabel("Hospitals per Sq.Km")
ax.tick_params(axis="x", labelrotation=45)
fig.tight_layout()
plt.show()

Top Districts by Percentage of Emergency Healthcare Facilities

In [None]:
# Districts that have at least one emergency facility, ranked by
# percent_emergency; keep the ten highest.
has_emergency = emg_fac["emergency_facilities"] > 0
top_emergency = (
    emg_fac[has_emergency]
    .sort_values("percent_emergency", ascending=False)
    .head(10)
)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(top_emergency["addr:district"], top_emergency["percent_emergency"], color="tomato")
ax.set_xlabel("Emergency Facilities (%)")
ax.set_ylabel("District")
ax.set_title("Top Districts by % of Emergency Healthcare Facilities")
# barh draws the first row at the bottom; flip so the highest value is on top.
ax.invert_yaxis()
plt.show()


Share of Emergency vs Non-Emergency Healthcare Facilities

In [None]:
# Totals over all rows of emg_fac.
total_emergency = emg_fac["emergency_facilities"].sum()
total_all = emg_fac["total_facilities"].sum()

# Two wedges: emergency facilities and everything else.
wedge_sizes = [total_emergency, total_all - total_emergency]
wedge_labels = ["Emergency", "Non-Emergency"]

fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(wedge_sizes, labels=wedge_labels, autopct="%.2f%%",
       colors=["red", "lightgrey"], startangle=90)
ax.set_title("Share of Emergency vs Non-Emergency Facilities in Rajasthan")
plt.show()

District-wise Distribution of Healthcare Facility Types

In [None]:
# The frame gets a descriptive name because other cells also use `df`.
facility_breakdown = pd.read_csv("Facility type breakdown per district.csv")

# Rows = districts, columns = amenity types, cells = summed facility_count
# (0 where a district has no row for that amenity).
pivot_df = facility_breakdown.pivot_table(index="addr:district",
                                          columns="amenity",
                                          values="facility_count",
                                          aggfunc="sum",
                                          fill_value=0)

# Order districts by their total across every amenity column in the file.
pivot_df["Total"] = pivot_df.sum(axis=1)
pivot_df = pivot_df.sort_values("Total", ascending=False)

# reindex instead of pivot_df[[...]]: if one of the three types is absent from
# the CSV this yields a zero column rather than raising KeyError.
PLOTTED_AMENITIES = ["hospital", "clinic", "doctors"]
ax = pivot_df.reindex(columns=PLOTTED_AMENITIES, fill_value=0).plot(
    kind="bar", stacked=True, figsize=(14, 7),
    color=["#3498db", "#f1c40f", "#2ecc71"]
)

ax.set_title("Healthcare Facilities Distribution by District", fontsize=16)
ax.set_ylabel("Number of Facilities")
ax.set_xlabel("District")
# Slant the district names and right-align them under their bars.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
ax.legend(title="Facility Type")
plt.tight_layout()
plt.show()
