# Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.cluster import KMeans
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# Apply the white-grid seaborn theme to every plot below. `set_theme` is the
# preferred interface; `sns.set` is only a legacy alias for it.
sns.set_theme(style='whitegrid')

# Generate Synthetic Sample Data

In [None]:
# Build a small synthetic dataset of per-region indicators.
# The global NumPy seed is fixed so every run produces the same table.
np.random.seed(42)

# Single source of truth for the number of regions; every column below is
# sized from it so the columns can never end up with mismatched lengths.
N_REGIONS = 10

regions = [f"Region_{i}" for i in range(1, N_REGIONS + 1)]
data = {
    "region": regions,
    # Integer score drawn from 20..80 inclusive (randint's upper bound is exclusive).
    "infra_access_score": np.random.randint(20, 81, size=N_REGIONS),
    # Fractions drawn uniformly from [0.2, 0.9), rounded to two decimals.
    "internet_penetration": np.round(np.random.uniform(0.2, 0.9, size=N_REGIONS), 2),
    # Integer count drawn from 0..30 inclusive.
    "startup_density": np.random.randint(0, 31, size=N_REGIONS),
    # Fractions drawn uniformly from [0.4, 0.9), rounded to two decimals.
    "education_index": np.round(np.random.uniform(0.4, 0.9, size=N_REGIONS), 2),
    # Fractions drawn uniformly from [0.05, 0.25), rounded to two decimals.
    "unemployment_rate": np.round(np.random.uniform(0.05, 0.25, size=N_REGIONS), 2),
}
df = pd.DataFrame(data)
df

# Cluster Underserved Regions

In [None]:
# Group the regions into three clusters by infrastructure and internet access.
X_cluster = df[['infra_access_score', 'internet_penetration']]

# K-means measures Euclidean distance, so both features are rescaled to [0, 1]
# before fitting. Without this, infra_access_score (tens of units) would swamp
# internet_penetration (fractions below 1) and the clusters would reflect
# infrastructure alone.
X_cluster_scaled = MinMaxScaler().fit_transform(X_cluster)

# n_init is set explicitly so the result does not depend on which default the
# installed scikit-learn version uses. Note that the fitted cluster centres
# are expressed in the scaled feature space.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['cluster'] = kmeans.fit_predict(X_cluster_scaled)

# Plot on the original (unscaled) axes so the values stay interpretable.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='infra_access_score', y='internet_penetration', hue='cluster', palette='Set2')
plt.title("Clustering of Regions based on Infrastructure and Internet Access")
plt.xlabel("Infrastructure Access Score")
plt.ylabel("Internet Penetration")
plt.show()

# Calculate Innovation Index

In [None]:
# Innovation index: the plain average of min-max normalised startup density
# and education index, so each component contributes on the same 0-1 scale.
raw_columns = ['startup_density', 'education_index']
norm_columns = ['startup_density_norm', 'education_index_norm']

scaler = MinMaxScaler()
normalised_values = scaler.fit_transform(df[raw_columns])
df[norm_columns] = normalised_values

component_total = df['startup_density_norm'].add(df['education_index_norm'])
df['innovation_index'] = component_total.div(2)

# Display the per-region index.
df[['region', 'innovation_index']]

# Predict Economic Upliftment

In [None]:
# Synthetic target: a weighted blend of infrastructure access (60%) and the
# innovation index rescaled to 0-100 (40%), plus Gaussian noise with a
# standard deviation of 5. The noise vector is sized from the frame itself so
# this cell keeps working if the number of regions changes.
noise = np.random.normal(0, 5, size=len(df))
df['economic_upliftment'] = (0.6 * df['infra_access_score'] + 0.4 * df['innovation_index'] * 100) + noise

X = df[['infra_access_score', 'innovation_index']]
y = df['economic_upliftment']

model = GradientBoostingRegressor(random_state=42)
model.fit(X, y)

# NOTE: these are in-sample predictions. The model is scored on the same rows
# it was trained on, so they say nothing about how well it generalises.
df['upliftment_predicted'] = model.predict(X)

# Rank regions from highest to lowest predicted upliftment with a clean
# 0..n-1 index.
df_sorted = (
    df[['region', 'upliftment_predicted']]
    .sort_values(by='upliftment_predicted', ascending=False)
    .reset_index(drop=True)
)
df_sorted