In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import os

# ===============================
# Reproducibility
# ===============================
torch.manual_seed(42)
np.random.seed(42)

# ===============================
# 1. LOAD DATA
# ===============================
WORLD_CITIES_PATH = "worldcities.csv"
CLIMATE_PATH = "climate.csv"




In [15]:
cities = pd.read_csv(WORLD_CITIES_PATH)[["city", "lat", "lng"]].dropna()

climate_raw = pd.read_csv(CLIMATE_PATH)

print("Climate columns:", climate_raw.columns.tolist())



Climate columns: ['Land-Ocean: Global Means', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18']


In [16]:
# Convert everything numeric where possible
climate_numeric = climate_raw.apply(pd.to_numeric, errors="coerce")



In [17]:
# Drop empty columns
climate_numeric = climate_numeric.dropna(axis=1, how="all")



In [18]:
# ===============================
# 2. CLIMATE FEATURE ENGINEERING
# ===============================
def compute_trend(series):
    x = np.arange(len(series))
    y = series.values
    mask = ~np.isnan(y)
    if mask.sum() < 5:
        return 0.0
    m, _ = np.polyfit(x[mask], y[mask], 1)
    return m



In [19]:
climate_features = pd.DataFrame({
    "avg_temp": climate_numeric.mean(axis=1),
    "temp_trend": climate_numeric.apply(compute_trend, axis=1),
    "temp_volatility": climate_numeric.std(axis=1)
})



In [20]:
# Use global climate signal (same signal applied to all cities)
global_climate = climate_features.mean()



In [21]:
# ===============================
# 3. BUILD CITY FEATURE TABLE
# ===============================
records = []

for _, row in cities.iterrows():
    records.append({
        "city": row["city"],
        "avg_temp": global_climate["avg_temp"],
        "temp_trend": global_climate["temp_trend"],
        "temp_volatility": global_climate["temp_volatility"],
        # Infrastructure proxies (explicitly marked assumptions)
        "energy_cost_index": np.random.uniform(0.05, 0.25),
        "grid_stability": np.random.uniform(0.7, 1.0),
        "fiber_score": np.random.uniform(0.4, 1.0),
        "flood_risk": np.random.uniform(0, 1),
        "land_cost": np.random.uniform(0, 1),
    })

df = pd.DataFrame(records)


In [22]:
# ===============================
# 4. TARGET DEFINITION
# ===============================
raw_score = (
    -0.3 * df["avg_temp"]
    -15 * df["temp_trend"]
    -5 * df["temp_volatility"]
    -25 * df["flood_risk"]
    -20 * df["energy_cost_index"]
    +30 * df["grid_stability"]
    +25 * df["fiber_score"]
    -15 * df["land_cost"]
)

df["target"] = (raw_score - raw_score.min()) / (raw_score.max() - raw_score.min()) * 100

In [23]:
# ===============================
# 5. MODEL
# ===============================
class DatacenterSuitabilityModel(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x) * 100



In [24]:
# ===============================
# 6. TRAINING
# ===============================
features = df.drop(columns=["city", "target"])
X = features.values
y = df["target"].values.reshape(-1, 1)

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = DatacenterSuitabilityModel(X.shape[1])
optimizer = optim.AdamW(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

for epoch in range(300):
    optimizer.zero_grad()
    loss = criterion(model(torch.FloatTensor(X_train)), torch.FloatTensor(y_train))
    loss.backward()
    optimizer.step()

model.eval()
with torch.no_grad():
    preds = model(torch.FloatTensor(X_test)).numpy()

print("\nEvaluation:")
print("MSE:", mean_squared_error(y_test, preds))
print("R²:", r2_score(y_test, preds))


Evaluation:
MSE: 0.47554870702018165
R²: 0.998163785637125


In [25]:
# ===============================
# 7. RANK CITIES
# ===============================
with torch.no_grad():
    df["predicted_suitability"] = model(torch.FloatTensor(X)).numpy()

ranked = df.sort_values("predicted_suitability", ascending=False)



In [26]:
ranked[["city", "predicted_suitability"]].head(10)

Unnamed: 0,city,predicted_suitability
30546,Kutchan,91.506683
45850,Pedreguer,90.856422
5889,Gillingham,90.705643
38588,Cerro Corá,90.50975
26406,Rio Maria,90.328957
18395,Xizhou,90.276077
13694,Zacualpa,90.026375
20148,Tecax,89.902115
17457,Ob,89.862946
46977,Berri,89.849442
