In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [6]:
# Load data
df = pd.read_csv('UHC_SCI_INFECT.csv')

In [7]:
simple_df = df[['TimeDim', 'SpatialDimensionValueCode', 'ParentLocationCode', 'NumericValue']].copy()

In [8]:
# Fill missing values with 'Unknown'
simple_df['ParentLocationCode'] = simple_df['ParentLocationCode'].fillna('Unknown')


In [9]:
# Convert country codes to numbers
simple_df['SpatialDimensionValueCode'] = simple_df['SpatialDimensionValueCode'].astype('category').cat.codes
simple_df['ParentLocationCode'] = simple_df['ParentLocationCode'].astype('category').cat.codes

In [10]:
# Features and target
X = simple_df[['TimeDim', 'SpatialDimensionValueCode', 'ParentLocationCode']]
y = simple_df['NumericValue']


In [11]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# Train model
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X_train, y_train)



In [13]:
# Check accuracy
score = model.score(X_test, y_test)
print(f"Model R¬≤ score: {score:.3f}")


Model R¬≤ score: 0.670


In [14]:
# Make a prediction
sample = [[2025, 10, 1]]  # [Year, CountryCode, RegionCode]
prediction = model.predict(sample)[0]
print(f"Predicted coverage: {prediction:.1f}%")

Predicted coverage: 81.1%


In [15]:
# Save the simple model we just built
import joblib

# Save everything needed for predictions
model_package = {
    'model': model,
    'feature_names': ['TimeDim', 'SpatialDimensionValueCode', 'ParentLocationCode'],
    'country_codes': list(simple_df['SpatialDimensionValueCode'].astype('category').cat.categories),
    'region_codes': list(simple_df['ParentLocationCode'].astype('category').cat.categories)
}

joblib.dump(model_package, 'simple_uhc_model.pkl')
print("‚úÖ Model saved! Ready for deployment.")

‚úÖ Model saved! Ready for deployment.


In [16]:
# Load and use the saved model
loaded_package = joblib.load('simple_uhc_model.pkl')
loaded_model = loaded_package['model']
country_codes = loaded_package['country_codes']
region_codes = loaded_package['region_codes']

print(f"üìã Loaded {len(country_codes)} countries, {len(region_codes)} regions")

# Simple prediction function
def predict(year, country_index, region_index):
    prediction = loaded_model.predict([[year, country_index, region_index]])[0]
    return f"{prediction:.1f}%"

# Test it
print("üß™ Test predictions:")
print(f"2025, Country #10, Region #1: {predict(2025, 10, 1)}")
print(f"2030, Country #5, Region #2: {predict(2030, 5, 2)}")

üìã Loaded 210 countries, 7 regions
üß™ Test predictions:
2025, Country #10, Region #1: 81.1%
2030, Country #5, Region #2: 36.0%


In [17]:
# Predict for actual countries and regions
def predict_for_country(year, country_name, region_name):
    # Find the code for this country
    country_idx = country_codes.index(country_name) if country_name in country_codes else 0
    region_idx = region_codes.index(region_name) if region_name in region_codes else 0

    coverage = loaded_model.predict([[year, country_idx, region_idx]])[0]
    return coverage

# Show some real predictions
print("üåç Real predictions:")
print(f"BOL in 2025: {predict_for_country(2025, 'BOL', 'AMR'):.1f}%")
print(f"ZAF in 2030: {predict_for_country(2030, 'ZAF', 'AFR'):.1f}%")
print(f"EGY in 2024: {predict_for_country(2024, 'EGY', 'EMR'):.1f}%")

üåç Real predictions:
BOL in 2025: 43.5%
ZAF in 2030: 43.5%
EGY in 2024: 43.5%
