In [29]:
import os

import googlemaps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
from prophet import Prophet
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


In [31]:
# Read the Google Maps API key from the environment instead of embedding it in
# the notebook, so the secret is never saved or shared with the file.
# NOTE(review): the key that was previously hardcoded here should be treated as
# leaked and revoked/rotated in the Google Cloud console.
_gmaps_api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not _gmaps_api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this notebook."
    )
gmaps = googlemaps.Client(key=_gmaps_api_key)


In [33]:
# Load the regional observations, parse the date column, and derive two
# calendar features from it:
#   month  -> calendar month number (1-12)
#   season -> Dec-Feb = 1, Mar-May = 2, Jun-Aug = 3, Sep-Nov = 4
data = (
    pd.read_csv('high_low_region.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .assign(
        month=lambda frame: frame['Date'].dt.month,
        season=lambda frame: frame['Date'].dt.month % 12 // 3 + 1,
    )
)


In [35]:
# Select the climate, disease-count and environment columns used for
# clustering, then standardise them with a scaler that is kept for later reuse.
features = data.loc[:, [
    'Temperature',
    'Humidity',
    'Rainfall',
    'Malaria Cases',
    'Dengue Cases',
    'Population Density',
    'Water Body Nearby',
    'Green Cover',
    'Healthcare Facilities',
]]
scaler = StandardScaler()
scaled_features = scaler.fit(features).transform(features)


In [37]:
# Fit a 3-cluster KMeans model (fixed seed) on the standardised features and
# store each row's cluster id on the frame.
kmeans = KMeans(n_clusters=3, random_state=42).fit(scaled_features)
data['risk_cluster'] = kmeans.labels_


In [39]:
# KMeans cluster ids carry no inherent order, so a fixed 0/1/2 ->
# Low/Medium/High mapping can mislabel clusters. Instead, rank the clusters by
# their mean combined malaria + dengue case counts and name them in ascending
# order of that burden.
# NOTE(review): "risk" is taken here to mean observed disease burden - confirm
# this is the intended ordering criterion.
_cluster_burden = (
    data.groupby('risk_cluster')[['Malaria Cases', 'Dengue Cases']]
    .mean()
    .sum(axis=1)
    .sort_values(kind='stable')
)
# Still a plain {cluster_id: label} dict, so lookups by predicted cluster id work.
risk_labels = {
    int(cluster_id): label
    for cluster_id, label in zip(_cluster_burden.index, ['Low', 'Medium', 'High'])
}
data['Predicted Risk Level'] = data['risk_cluster'].map(risk_labels)


In [41]:
# Build the two-column frames Prophet is fitted on below: 'ds' holds the
# observation date and 'y' the case count to forecast.
prophet_data_malaria = pd.DataFrame({'ds': data['Date'], 'y': data['Malaria Cases']})
prophet_data_dengue = pd.DataFrame({'ds': data['Date'], 'y': data['Dengue Cases']})


In [43]:
# One Prophet model per disease, both with multiplicative seasonality.
malaria_model = Prophet(seasonality_mode='multiplicative')
dengue_model = Prophet(seasonality_mode='multiplicative')

# Fit each model on its own case-count series (malaria first, then dengue).
malaria_model.fit(prophet_data_malaria)
dengue_model.fit(prophet_data_dengue)


10:05:51 - cmdstanpy - INFO - Chain [1] start processing
10:05:52 - cmdstanpy - INFO - Chain [1] done processing
10:05:52 - cmdstanpy - INFO - Chain [1] start processing
10:05:52 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x17967c6ef60>

In [45]:
def predict_risk(location_name, month=None, season=None):
    """Check that ``location_name`` can be geocoded; print a message if not.

    NOTE(review): this is an early, partial version of ``predict_risk``. A
    fuller function with the same name is defined in a later cell and replaces
    this one when the notebook is run top to bottom, so this cell can probably
    be deleted.

    Parameters
    ----------
    location_name : str
        Free-text place name passed to the Google Maps geocoder.
    month, season : optional
        Accepted for signature compatibility; not used by this version.

    Returns
    -------
    None
        Problems are reported with ``print`` rather than raised.
    """
    try:
        # An empty geocoding result means the place name was not recognised.
        if not gmaps.geocode(location_name):
            print("Location not found.")
            return
    except Exception as e:
        # Geocoder/client failures are reported instead of propagated.
        print(f"Geocoder error: {e}")
        return


In [47]:
def predict_risk(location_name, month=None, season=None, year=2024, show_plots=False):
    """Print the predicted risk level and a case-based score for a location.

    The requested period is validated, the location is checked with the Google
    Maps geocoder and then looked up by exact name in ``data``. The location's
    mean feature vector is assigned to a KMeans cluster to obtain the risk
    label, and the fitted Prophet models forecast daily malaria and dengue
    cases over the requested period.

    Note that the Prophet forecasts take only dates as input, so the score is
    not specific to ``location_name``; only the risk label is.

    Parameters
    ----------
    location_name : str
        Name to geocode and to match exactly against ``data['Location']``.
    month : int, optional
        Calendar month (1-12). Takes precedence over ``season`` if both are given.
    season : int, optional
        Season index using the same encoding as ``data['season']``:
        1 = Dec-Feb, 2 = Mar-May, 3 = Jun-Aug, 4 = Sep-Nov.
    year : int, default 2024
        Year of the forecast period. For season 1 the window starts in
        December of the previous year so that it ends in February of ``year``.
    show_plots : bool, default False
        When True, also draw the malaria and dengue forecast plots.

    Returns
    -------
    None
        Results and error messages are printed.
    """
    feature_columns = ['Temperature', 'Humidity', 'Rainfall', 'Malaria Cases',
                       'Dengue Cases', 'Population Density', 'Water Body Nearby',
                       'Green Cover', 'Healthcare Facilities']

    # Validate the period first: it is cheap and avoids an unnecessary
    # geocoding request when the arguments are unusable.
    try:
        future = _build_future_frame(month, season, year)
    except ValueError as error:
        print(f"Invalid time period: {error}")
        return

    # The geocoder is used only as a sanity check that the place exists.
    try:
        if not gmaps.geocode(location_name):
            print("Location not found.")
            return
    except Exception as e:
        print(f"Geocoder error: {e}")
        return

    location_data = data[data['Location'] == location_name]
    if location_data.empty:
        print("Location data not found in the dataset.")
        return

    # Keep the averages as a one-row DataFrame so the scaler receives the same
    # column names it was fitted with.
    avg_features = location_data[feature_columns].mean().to_frame().T
    cluster = kmeans.predict(scaler.transform(avg_features))[0]

    malaria_forecast = malaria_model.predict(future)
    dengue_forecast = dengue_model.predict(future)

    avg_malaria_cases = malaria_forecast['yhat'].mean()
    avg_dengue_cases = dengue_forecast['yhat'].mean()
    # Forecasts can dip below zero, so clamp the score to [0, 1].
    # NOTE(review): the divisor 100 is an unexplained scaling constant - confirm.
    raw_score = (avg_malaria_cases + avg_dengue_cases) / 100
    probability_score = min(1.0, max(0.0, raw_score))

    risk_level = risk_labels[cluster]
    print(f"Predicted Risk Level: {risk_level}")
    print(f"Probability Score: {probability_score:.2f}")
    print("Top Contributing Factors: High Disease Cases, Environmental Conditions")

    if show_plots:
        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        malaria_model.plot(malaria_forecast, ax=axes[0])
        axes[0].set_title("Malaria Cases Prediction")
        dengue_model.plot(dengue_forecast, ax=axes[1])
        axes[1].set_title("Dengue Cases Prediction")
        plt.show()


def _build_future_frame(month, season, year):
    """Return a frame with a daily ``ds`` column covering the requested period.

    Raises ``ValueError`` when neither ``month`` nor ``season`` is given or
    when the given value is out of range.
    """
    if month is not None:
        if not 1 <= month <= 12:
            raise ValueError("month must be between 1 and 12.")
        start = pd.Timestamp(year=year, month=month, day=1)
        n_days = start.days_in_month
    elif season is not None:
        if not 1 <= season <= 4:
            raise ValueError("season must be between 1 and 4.")
        # Inverse of `month % 12 // 3 + 1`: seasons start in Dec, Mar, Jun, Sep.
        start_month = (3 * (season - 1) - 1) % 12 + 1
        start_year = year - 1 if start_month == 12 else year
        start = pd.Timestamp(year=start_year, month=start_month, day=1)
        n_days = ((start + pd.DateOffset(months=3)) - start).days
    else:
        raise ValueError("provide either a month (1-12) or a season (1-4).")
    return pd.DataFrame({'ds': pd.date_range(start=start, periods=n_days, freq='D')})


In [None]:
def _read_int_in_range(prompt, low, high):
    """Prompt for a whole number in ``[low, high]``; return it, or None if invalid."""
    raw = input(prompt).strip()
    try:
        value = int(raw)
    except ValueError:
        print(f"'{raw}' is not a whole number.")
        return None
    if not low <= value <= high:
        print(f"Please enter a number between {low} and {high}.")
        return None
    return value


# Collect the location and the kind of period to forecast, then run the
# prediction. Invalid numeric input is reported instead of raising.
location_name = input("Enter the location name: ").strip()
time_choice = input("Enter time choice (month/season): ").strip().lower()

if time_choice == "month":
    month = _read_int_in_range("Enter the month (1-12): ", 1, 12)
    if month is not None:
        predict_risk(location_name, month=month)
elif time_choice == "season":
    season = _read_int_in_range(
        "Enter the season (1=Winter, 2=Spring, 3=Summer, 4=Autumn): ", 1, 4
    )
    if season is not None:
        predict_risk(location_name, season=season)
else:
    print("Invalid time choice. Please select 'month' or 'season'.")

In [53]:
import joblib

# Persist the fitted clustering model and its scaler to the working directory.
joblib.dump(value=kmeans, filename='kmeans_model.pkl')
joblib.dump(value=scaler, filename='scaler_model.pkl')

# Persist the two fitted Prophet forecasters.
# NOTE(review): Prophet's documentation recommends its JSON serializers over
# pickle-based persistence - confirm these .pkl files reload reliably.
joblib.dump(value=malaria_model, filename='malaria_model.pkl')
joblib.dump(value=dengue_model, filename='dengue_model.pkl')


['dengue_model.pkl']

In [25]:
# from prophet.diagnostics import cross_validation, performance_metrics

# df_cv = cross_validation(malaria_model, initial='730 days', period='180 days', horizon = '365 days')
# df_p = performance_metrics(df_cv)
# print(df_p[['horizon', 'mae', 'rmse']])
