### Import libraries


In [50]:
# Standard libraries
import logging

# Third-party libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,recall_score,classification_report
import joblib

### Configure logging

In [51]:
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

### Load and preprocess data

In [52]:
# Load dataset
logger.info("Loading dataset")
data = pd.read_csv("D:/brototype/week27/weather driven disease prediction/weather_disease_prediction/weather_disease_prediction/data/raw/Weather-related disease prediction.csv")

2025-08-02 14:35:57,751 - INFO - Loading dataset


In [53]:
# Handle missing values
data = data.dropna()
logger.info("Dropped rows with missing values")

2025-08-02 14:35:57,794 - INFO - Dropped rows with missing values


In [54]:
# Encode categorical variables
label_encoder = LabelEncoder()
data['Gender'] = label_encoder.fit_transform(data['Gender'])
data['prognosis'] = label_encoder.fit_transform(data['prognosis'])
logger.info('Encoded categorical variables')

2025-08-02 14:35:57,811 - INFO - Encoded categorical variables


### Feature Engineering - Lag variables

In [55]:
# Create lag features and fill missing with mean
data['Temperature_lag1'] = data['Temperature (C)'].shift(1).fillna(data['Temperature (C)'].mean())
data['Humidity_lag1'] = data['Humidity'].shift(1).fillna(data['Humidity'].mean())
data['WindSpeed_lag1'] = data['Wind Speed (km/h)'].shift(1).fillna(data['Wind Speed (km/h)'].mean())
logger.info("Added lag features")

2025-08-02 14:35:57,829 - INFO - Added lag features


### Define Features and Target

In [56]:
# Explicitly include the required columns only once
initial_features = [
    'Age', 'Gender', 'Temperature (C)', 'Humidity', 'Wind Speed (km/h)',
    'Temperature_lag1', 'Humidity_lag1', 'WindSpeed_lag1'
]

# Add other features (excluding target)
remaining_features = [
    col for col in data.columns if col not in initial_features + ['prognosis']
]

# Combine feature columns
feature_cols = list(dict.fromkeys(initial_features + remaining_features))

# Feature matrix and target vector
X = data[feature_cols]
y = data['prognosis']
logger.info("Defined features and target")

2025-08-02 14:35:57,848 - INFO - Defined features and target


### Split Data and Train Models

In [57]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
logger.info("Split data into training and test sets")

# Model initialization
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)

# Train models
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
logger.info("Trained RandomForest and XGBoost models")

2025-08-02 14:35:57,865 - INFO - Split data into training and test sets
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
2025-08-02 14:35:59,023 - INFO - Trained RandomForest and XGBoost models


In [58]:
import sys
import os
import traceback

# Add correct src path for predict.py
sys.path.append(r"D:/brototype/week27/weather driven disease prediction/weather_disease_prediction/weather_disease_prediction/src")

from predict import predict_disease

# Symptom columns
symptom_columns = [
    'nausea', 'joint_pain', 'abdominal_pain', 'high_fever', 'chills', 'fatigue',
    'runny_nose', 'pain_behind_the_eyes', 'dizziness', 'headache', 'chest_pain',
    'vomiting', 'cough', 'shivering', 'asthma_history', 'high_cholesterol',
    'diabetes', 'obesity', 'hiv_aids', 'nasal_polyps', 'asthma',
    'high_blood_pressure', 'severe_headache', 'weakness', 'trouble_seeing',
    'fever', 'body_aches', 'sore_throat', 'sneezing', 'diarrhea',
    'rapid_breathing', 'rapid_heart_rate', 'pain_behind_eyes', 'swollen_glands',
    'rashes', 'sinus_headache', 'facial_pain', 'shortness_of_breath',
    'reduced_smell_and_taste', 'skin_irritation', 'itchiness', 'throbbing_headache',
    'confusion', 'back_pain', 'knee_ache'
]

# User input
user_input = {
    'age': 30,
    'gender': 'male',
    'temperature': 25.0,
    'humidity': 0.7,
    'wind_speed': 10.0,
    'symptoms': ['nausea', 'high_fever']
}

# Correct model paths
model_path = "../../models/trained_model.pkl"
label_path = "../../models/label_encoder.pkl"

# Prediction
try:
    disease = predict_disease(
        user_input,
        model_path,
        label_path,
        symptom_columns
    )
    print(f"Predicted Disease: {disease}")
except Exception as e:
    print(f"Prediction failed: {str(e)}")
    traceback.print_exc()


Predicted Disease: Heat Stroke
