<a href="https://colab.research.google.com/github/21Bhoomika05/main/blob/main/Untitled22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import joblib
import numpy as np
import pandas as pd
from imblearn.combine import SMOTEENN
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# Load the dataset
file_path = "/content/drive/MyDrive/health_data.csv"
df = pd.read_csv(file_path)

# Drop identifier / location-label columns that are not used as features
df = df.drop(columns=['id', 'State Names', 'District'])

# Separate features and target
X = df.drop(columns='Target (Healthcare Access)')
y = df['Target (Healthcare Access)']

# Split BEFORE any resampling or scaling. Resampling the full dataset first
# would place synthetic (interpolated) samples in the test set and let the
# scaler see test rows, which inflates the reported scores. Stratifying keeps
# every class represented in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale the features: statistics are learned from the training rows only and
# then re-used unchanged for the held-out rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance classes using SMOTE-ENN on the TRAINING data only.
# SMOTE needs more samples in the smallest class than `k_neighbors`, so cap
# it (library default is 5) by what the smallest training class can support.
min_class_count = int(y_train.value_counts().min())
k_neighbors = min(5, min_class_count - 1)
if k_neighbors < 1:
    raise ValueError(
        "Each class needs at least 2 training samples for SMOTE; "
        f"smallest class has {min_class_count}."
    )
smote_enn = SMOTEENN(
    smote=SMOTE(k_neighbors=k_neighbors, random_state=42),
    random_state=42,
)
X_resampled, y_resampled = smote_enn.fit_resample(X_train_scaled, y_train)

# Downstream cells train on the balanced, scaled training set and evaluate
# on the untouched (only scaled) test set.
X_train, y_train = X_resampled, y_resampled

# Base estimator: multi-class XGBoost with a fixed seed for reproducibility
xgb = XGBClassifier(
    objective='multi:softmax',
    num_class=5,
    random_state=42,
)

# Candidate values that the randomized search samples from
param_dist = dict(
    n_estimators=[100, 200, 300, 400],
    max_depth=[3, 5, 7, 10],
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    subsample=[0.6, 0.8, 1.0],
    colsample_bytree=[0.6, 0.8, 1.0],
)

# Randomized search: 20 sampled parameter combinations, each scored by
# accuracy with 5-fold cross-validation, using all available cores.
# `fit` returns the fitted search object itself, so it can be chained.
random_search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_dist,
    n_iter=20,
    scoring='accuracy',
    cv=5,
    random_state=42,
    n_jobs=-1,
    verbose=1,
).fit(X_train, y_train)

# Keep the estimator refit with the best-scoring parameter combination
best_model = random_search.best_estimator_

# Predict on the test set
y_pred = best_model.predict(X_test)

# Report against the full set of classes the model was trained on. Without an
# explicit `labels` argument the confusion matrix silently drops any class
# that happens to be absent from both y_test and y_pred, so its rows/columns
# can no longer be mapped back to class ids.
class_labels = best_model.classes_

# Evaluate the model's performance
print("Best Parameters:", random_search.best_params_)
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_labels))
print("\nClassification Report:")
# zero_division=0 reports 0.0 (instead of warning) for a class with no
# true or predicted samples in this test fold.
print(classification_report(y_test, y_pred, labels=class_labels, zero_division=0))
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))

# Save the trained model and scaler
joblib.dump(best_model, 'xgb_best_model.pkl')
joblib.dump(scaler, 'xgb_scaler.pkl')

# Load the saved model and scaler for predictions (round-trip check of the
# artifacts written just above)
loaded_model = joblib.load('xgb_best_model.pkl')
loaded_scaler = joblib.load('xgb_scaler.pkl')

# Example: Predict with new data input (15 features, positional order must
# match the columns the scaler was fitted on).
# NOTE(review): the first two feature columns are Latitude and Longitude, but
# the first value here (130.0) is outside the valid latitude range -- these
# example values look like they were written for a different column order.
# Confirm the intended ordering before trusting this prediction.
new_data = np.array([[130.0, 15.2, 160.0, 3.5, 25.0, 110.8, 40000.0, 65.0,
                      95.0, 98.0, 60.0, 75.0, 150.0, 1400.0, 4.0]])

# Fail early with a clear message if the example does not have the number of
# features the scaler expects.
expected_feature_count = loaded_scaler.n_features_in_
if new_data.shape[1] != expected_feature_count:
    raise ValueError(
        f"Expected {expected_feature_count} feature values, "
        f"got {new_data.shape[1]}."
    )

# Scale the new input data. When the scaler was fitted on a named DataFrame,
# pass the same column names so scikit-learn can validate them instead of
# warning about missing feature names.
fitted_names = getattr(loaded_scaler, 'feature_names_in_', None)
if fitted_names is not None:
    new_data_scaled = loaded_scaler.transform(
        pd.DataFrame(new_data, columns=fitted_names)
    )
else:
    new_data_scaled = loaded_scaler.transform(new_data)

# Predict the class for the new input
prediction = loaded_model.predict(new_data_scaled)
print("\nPredicted Class for new input:", prediction)



Fitting 5 folds for each of 20 candidates, totalling 100 fits




Best Parameters: {'subsample': 0.8, 'n_estimators': 400, 'max_depth': 5, 'learning_rate': 0.1, 'colsample_bytree': 0.6}

Confusion Matrix:
[[3 1 0 0]
 [0 3 0 0]
 [0 0 2 0]
 [0 0 0 2]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.75      1.00      0.86         3
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         2

    accuracy                           0.91        11
   macro avg       0.94      0.94      0.93        11
weighted avg       0.93      0.91      0.91        11


Accuracy Score: 0.9090909090909091

Predicted Class for new input: [0]




In [None]:
# Check feature names in the dataset to verify inclusion.
# `df` here is the frame left over from the training cell, i.e. after the
# 'id', 'State Names' and 'District' columns were dropped; the target column
# is still present. Note that the recorded output shows several names with
# trailing spaces ('Diabetes ', 'Vaccination ', 'Education ', 'Urbanization ').
print(df.columns)


Index(['Latitude', 'Longitude', 'TB Incidence', 'Diabetes ',
       'Malaria Incidence', 'HIV/AIDS', 'IMR', 'Vaccination ', 'Income (INR)',
       'Employment Rate', 'Education ', 'Housing', 'Urbanization ', 'AQI',
       'Annual Rainfall (mm)', 'Target (Healthcare Access)'],
      dtype='object')


In [None]:
# Load the saved model and scaler
loaded_model = joblib.load('xgb_best_model.pkl')
loaded_scaler = joblib.load('xgb_scaler.pkl')

# List of feature names including latitude and longitude.
# Values are fed to the scaler positionally, so this order must match the
# order of the columns the scaler was fitted on.
feature_col_names = [
    "Latitude", "Longitude", "TB Incidence", "Diabetes",
    "Malaria Incidence", "HIV/AIDS", "IMR", "Vaccination",
    "Income (INR)", "Employment Rate", "Education", "Housing",
    "Urbanization", "AQI", "Annual Rainfall (mm)"
]

# Verify the prompt list against what the scaler was actually fitted on, so a
# reordered or renamed dataset column cannot silently misalign the inputs.
# Dataset column names may carry stray surrounding whitespace, hence strip().
fitted_names = getattr(loaded_scaler, 'feature_names_in_', None)
if fitted_names is not None:
    if [str(name).strip() for name in fitted_names] != feature_col_names:
        raise ValueError(
            "feature_col_names does not match the columns the scaler was "
            f"fitted on: {list(fitted_names)}"
        )
elif loaded_scaler.n_features_in_ != len(feature_col_names):
    raise ValueError(
        f"Scaler expects {loaded_scaler.n_features_in_} features, "
        f"but {len(feature_col_names)} are being collected."
    )


def _prompt_float(feature_name):
    """Prompt for one feature value, re-asking until the text parses as a float."""
    while True:
        raw_value = input(f"Enter value for {feature_name}: ")
        try:
            return float(raw_value)
        except ValueError:
            print(f"'{raw_value}' is not a valid number, please try again.")


# Collect user input for all features
user_input = [_prompt_float(feature_name) for feature_name in feature_col_names]

# Convert the user input to a numpy array and reshape it to a single row
user_input_array = np.array(user_input).reshape(1, -1)

# Scale the user input, passing the fitted column names when available so
# scikit-learn validates them instead of warning about missing names.
if fitted_names is not None:
    user_input_scaled = loaded_scaler.transform(
        pd.DataFrame(user_input_array, columns=fitted_names)
    )
else:
    user_input_scaled = loaded_scaler.transform(user_input_array)

# Predict the class using the loaded model
prediction = loaded_model.predict(user_input_scaled)

# Map the predicted class id to its human-readable access level
access_levels = {
    0: "VERY LOW ACCESS",
    1: "LOW ACCESS",
    2: "MODERATE ACCESS",
    3: "HIGH ACCESS",
    4: "VERY HIGH ACCESS",
}
access_level = access_levels.get(prediction[0])
if access_level is None:
    print("Invalid prediction result.")
else:
    print(f"The model predicts: {access_level}")


Enter value for Latitude: 15.36
Enter value for Longitude: 80.05
Enter value for TB Incidence: 115
Enter value for Diabetes: 11.9
Enter value for Malaria Incidence: 125
Enter value for HIV/AIDS: 3.9
Enter value for IMR: 27
Enter value for Vaccination: 88
Enter value for Income (INR): 41000
Enter value for Employment Rate: 59.5
Enter value for Education: 76
Enter value for Housing: 72
Enter value for Urbanization: 45
Enter value for AQI: 93
Enter value for Annual Rainfall (mm): 1000
The model predicts: VERY LOW ACCESS


