In [2]:
# 📌 Step 1: Import Libraries
import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# 📌 Step 2: Load Data
# Read the preprocessed dataset and separate the feature columns from the
# 'Disease_Outbreak' target column.
df = pd.read_csv('../Data/preprocessed_data.csv')
X = df.drop('Disease_Outbreak', axis=1)
y = df['Disease_Outbreak']

# Hold out 20% of the rows for the final evaluation; stratify on the target so
# both splits keep the same class proportions, and fix the seed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# 📌 Step 3: Scale Data (inside a Pipeline)
# The scaler is a pipeline step rather than a separate fit on X_train, so that
# during cross-validation it is re-fit on the training portion of every fold.
# Fitting it once on all of X_train beforehand would let each validation fold
# influence the scaling statistics used to score it.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression(max_iter=1000, random_state=42)),
])

# 📌 Step 4: Hyperparameter Tuning (Logistic Regression)
# Pipeline parameters are addressed as '<step name>__<parameter>'.
param_grid = {
    'clf__C': [0.1, 1, 10],
    'clf__solver': ['liblinear', 'lbfgs']
}

grid = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid.fit(X_train, y_train)

# With GridSearchCV's default refit=True, best_estimator_ is the winning
# pipeline re-fit on all of X_train. Unpack it into the two objects that are
# saved below: the fitted scaler and the fitted classifier.
best_pipeline = grid.best_estimator_
scaler = best_pipeline.named_steps['scaler']
best_model = best_pipeline.named_steps['clf']

# Scaled copies of both splits, produced by the scaler fitted on X_train only.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 📌 Step 5: Evaluate
# Score the tuned classifier on the held-out test split.
# NOTE(review): the recorded run reports 1.00 accuracy on all 400 test rows.
# A perfect score is worth double-checking - confirm that no feature column in
# preprocessed_data.csv is derived from (or duplicates) 'Disease_Outbreak'.
y_pred = best_model.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# 📌 Step 6: Save fine-tuned model and scaler
# The classifier and the scaler are saved as two separate files; anything that
# loads the model must apply the saved scaler to its inputs first.
joblib.dump(best_model, '../App/model/fine_tune.pkl')
joblib.dump(scaler, '../App/model/scaler.pkl')  # ✅ Save scaler again
print("✅ Fine-tuned model and scaler saved")


Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       200
           1       1.00      1.00      1.00       200

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400

✅ Fine-tuned model and scaler saved
