In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, roc_curve, auc,precision_score

In [None]:
# Load the raw dataset; the path is relative to the notebook's working directory.
dataset_path = 'air_quality_health_impact_data.csv'
df = pd.read_csv(dataset_path)

In [None]:
# Remove the RecordID column before modelling (presumably a row identifier
# with no predictive meaning — confirm against the dataset description).
# errors='ignore' keeps the cell idempotent: re-running it after the column
# has already been dropped no longer raises KeyError.
df = df.drop(columns=['RecordID'], errors='ignore')

In [None]:
# Separate the label column from the rest of the frame:
# X holds every remaining column, y holds the class label.
target_column = "HealthImpactClass"
X = df.drop(columns=[target_column], errors='ignore')
y = df[target_column]
# NOTE(review): X keeps every other column of df. If any of them is a direct
# function of the class label (e.g. a score the class was binned from), it
# leaks the target into the features — confirm against the dataset schema.

In [None]:
# Gradient Boosting — 80% train / 20% test, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained.
model = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
).fit(X_train, y_train)

y_pred = model.predict(X_test)
class_probabilities = model.predict_proba(X_test)

# Label-based metrics use the hard predictions; AUC-ROC uses the per-class
# probabilities in a one-vs-rest scheme.
accuracy = accuracy_score(y_test, y_pred)
f1_score_weighted = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
auc_roc = roc_auc_score(y_test, class_probabilities, multi_class='ovr')

print("\nGradient Boosting Classifier Train 80% - Test 20% ")
report_lines = [
    f"Accuracy: {accuracy:.4f}",
    f"F1 Score (Weighted): {f1_score_weighted:.4f}",
    f"Precision: {precision:.4f}",
    f"AUC-ROC: {auc_roc:.4f}",
]
print("\n".join(report_lines))


Gradient Boosting Classifier Train 80% - Test 20% 
Accuracy: 0.9398
F1 Score (Weighted): 0.9362
Precision: 0.9328
AUC-ROC: 0.7831


In [None]:
# Gradient Boosting — 75% train / 25% test, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained.
model = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
).fit(X_train, y_train)

y_pred = model.predict(X_test)
class_probabilities = model.predict_proba(X_test)

# Label-based metrics use the hard predictions; AUC-ROC uses the per-class
# probabilities in a one-vs-rest scheme.
accuracy = accuracy_score(y_test, y_pred)
f1_score_weighted = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
auc_roc = roc_auc_score(y_test, class_probabilities, multi_class='ovr')

print("\nGradient Boosting Classifier Train 75% - Test 25% ")
report_lines = [
    f"Accuracy: {accuracy:.4f}",
    f"F1 Score (Weighted): {f1_score_weighted:.4f}",
    f"Precision: {precision:.4f}",
    f"AUC-ROC: {auc_roc:.4f}",
]
print("\n".join(report_lines))


Gradient Boosting Classifier Train 75% - Test 25% 
Accuracy: 0.9429
F1 Score (Weighted): 0.9393
Precision: 0.9374
AUC-ROC: 0.7765


In [None]:
# Gradient Boosting — 70% train / 30% test, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained.
model = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
).fit(X_train, y_train)

y_pred = model.predict(X_test)
class_probabilities = model.predict_proba(X_test)

# Label-based metrics use the hard predictions; AUC-ROC uses the per-class
# probabilities in a one-vs-rest scheme.
accuracy = accuracy_score(y_test, y_pred)
f1_score_weighted = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
auc_roc = roc_auc_score(y_test, class_probabilities, multi_class='ovr')

print("\nGradient Boosting Classifier Train 70% - Test 30% ")
report_lines = [
    f"Accuracy: {accuracy:.4f}",
    f"F1 Score (Weighted): {f1_score_weighted:.4f}",
    f"Precision: {precision:.4f}",
    f"AUC-ROC: {auc_roc:.4f}",
]
print("\n".join(report_lines))


Gradient Boosting Classifier Train 70% - Test 30% 
Accuracy: 0.9404
F1 Score (Weighted): 0.9368
Precision: 0.9339
AUC-ROC: 0.7966


In [None]:
# Random Forest — 80% train / 20% test, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)  # per-class probabilities for AUC-ROC

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
# The recorded run emitted sklearn's precision warning (_warn_prf), which is
# raised when some label receives no predicted samples. zero_division=0 states
# the fallback explicitly: the value matches the default "warn" behaviour,
# without the warning text in the cell output.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
# One-vs-rest AUC computed from the class-probability matrix.
auc_roc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

print("Random Forest Model Evaluation Metrics (80,20):")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"AUC-ROC: {auc_roc:.4f}")

Random Forest Model Evaluation Metrics (80,20):
Accuracy: 0.9579
F1-score: 0.9505
Precision: 0.9489
AUC-ROC: 0.7794


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Random Forest — 75% train / 25% test, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)  # per-class probabilities for AUC-ROC

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
# The recorded run emitted sklearn's precision warning (_warn_prf), which is
# raised when some label receives no predicted samples. zero_division=0 states
# the fallback explicitly: the value matches the default "warn" behaviour,
# without the warning text in the cell output.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
# One-vs-rest AUC computed from the class-probability matrix.
auc_roc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

print("Random Forest Model Evaluation Metrics (75,25):")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"AUC-ROC: {auc_roc:.4f}")

Random Forest Model Evaluation Metrics (75,25):
Accuracy: 0.9608
F1-score: 0.9536
Precision: 0.9517
AUC-ROC: 0.8216


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Random Forest — 70% train / 30% test, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)  # per-class probabilities for AUC-ROC

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
# The recorded run emitted sklearn's precision warning (_warn_prf), which is
# raised when some label receives no predicted samples. zero_division=0 states
# the fallback explicitly: the value matches the default "warn" behaviour,
# without the warning text in the cell output.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
# One-vs-rest AUC computed from the class-probability matrix.
auc_roc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

print("Random Forest Model Evaluation Metrics (70,30):")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"AUC-ROC: {auc_roc:.4f}")

Random Forest Model Evaluation Metrics (70,30):
Accuracy: 0.9587
F1-score: 0.9510
Precision: 0.9496
AUC-ROC: 0.8259


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Logistic Regression — 80% train / 20% test.
# stratify=y makes this split use the same stratified scheme as the Gradient
# Boosting and Random Forest cells, so all three models are scored on the same
# test rows. The printed numbers will therefore differ from a run made with
# the earlier unstratified split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scaling lives inside the pipeline, so the scaler is fitted on the training
# fold only and then applied to the test fold.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, random_state=42))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
# sklearn warns when some label receives no predicted samples; zero_division=0
# states the fallback explicitly (same value as the default "warn" behaviour)
# and keeps the warning text out of the cell output.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
y_pred_proba = model.predict_proba(X_test)
# One-vs-rest AUC computed from the class-probability matrix.
auc_roc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

print("Logistic Regression Model Evaluation Metrics (80,20):")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"AUC-ROC: {auc_roc:.4f}")

Logistic Regression Model Evaluation Metrics (80,20):
Accuracy: 0.9424
F1-score: 0.9279
Precision: 0.9143
AUC-ROC: 0.7705


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Logistic Regression — 75% train / 25% test.
# stratify=y makes this split use the same stratified scheme as the Gradient
# Boosting and Random Forest cells, so all three models are scored on the same
# test rows. The printed numbers will therefore differ from a run made with
# the earlier unstratified split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

# Scaling lives inside the pipeline, so the scaler is fitted on the training
# fold only and then applied to the test fold.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, random_state=42))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
# sklearn warns when some label receives no predicted samples; zero_division=0
# states the fallback explicitly (same value as the default "warn" behaviour)
# and keeps the warning text out of the cell output.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
y_pred_proba = model.predict_proba(X_test)
# One-vs-rest AUC computed from the class-probability matrix.
auc_roc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

print("Logistic Regression Model Evaluation Metrics (75,25):")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"AUC-ROC: {auc_roc:.4f}")

Logistic Regression Model Evaluation Metrics (75,25):
Accuracy: 0.9394
F1-score: 0.9252
Precision: 0.9130
AUC-ROC: 0.7792


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Logistic Regression — 70% train / 30% test.
# stratify=y makes this split use the same stratified scheme as the Gradient
# Boosting and Random Forest cells, so all three models are scored on the same
# test rows. The printed numbers will therefore differ from a run made with
# the earlier unstratified split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Scaling lives inside the pipeline, so the scaler is fitted on the training
# fold only and then applied to the test fold.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, random_state=42))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
# sklearn warns when some label receives no predicted samples; zero_division=0
# states the fallback explicitly (same value as the default "warn" behaviour)
# and keeps the warning text out of the cell output.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
y_pred_proba = model.predict_proba(X_test)
# One-vs-rest AUC computed from the class-probability matrix.
auc_roc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

print("Logistic Regression Model Evaluation Metrics (70,30):")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"AUC-ROC: {auc_roc:.4f}")

Logistic Regression Model Evaluation Metrics (70,30):
Accuracy: 0.9404
F1-score: 0.9262
Precision: 0.9149
AUC-ROC: 0.7844


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
