Task-1:

In [47]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# Task 1: evaluate a gradient-boosting classifier on the stroke dataset with
# stratified 5-fold cross-validation, reporting accuracy and ROC-AUC per fold
# plus a majority-class baseline so the accuracy figures can be interpreted.
df = pd.read_csv(r"C:\Users\DELL\Downloads\healthcare-dataset-stroke-data.csv", encoding='latin1')

# "id" is removed so it is not fed to the model as a predictor.
df = df.drop(columns=["id"])

# Remove every row that has a missing value in any column, and remember how
# many rows that costs so the loss is visible in the report below.
# NOTE(review): if the missing values sit mostly in one feature, dropping rows
# may discard a meaningful share of the minority class; imputing inside each
# training fold is the usual alternative. Confirm against the data.
n_rows_loaded = len(df)
df = df.dropna()
n_rows_dropped = n_rows_loaded - len(df)

X = df.drop(columns=["stroke"])
y = df["stroke"]

# One-hot encode the non-numeric columns; drop_first removes one level per
# column to avoid a redundant indicator.
X = pd.get_dummies(X, drop_first=True)

# Accuracy of always predicting the most frequent class. Any accuracy reported
# below is only informative relative to this number.
baseline_acc = y.value_counts(normalize=True).max()

model = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=3,
    random_state=42
)

# Stratified folds keep the class ratio roughly equal in every split.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
acc_scores, auc_scores = [], []

for fold, (train_idx, test_idx) in enumerate(skf.split(X, y), start=1):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # warm_start is not enabled, so each fit() retrains from scratch and the
    # single estimator instance carries nothing over between folds.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Column 1 of predict_proba is the score for the second class label.
    y_proba = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)

    acc_scores.append(acc)
    auc_scores.append(auc)

    print(f"Fold {fold}: Accuracy = {acc:.4f} | ROC-AUC = {auc:.4f}")

# Summary (same separator as the other task cells).
print("="*50)
print(f"Mean Accuracy: {np.mean(acc_scores):.4f}")
print(f"Mean ROC-AUC : {np.mean(auc_scores):.4f}")
print(f"Majority-class baseline accuracy: {baseline_acc:.4f}")
print(f"Rows dropped for missing values: {n_rows_dropped} of {n_rows_loaded}")


Fold 1: Accuracy = 0.9542 | ROC-AUC = 0.8297
Fold 2: Accuracy = 0.9552 | ROC-AUC = 0.8346
Fold 3: Accuracy = 0.9572 | ROC-AUC = 0.8509
Fold 4: Accuracy = 0.9562 | ROC-AUC = 0.8279
Fold 5: Accuracy = 0.9541 | ROC-AUC = 0.8473
Mean Accuracy: 0.9554
Mean ROC-AUC : 0.8381


Task-2:

In [51]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import GradientBoostingClassifier

# Task 2: red-wine quality data. The binary label "good_wine" is 1 when the
# recorded quality score is 7 or higher, and 0 otherwise.
df = pd.read_csv(r"C:\Users\DELL\Downloads\winequality-red.csv", encoding='latin1')
df["good_wine"] = df["quality"].ge(7).astype(int)

# Target first, then predictors. "quality" is excluded from the predictors
# because the label is derived directly from it.
y = df["good_wine"]
X = df.drop(columns=["quality", "good_wine"])

# Gradient-boosted trees with a fixed seed.
model = GradientBoostingClassifier(
    n_estimators=300, learning_rate=0.05, max_depth=3, random_state=42
)

# Shuffled, stratified 5-fold evaluation; one score per fold is collected.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
acc_scores = []
auc_scores = []

fold = 0
for train_idx, test_idx in skf.split(X, y):
    fold += 1

    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

    model.fit(X_train, y_train)

    # Hard labels feed accuracy; the second predict_proba column feeds ROC-AUC.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    acc_scores += [acc]
    auc_scores += [auc]

    print(f"Fold {fold}: Accuracy = {acc:.4f} | ROC-AUC = {auc:.4f}")

# Averages over the five folds.
print("=" * 50)
print(f"Mean Accuracy: {np.mean(acc_scores):.4f}")
print(f"Mean ROC-AUC : {np.mean(auc_scores):.4f}")


Fold 1: Accuracy = 0.9125 | ROC-AUC = 0.9208
Fold 2: Accuracy = 0.9094 | ROC-AUC = 0.9124
Fold 3: Accuracy = 0.9062 | ROC-AUC = 0.9125
Fold 4: Accuracy = 0.8781 | ROC-AUC = 0.8701
Fold 5: Accuracy = 0.9216 | ROC-AUC = 0.9179
Mean Accuracy: 0.9056
Mean ROC-AUC : 0.9067


Task-3:

In [53]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score
from xgboost import XGBClassifier

# Task 3: mushroom data, classified with XGBoost under stratified 5-fold CV.
df = pd.read_csv(r"C:\Users\DELL\Downloads\mushrooms (1).csv", encoding='latin1')

# Binary target: poisonous ('p') becomes 1, edible ('e') becomes 0.
y = df["class"].map(dict(p=1, e=0))

# Predictors are every column except the label, one-hot encoded with the
# first level of each column dropped.
X = df.drop(columns=["class"])
X = pd.get_dummies(X, drop_first=True)

# use_label_encoder is deliberately not passed, which keeps the fit free of
# the associated warning.
model = XGBClassifier(
    n_estimators=300, learning_rate=0.05, max_depth=4,
    eval_metric="logloss", random_state=42,
)

# Shuffled, stratified folds with a fixed seed.
# NOTE(review): the recorded run scored 1.0000 on every fold; worth confirming
# that no encoded feature trivially determines the class.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
acc_scores = []
auc_scores = []

for fold, split in enumerate(skf.split(X, y), 1):
    train_idx, test_idx = split

    X_train = X.iloc[train_idx]
    y_train = y.iloc[train_idx]
    X_test = X.iloc[test_idx]
    y_test = y.iloc[test_idx]

    model.fit(X_train, y_train)

    # Predicted labels for accuracy; second predict_proba column for ROC-AUC.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)

    acc_scores.append(acc)
    auc_scores.append(auc)

    print(f"Fold {fold}: Accuracy = {acc:.4f} | ROC-AUC = {auc:.4f}")

# Fold-averaged results.
print("=" * 50)
print(f"Mean Accuracy: {np.mean(acc_scores):.4f}")
print(f"Mean ROC-AUC : {np.mean(auc_scores):.4f}")


Fold 1: Accuracy = 1.0000 | ROC-AUC = 1.0000
Fold 2: Accuracy = 1.0000 | ROC-AUC = 1.0000
Fold 3: Accuracy = 1.0000 | ROC-AUC = 1.0000
Fold 4: Accuracy = 1.0000 | ROC-AUC = 1.0000
Fold 5: Accuracy = 1.0000 | ROC-AUC = 1.0000
Mean Accuracy: 1.0000
Mean ROC-AUC : 1.0000
