In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score, KFold, StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                            confusion_matrix, precision_recall_curve, roc_curve, auc,
                            classification_report, roc_auc_score, average_precision_score)

# Plot appearance: apply the matplotlib style sheet first, then the seaborn palette
PLOT_STYLE = 'seaborn-v0_8-whitegrid'
COLOR_PALETTE = 'viridis'
plt.style.use(PLOT_STYLE)
sns.set_palette(COLOR_PALETTE)


In [None]:
# Load the breast cancer dataset: features as a DataFrame, labels as a Series
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data=data.target, name='target')

# Print a short summary of the dataset, one line of information per entry
dataset_summary = (
    f"Dataset shape: {X.shape}",
    f"Number of classes: {len(np.unique(y))}",
    f"Class distribution:\n{y.value_counts()}",
    f"Class names: {data.target_names}",
)
for summary_line in dataset_summary:
    print(summary_line)

# Preview the first rows of the feature table
X.head()


In [None]:
# Hold out 25% of the samples for testing, stratified on the label
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training split only, then apply them to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(split) for split in (X_train, X_test))

print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")


In [None]:
# Candidate classifiers, keyed by the display name used in every later table and plot
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'Decision Tree': DecisionTreeClassifier(max_depth=5, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42),
    'SVM': SVC(kernel='rbf', C=1.0, probability=True, random_state=42),
}

# Test-set outputs per model: predicted labels and the probability of class 1
y_preds, y_pred_probs = {}, {}

# Names of the models that are fitted on the standardised features
scaled_model_names = ('Logistic Regression', 'SVM')

for name, model in models.items():
    # Pick the matching train/test feature pair once, then share the fit/predict code
    if name in scaled_model_names:
        fit_features, eval_features = X_train_scaled, X_test_scaled
    else:
        fit_features, eval_features = X_train, X_test

    model.fit(fit_features, y_train)
    y_preds[name] = model.predict(eval_features)
    y_pred_probs[name] = model.predict_proba(eval_features)[:, 1]

    print(f"{name} model trained.")


In [None]:
# Calculate metrics for each model
# The table is built from numeric rows so every column is float64. Filling an
# empty DataFrame cell-by-cell with .loc leaves the columns as object dtype, and
# numeric helpers such as .round() and .describe() then do not treat the scores
# as numbers.
metric_functions = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1-Score': f1_score,
}

# One row per model (in y_preds order), one column per metric
metric_rows = [
    [score(y_test, y_pred) for score in metric_functions.values()]
    for y_pred in y_preds.values()
]
metrics = pd.DataFrame(
    metric_rows,
    index=list(y_preds),
    columns=list(metric_functions),
    dtype=float,
)

# Sort by F1-Score
metrics = metrics.sort_values('F1-Score', ascending=False)

# Display metrics
metrics


In [None]:
# Reshape the metrics table to long form (one row per model/metric pair) for seaborn
metrics_melted = metrics.reset_index().melt(
    id_vars='index',
    value_vars=['Accuracy', 'Precision', 'Recall', 'F1-Score'],
    var_name='Metric',
    value_name='Score',
)

# Grouped bar chart: one group per model, one bar per metric
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=metrics_melted, x='index', y='Score', hue='Metric', ax=ax)
ax.set_title('Model Performance Comparison')
ax.set_xlabel('Model')
ax.set_ylabel('Score')
ax.set_ylim(0.8, 1.0)  # Show only the 0.8-1.0 range so small differences are visible
ax.legend(title='Metric')
plt.setp(ax.get_xticklabels(), rotation=30)
fig.tight_layout()
plt.show()


In [None]:
# Function to plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, title, ax=None, class_names=None):
    """Draw an annotated confusion-matrix heatmap for one set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    title : str
        Title placed above the heatmap.
    ax : matplotlib Axes, optional
        Axes to draw on. A new 8x6 figure is created when omitted.
    class_names : sequence of str, optional
        Tick labels used on both axes. Defaults to ``data.target_names`` from
        the dataset loaded at notebook level, which was previously the only option.

    Returns
    -------
    matplotlib Axes
        The axes holding the heatmap, so callers can customise or save it.
    """
    if class_names is None:
        # Fall back to the notebook-level dataset so existing calls keep working
        class_names = data.target_names

    cm = confusion_matrix(y_true, y_pred)
    if ax is None:
        _, ax = plt.subplots(figsize=(8, 6))

    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(title)
    return ax
    
# Plot confusion matrices for all models
# The grid is sized from the number of models instead of assuming exactly four:
# a fixed 2x2 layout would raise IndexError with a fifth model and leave blank
# axes on screen with fewer than four.
n_cols = 2
n_rows = max(1, -(-len(y_preds) // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 6 * n_rows), squeeze=False)
axes = axes.ravel()

for ax, (name, y_pred) in zip(axes, y_preds.items()):
    plot_confusion_matrix(y_test, y_pred, f'Confusion Matrix - {name}', ax)

# Hide any grid cells that did not receive a model
for unused_ax in axes[len(y_preds):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()


In [None]:
# Draw one ROC curve per model on a shared set of axes, with its AUC in the legend
fig, ax = plt.subplots(figsize=(10, 8))

for name, y_pred_prob in y_pred_probs.items():
    fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
    roc_auc = auc(fpr, tpr)
    ax.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.3f})')

# Diagonal reference line labelled as random guessing
ax.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Random guessing')

ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curves for Different Models')
ax.legend(loc='lower right')
ax.grid(True)
plt.show()

# Tabulate the AUC of every model, highest first
auc_values = {
    name: roc_auc_score(y_test, y_pred_prob)
    for name, y_pred_prob in y_pred_probs.items()
}
auc_df = (
    pd.DataFrame.from_dict(auc_values, orient='index', columns=['AUC'])
    .sort_values('AUC', ascending=False)
)
auc_df


In [None]:
# Calculate precision-recall curves for each model
plt.figure(figsize=(10, 8))

for name, y_pred_prob in y_pred_probs.items():
    precision, recall, _ = precision_recall_curve(y_test, y_pred_prob)
    # Average precision weights each precision value by the increase in recall at
    # that threshold. A plain np.mean(precision) weights every threshold equally,
    # so its value depends on how many distinct scores a model produces and is not
    # comparable between models.
    avg_precision = average_precision_score(y_test, y_pred_prob)
    plt.plot(recall, precision, label=f'{name} (Avg. Precision = {avg_precision:.3f})')

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curves for Different Models')
plt.legend(loc='best')
plt.grid(True)
plt.show()


In [None]:
# Print the per-class report of every model, each followed by a separator line
separator = "-" * 80
for name in y_preds:
    report = classification_report(y_test, y_preds[name], target_names=data.target_names)
    print(f"Classification Report for {name}:", report, separator, sep="\n")


In [None]:
# Perform 5-fold cross-validation on each model
cv_results = {}
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for name, model in models.items():
    if name in ['Logistic Regression', 'SVM']:
        # Models that benefit from scaling: wrap them in a pipeline so the scaler
        # is re-fit on the training part of every fold. Cross-validating on
        # X_train_scaled would leak information, because that array was scaled
        # with statistics computed over the whole training set, including the
        # samples later used as each fold's validation data.
        estimator = make_pipeline(StandardScaler(), model)
    else:
        # Models that don't require scaling
        estimator = model

    # Every model is scored on the same unscaled training data and the same folds
    scores = cross_val_score(estimator, X_train, y_train, cv=cv, scoring='accuracy')

    cv_results[name] = scores

    print(f"{name} - Cross-validation scores: {scores}")
    print(f"{name} - Mean CV score: {scores.mean():.4f} (±{scores.std():.4f})")
    print("-" * 80)


In [None]:
# Summarise each model's fold scores as a mean and a standard deviation
model_names = list(cv_results)
cv_means = [cv_results[model_name].mean() for model_name in model_names]
cv_stds = [cv_results[model_name].std() for model_name in model_names]
bar_positions = range(len(model_names))

# Bar chart of mean accuracy with the standard deviation as error bars
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(bar_positions, cv_means, yerr=cv_stds, alpha=0.7, capsize=10)
ax.set_xticks(bar_positions)
ax.set_xticklabels(model_names, rotation=30)
ax.set_ylabel('Mean Accuracy')
ax.set_title('Cross-Validation Results (5-fold)')
ax.set_ylim(0.9, 1.0)  # Show only the 0.9-1.0 range so small differences are visible
ax.grid(axis='y')
fig.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, 
    confusion_matrix, classification_report, roc_curve, auc,
    precision_recall_curve, average_precision_score,
    mean_squared_error, mean_absolute_error, r2_score
)

# Seed NumPy's global random generator for reproducibility, then apply the plot style
SEED = 42
np.random.seed(SEED)
plt.style.use('seaborn-v0_8-whitegrid')
