In [None]:
# Cell 1: Comprehensive Program Execution (Consolidated for Robustness)

# --- 1. Core Imports and Setup ---
from google.colab import drive
import os, time, joblib, warnings
warnings.filterwarnings('ignore')

import numpy as np, pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier, plot_tree, export_graphviz
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

# Mount Google Drive and setup output directory
drive.mount('/content/drive')
OUT_DIR = '/content/drive/MyDrive/Lab6_results'
os.makedirs(OUT_DIR, exist_ok=True)
print('Outputs will be saved to:', OUT_DIR)

# Install graphviz (optional, for rendering .dot -> .png)
!apt-get -qq install -y graphviz
!pip -q install graphviz
print('Graphviz installed')

# Define cv for all GridSearch operations
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Helper functions
def measure_inference_time(model, X_sample, repeats=30):
    """Return the mean wall-clock seconds taken by one ``model.predict(X_sample)`` call.

    Parameters
    ----------
    model : object
        Any object exposing a ``predict`` method.
    X_sample : array-like
        Input passed unchanged to ``model.predict`` on every repetition.
    repeats : int, default 30
        Number of timed calls to average over; must be at least 1.

    Returns
    -------
    float
        Total elapsed time divided by ``repeats``.

    Raises
    ------
    ValueError
        If ``repeats`` is smaller than 1 (this used to surface as a
        ``ZeroDivisionError`` for 0 or a meaningless negative duration for
        negative values).
    """
    if repeats < 1:
        raise ValueError(f"repeats must be >= 1, got {repeats}")
    # perf_counter is monotonic and high-resolution, so short predict calls are
    # not distorted by system clock adjustments the way time.time() can be.
    start = time.perf_counter()
    for _ in range(repeats):
        model.predict(X_sample)
    return (time.perf_counter() - start) / repeats

def plot_confusion(y_true, y_pred, title, out_path=None, labels=None):
    """Draw a confusion-matrix heat map annotated with per-cell counts.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted class labels, passed to ``confusion_matrix``.
    title : str
        Figure title.
    out_path : str, optional
        If given, the figure is written to this path before being shown.
    labels : sequence of str, optional
        Tick labels, one per class. When omitted, the notebook-level
        ``target_names`` is used if it exists. If no labels are available or
        their count does not match the matrix size, plain integer tick
        positions are shown instead.
    """
    cm = confusion_matrix(y_true, y_pred)
    n_classes = cm.shape[0]
    if labels is None:
        # Backward-compatible fallback: existing callers rely on the global.
        labels = globals().get('target_names')

    fig, ax = plt.subplots(figsize=(4, 3))
    image = ax.imshow(cm, interpolation='nearest')
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

    ticks = np.arange(n_classes)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    if labels is not None and len(labels) == n_classes:
        ax.set_xticklabels(labels, rotation=45, ha='right')
        ax.set_yticklabels(labels)

    fig.colorbar(image, ax=ax)
    for row, col in np.ndindex(*cm.shape):
        # Matrix rows go on the y axis ('True'), columns on the x axis ('Predicted').
        ax.text(col, row, cm[row, col], ha='center', va='center')
    fig.tight_layout()
    if out_path:
        fig.savefig(out_path)
    plt.show()
    # Release the figure explicitly: this helper is called once per model in a loop.
    plt.close(fig)


# --- 2. Data Loading and Preprocessing ---
file_path = '/content/drive/MyDrive/UNSW.csv.csv'
try:
    df = pd.read_csv(file_path, low_memory=False)
    print(f"Successfully loaded {file_path}")
except FileNotFoundError:
    print(f"Error: The file {file_path} was not found. Please ensure it exists in your Google Drive.")
    # Re-raise: without `df` every statement below would fail with a confusing NameError.
    raise
except Exception as e:
    print(f"An error occurred while loading the file: {e}")
    raise

# Drop 'Unnamed: 47' column as intended. It is removed up front because
# coercing and imputing a column that is discarded anyway is wasted work.
df = df.drop(columns=['Unnamed: 47'], errors='ignore')

# Convert columns to numeric, coercing errors, then impute missing values with
# their mean. Unparsable entries become NaN and are replaced by the column mean.
# NOTE(review): the means are computed on the full dataset, i.e. before the
# train/test split below, so test rows influence the imputed values.
COERCED_NUMERIC_COLUMNS = ['0.4', '0.5', '0.6']
imputation_means = {}
for col in COERCED_NUMERIC_COLUMNS:
    coerced = pd.to_numeric(df[col], errors='coerce')
    imputation_means[col] = coerced.mean()
    df[col] = coerced.fillna(imputation_means[col])
# Keep the original per-column names available in the notebook namespace.
mean_0_4, mean_0_5, mean_0_6 = (imputation_means[c] for c in COERCED_NUMERIC_COLUMNS)

# Define y and X
y = df['0.7']
X = df.drop('0.7', axis=1)

# Identify categorical and numeric features. '0.6' has already been converted
# above, so it is picked up as numeric here without patching the lists by hand.
categorical_features = X.select_dtypes(include=['object']).columns.tolist()
numeric_features = X.select_dtypes(exclude=['object']).columns.tolist()

# Perform one-hot encoding (first level of each categorical column is dropped)
X_encoded = pd.get_dummies(X, columns=categorical_features, drop_first=True)
print(f"Original features shape: {X.shape}")
print(f"Encoded features shape: {X_encoded.shape}")

# Define feature_names and target_names for UNSW dataset
feature_names = X_encoded.columns.tolist()
if sorted(y.unique().tolist()) == [0, 1]:
    target_names = ['Normal', 'Attack']
else:
    # Fall back to the raw label values when the target is not a 0/1 column.
    target_names = [str(x) for x in sorted(y.unique().tolist())]

# Perform train-test split (80/20, stratified on the target, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training features shape: {X_train.shape}")
print(f"Testing features shape: {X_test.shape}")
print(f"Training target shape: {y_train.shape}")
print(f"Testing target shape: {y_test.shape}")
print(f"Unique values in target variable y: {y.unique()}")


# --- 3. Model Training and Evaluation ---

# Decision Tree GridSearch
dt_base = DecisionTreeClassifier(random_state=42)
param_grid_dt = {
    'criterion': ['gini','entropy'],
    'max_depth': [3,5,8, None],
    'min_samples_split': [2,5,10],
    'min_samples_leaf': [1,2,4]
}
grid_dt = GridSearchCV(dt_base, param_grid_dt, cv=cv, scoring='f1', n_jobs=-1, verbose=1, refit=True)
print("\nFitting Decision Tree GridSearch (this may take a while)...")
t0 = time.time()
grid_dt.fit(X_train, y_train)
print("DT GridSearch time (s):", time.time()-t0)

best_dt = grid_dt.best_estimator_
print("Best DT params:", grid_dt.best_params_, "Best CV F1:", grid_dt.best_score_)

# Evaluate and save DT
y_pred_dt = best_dt.predict(X_test)
print("DT Test acc:", accuracy_score(y_test, y_pred_dt))
print("DT Test F1:", f1_score(y_test, y_pred_dt))
print(classification_report(y_test, y_pred_dt, target_names=target_names))
joblib.dump(best_dt, os.path.join(OUT_DIR, 'best_decision_tree.joblib'))
dot_path = os.path.join(OUT_DIR, 'Best_DecisionTree.dot')
export_graphviz(best_dt, out_file=dot_path, feature_names=list(feature_names), class_names=list(target_names), rounded=True, filled=True)
png_path = os.path.join(OUT_DIR, 'Best_DecisionTree.png')
!dot -Tpng "{dot_path}" -o "{png_path}" || true
print("Saved .dot and attempted to render png at:", png_path)
plt.figure(figsize=(12,6))
plot_tree(best_dt, feature_names=feature_names, class_names=target_names, filled=True, rounded=True)
plt.title('Best Decision Tree')
plt.show()

# SVC GridSearch (Pipeline with StandardScaler)
# Scaling lives inside the pipeline so it is re-fitted on each CV training fold.
# probability=True is kept on the SVC so the fitted model exposes class
# probabilities to the ensembles built from it.
pipe_svc = Pipeline([('scaler', StandardScaler()), ('svc', SVC(probability=True, random_state=42))])
# One sub-grid per kernel: `gamma` has no effect on the linear kernel, so
# crossing it with kernel='linear' only produced duplicate (and expensive)
# candidates. This searches 9 distinct candidates instead of 12.
param_grid_svc = [
    {'svc__kernel': ['linear'], 'svc__C': [0.1, 1, 10]},
    {'svc__kernel': ['rbf'], 'svc__C': [0.1, 1, 10], 'svc__gamma': ['scale', 'auto']},
]
grid_svc = GridSearchCV(pipe_svc, param_grid_svc, cv=cv, scoring='f1', n_jobs=-1, verbose=1, refit=True)
print("\nFitting SVC GridSearch...")
t0 = time.time()
grid_svc.fit(X_train, y_train)
print("SVC GridSearch time (s):", time.time()-t0)

best_svc = grid_svc.best_estimator_
print("Best SVC params:", grid_svc.best_params_, "Best CV F1:", grid_svc.best_score_)

# Evaluate and save SVC
y_pred_svc = best_svc.predict(X_test)
print("SVC Test acc:", accuracy_score(y_test, y_pred_svc))
print("SVC Test F1:", f1_score(y_test, y_pred_svc))
print(classification_report(y_test, y_pred_svc, target_names=target_names))
joblib.dump(best_svc, os.path.join(OUT_DIR, 'best_svc.joblib'))

# Bagging (SVC & DT) + Voting ensemble

# Bagging SVC (tune bagging params)
# The tuned SVC pipeline is the base learner; only the ensemble size and the
# per-estimator sample fraction are searched here.
bag_svc = BaggingClassifier(estimator=best_svc, random_state=42)
param_grid_bag_svc = dict(
    n_estimators=[10, 25, 50],
    max_samples=[0.6, 0.8, 1.0],
)
grid_bag_svc = GridSearchCV(
    estimator=bag_svc,
    param_grid=param_grid_bag_svc,
    scoring='f1',
    cv=cv,
    refit=True,
    n_jobs=-1,
    verbose=1,
)
print("\nFitting Bagging SVC GridSearch...")
t0 = time.time()
grid_bag_svc.fit(X_train, y_train)
print("Bagging SVC time (s):", time.time()-t0)
best_bag_svc = grid_bag_svc.best_estimator_
print("Best Bagging SVC:", grid_bag_svc.best_params_, "CV F1:", grid_bag_svc.best_score_)
bag_svc_path = os.path.join(OUT_DIR, 'best_bag_svc.joblib')
joblib.dump(best_bag_svc, bag_svc_path)

# Bagging DecisionTree
# Starts from a fresh, untuned tree and searches the tree's depth / leaf size
# together with the bagging parameters (`estimator__*` reaches the base tree).
base_dt_bag = DecisionTreeClassifier(random_state=42)
bag_dt = BaggingClassifier(estimator=base_dt_bag, random_state=42)
param_grid_bag_dt = dict(
    n_estimators=[10, 25, 50],
    max_samples=[0.6, 0.8, 1.0],
    estimator__max_depth=[3, 5, 8],
    estimator__min_samples_leaf=[1, 2, 4],
)
grid_bag_dt = GridSearchCV(
    estimator=bag_dt,
    param_grid=param_grid_bag_dt,
    scoring='f1',
    cv=cv,
    refit=True,
    n_jobs=-1,
    verbose=1,
)
print("\nFitting Bagging DT GridSearch...")
t0 = time.time()
grid_bag_dt.fit(X_train, y_train)
print("Bagging DT time (s):", time.time()-t0)
best_bag_dt = grid_bag_dt.best_estimator_
print("Best Bagging DT:", grid_bag_dt.best_params_, "CV F1:", grid_bag_dt.best_score_)
bag_dt_path = os.path.join(OUT_DIR, 'best_bag_dt.joblib')
joblib.dump(best_bag_dt, bag_dt_path)

# Voting ensemble (soft): combines the two tuned bagging ensembles.
voting = VotingClassifier(
    estimators=[('bag_svc', best_bag_svc), ('bag_dt', best_bag_dt)],
    voting='soft',
)
voting.fit(X_train, y_train)
voting_path = os.path.join(OUT_DIR, 'voting_ensemble.joblib')
joblib.dump(voting, voting_path)

# Evaluate and save summary
models = {'DecisionTree': best_dt, 'SVC': best_svc, 'Bagging_SVC': best_bag_svc, 'Bagging_DT': best_bag_dt, 'Voting': voting}
rows = []
# Test-set predictions are cached per model so the confusion-matrix loop below
# does not have to run every (potentially slow) model a second time.
test_predictions = {}
for name, model in models.items():
    y_pred = model.predict(X_test)
    test_predictions[name] = y_pred
    rows.append({
        'model': name,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        # Mean seconds per full-test-set predict call, averaged over 30 runs.
        'inf_time_s': measure_inference_time(model, X_test, repeats=30),
    })
results = pd.DataFrame(rows).set_index('model')
print("\n--- Model Performance Summary ---")
print(results)
results.to_csv(os.path.join(OUT_DIR, 'lab6_results_summary.csv'))

# Save confusion matrices and accuracy bar
for name, y_pred in test_predictions.items():
    plot_confusion(y_test, y_pred, f'Confusion: {name}', out_path=os.path.join(OUT_DIR, f'confusion_{name}.png'))

plt.figure(figsize=(8,4))
plt.bar(results.index, results['accuracy'])
# Keep the original 0.8-1.0 window when every model fits inside it, but lower
# the floor when a model scores below 0.8 so its bar is not cut off.
acc_floor = min(0.8, max(0.0, results['accuracy'].min() - 0.05))
plt.title('Test set accuracy by model'); plt.ylabel('Accuracy'); plt.ylim(acc_floor, 1.0)
plt.xticks(rotation=30)
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'accuracy_bar.png'))
plt.show()


# --- 4. Save Run Notes ---
# The note contains a non-ASCII dash, so the encoding is fixed explicitly
# instead of depending on the platform default.
notes_path = os.path.join(OUT_DIR, 'lab6_run_notes.txt')
with open(notes_path, 'w', encoding='utf-8') as f:
    f.write('LAB6 Colab run — GridSearch and Ensemble learning. Check saved models and figures in this folder.\n')
print("\nSaved run notes to", notes_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Outputs will be saved to: /content/drive/MyDrive/Lab6_results
Graphviz installed
Successfully loaded /content/drive/MyDrive/UNSW.csv.csv


# Task
Execute the consolidated program in Cell 1 to perform data loading, preprocessing, model training, and evaluation for the `UNSW.csv.csv` dataset, then summarize the performance of all trained models and confirm that all outputs (models, plots, and a summary CSV) have been saved to the `/content/drive/MyDrive/Lab6_results` directory.

## Execute Consolidated Program

### Subtask:
Execute the comprehensive code in Cell 1 to perform data loading, preprocessing, model training, evaluation, and saving of results for the `UNSW.csv.csv` dataset.


**Reasoning**:
The subtask is to execute the comprehensive code in Cell 1. I will use the `code_block` command to run the content of `cell_id: eef277e5`.



In [None]:
# Cell 1: Comprehensive Program Execution (Consolidated for Robustness)

# --- 1. Core Imports and Setup ---
from google.colab import drive
import os, time, joblib, warnings
warnings.filterwarnings('ignore')

import numpy as np, pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier, plot_tree, export_graphviz
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

# Mount Google Drive and setup output directory
drive.mount('/content/drive')
OUT_DIR = '/content/drive/MyDrive/Lab6_results'
os.makedirs(OUT_DIR, exist_ok=True)
print('Outputs will be saved to:', OUT_DIR)

# Install graphviz (optional, for rendering .dot -> .png)
!apt-get -qq install -y graphviz
!pip -q install graphviz
print('Graphviz installed')

# Define cv for all GridSearch operations
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Helper functions
def measure_inference_time(model, X_sample, repeats=30):
    """Return the mean wall-clock seconds taken by one ``model.predict(X_sample)`` call.

    Parameters
    ----------
    model : object
        Any object exposing a ``predict`` method.
    X_sample : array-like
        Input passed unchanged to ``model.predict`` on every repetition.
    repeats : int, default 30
        Number of timed calls to average over; must be at least 1.

    Returns
    -------
    float
        Total elapsed time divided by ``repeats``.

    Raises
    ------
    ValueError
        If ``repeats`` is smaller than 1 (this used to surface as a
        ``ZeroDivisionError`` for 0 or a meaningless negative duration for
        negative values).
    """
    if repeats < 1:
        raise ValueError(f"repeats must be >= 1, got {repeats}")
    # perf_counter is monotonic and high-resolution, so short predict calls are
    # not distorted by system clock adjustments the way time.time() can be.
    start = time.perf_counter()
    for _ in range(repeats):
        model.predict(X_sample)
    return (time.perf_counter() - start) / repeats

def plot_confusion(y_true, y_pred, title, out_path=None, labels=None):
    """Draw a confusion-matrix heat map annotated with per-cell counts.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted class labels, passed to ``confusion_matrix``.
    title : str
        Figure title.
    out_path : str, optional
        If given, the figure is written to this path before being shown.
    labels : sequence of str, optional
        Tick labels, one per class. When omitted, the notebook-level
        ``target_names`` is used if it exists. If no labels are available or
        their count does not match the matrix size, plain integer tick
        positions are shown instead.
    """
    cm = confusion_matrix(y_true, y_pred)
    n_classes = cm.shape[0]
    if labels is None:
        # Backward-compatible fallback: existing callers rely on the global.
        labels = globals().get('target_names')

    fig, ax = plt.subplots(figsize=(4, 3))
    image = ax.imshow(cm, interpolation='nearest')
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

    ticks = np.arange(n_classes)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    if labels is not None and len(labels) == n_classes:
        ax.set_xticklabels(labels, rotation=45, ha='right')
        ax.set_yticklabels(labels)

    fig.colorbar(image, ax=ax)
    for row, col in np.ndindex(*cm.shape):
        # Matrix rows go on the y axis ('True'), columns on the x axis ('Predicted').
        ax.text(col, row, cm[row, col], ha='center', va='center')
    fig.tight_layout()
    if out_path:
        fig.savefig(out_path)
    plt.show()
    # Release the figure explicitly: this helper is called once per model in a loop.
    plt.close(fig)


# --- 2. Data Loading and Preprocessing ---
file_path = '/content/drive/MyDrive/UNSW.csv.csv'
try:
    df = pd.read_csv(file_path, low_memory=False)
    print(f"Successfully loaded {file_path}")
except FileNotFoundError:
    print(f"Error: The file {file_path} was not found. Please ensure it exists in your Google Drive.")
    # Re-raise: without `df` every statement below would fail with a confusing NameError.
    raise
except Exception as e:
    print(f"An error occurred while loading the file: {e}")
    raise

# Drop 'Unnamed: 47' column as intended. It is removed up front because
# coercing and imputing a column that is discarded anyway is wasted work.
df = df.drop(columns=['Unnamed: 47'], errors='ignore')

# Convert columns to numeric, coercing errors, then impute missing values with
# their mean. Unparsable entries become NaN and are replaced by the column mean.
# NOTE(review): the means are computed on the full dataset, i.e. before the
# train/test split below, so test rows influence the imputed values.
COERCED_NUMERIC_COLUMNS = ['0.4', '0.5', '0.6']
imputation_means = {}
for col in COERCED_NUMERIC_COLUMNS:
    coerced = pd.to_numeric(df[col], errors='coerce')
    imputation_means[col] = coerced.mean()
    df[col] = coerced.fillna(imputation_means[col])
# Keep the original per-column names available in the notebook namespace.
mean_0_4, mean_0_5, mean_0_6 = (imputation_means[c] for c in COERCED_NUMERIC_COLUMNS)

# Define y and X
y = df['0.7']
X = df.drop('0.7', axis=1)

# Identify categorical and numeric features. '0.6' has already been converted
# above, so it is picked up as numeric here without patching the lists by hand.
categorical_features = X.select_dtypes(include=['object']).columns.tolist()
numeric_features = X.select_dtypes(exclude=['object']).columns.tolist()

# Perform one-hot encoding (first level of each categorical column is dropped)
X_encoded = pd.get_dummies(X, columns=categorical_features, drop_first=True)
print(f"Original features shape: {X.shape}")
print(f"Encoded features shape: {X_encoded.shape}")

# Define feature_names and target_names for UNSW dataset
feature_names = X_encoded.columns.tolist()
if sorted(y.unique().tolist()) == [0, 1]:
    target_names = ['Normal', 'Attack']
else:
    # Fall back to the raw label values when the target is not a 0/1 column.
    target_names = [str(x) for x in sorted(y.unique().tolist())]

# Perform train-test split (80/20, stratified on the target, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training features shape: {X_train.shape}")
print(f"Testing features shape: {X_test.shape}")
print(f"Training target shape: {y_train.shape}")
print(f"Testing target shape: {y_test.shape}")
print(f"Unique values in target variable y: {y.unique()}")


# --- 3. Model Training and Evaluation ---

# Decision Tree GridSearch
dt_base = DecisionTreeClassifier(random_state=42)
param_grid_dt = {
    'criterion': ['gini','entropy'],
    'max_depth': [3,5,8, None],
    'min_samples_split': [2,5,10],
    'min_samples_leaf': [1,2,4]
}
grid_dt = GridSearchCV(dt_base, param_grid_dt, cv=cv, scoring='f1', n_jobs=-1, verbose=1, refit=True)
print("\nFitting Decision Tree GridSearch (this may take a while)...")
t0 = time.time()
grid_dt.fit(X_train, y_train)
print("DT GridSearch time (s):", time.time()-t0)

best_dt = grid_dt.best_estimator_
print("Best DT params:", grid_dt.best_params_, "Best CV F1:", grid_dt.best_score_)

# Evaluate and save DT
y_pred_dt = best_dt.predict(X_test)
print("DT Test acc:", accuracy_score(y_test, y_pred_dt))
print("DT Test F1:", f1_score(y_test, y_pred_dt))
print(classification_report(y_test, y_pred_dt, target_names=target_names))
joblib.dump(best_dt, os.path.join(OUT_DIR, 'best_decision_tree.joblib'))
dot_path = os.path.join(OUT_DIR, 'Best_DecisionTree.dot')
export_graphviz(best_dt, out_file=dot_path, feature_names=list(feature_names), class_names=list(target_names), rounded=True, filled=True)
png_path = os.path.join(OUT_DIR, 'Best_DecisionTree.png')
!dot -Tpng "{dot_path}" -o "{png_path}" || true
print("Saved .dot and attempted to render png at:", png_path)
plt.figure(figsize=(12,6))
plot_tree(best_dt, feature_names=feature_names, class_names=target_names, filled=True, rounded=True)
plt.title('Best Decision Tree')
plt.show()

# SVC GridSearch (Pipeline with StandardScaler)
# Scaling lives inside the pipeline so it is re-fitted on each CV training fold.
# probability=True is kept on the SVC so the fitted model exposes class
# probabilities to the ensembles built from it.
pipe_svc = Pipeline([('scaler', StandardScaler()), ('svc', SVC(probability=True, random_state=42))])
# One sub-grid per kernel: `gamma` has no effect on the linear kernel, so
# crossing it with kernel='linear' only produced duplicate (and expensive)
# candidates. This searches 9 distinct candidates instead of 12.
param_grid_svc = [
    {'svc__kernel': ['linear'], 'svc__C': [0.1, 1, 10]},
    {'svc__kernel': ['rbf'], 'svc__C': [0.1, 1, 10], 'svc__gamma': ['scale', 'auto']},
]
grid_svc = GridSearchCV(pipe_svc, param_grid_svc, cv=cv, scoring='f1', n_jobs=-1, verbose=1, refit=True)
print("\nFitting SVC GridSearch...")
t0 = time.time()
grid_svc.fit(X_train, y_train)
print("SVC GridSearch time (s):", time.time()-t0)

best_svc = grid_svc.best_estimator_
print("Best SVC params:", grid_svc.best_params_, "Best CV F1:", grid_svc.best_score_)

# Evaluate and save SVC
y_pred_svc = best_svc.predict(X_test)
print("SVC Test acc:", accuracy_score(y_test, y_pred_svc))
print("SVC Test F1:", f1_score(y_test, y_pred_svc))
print(classification_report(y_test, y_pred_svc, target_names=target_names))
joblib.dump(best_svc, os.path.join(OUT_DIR, 'best_svc.joblib'))

# Bagging (SVC & DT) + Voting ensemble

# Bagging SVC (tune bagging params)
# The tuned SVC pipeline is the base learner; only the ensemble size and the
# per-estimator sample fraction are searched here.
bag_svc = BaggingClassifier(estimator=best_svc, random_state=42)
param_grid_bag_svc = dict(
    n_estimators=[10, 25, 50],
    max_samples=[0.6, 0.8, 1.0],
)
grid_bag_svc = GridSearchCV(
    estimator=bag_svc,
    param_grid=param_grid_bag_svc,
    scoring='f1',
    cv=cv,
    refit=True,
    n_jobs=-1,
    verbose=1,
)
print("\nFitting Bagging SVC GridSearch...")
t0 = time.time()
grid_bag_svc.fit(X_train, y_train)
print("Bagging SVC time (s):", time.time()-t0)
best_bag_svc = grid_bag_svc.best_estimator_
print("Best Bagging SVC:", grid_bag_svc.best_params_, "CV F1:", grid_bag_svc.best_score_)
bag_svc_path = os.path.join(OUT_DIR, 'best_bag_svc.joblib')
joblib.dump(best_bag_svc, bag_svc_path)

# Bagging DecisionTree
# Starts from a fresh, untuned tree and searches the tree's depth / leaf size
# together with the bagging parameters (`estimator__*` reaches the base tree).
base_dt_bag = DecisionTreeClassifier(random_state=42)
bag_dt = BaggingClassifier(estimator=base_dt_bag, random_state=42)
param_grid_bag_dt = dict(
    n_estimators=[10, 25, 50],
    max_samples=[0.6, 0.8, 1.0],
    estimator__max_depth=[3, 5, 8],
    estimator__min_samples_leaf=[1, 2, 4],
)
grid_bag_dt = GridSearchCV(
    estimator=bag_dt,
    param_grid=param_grid_bag_dt,
    scoring='f1',
    cv=cv,
    refit=True,
    n_jobs=-1,
    verbose=1,
)
print("\nFitting Bagging DT GridSearch...")
t0 = time.time()
grid_bag_dt.fit(X_train, y_train)
print("Bagging DT time (s):", time.time()-t0)
best_bag_dt = grid_bag_dt.best_estimator_
print("Best Bagging DT:", grid_bag_dt.best_params_, "CV F1:", grid_bag_dt.best_score_)
bag_dt_path = os.path.join(OUT_DIR, 'best_bag_dt.joblib')
joblib.dump(best_bag_dt, bag_dt_path)

# Voting ensemble (soft): combines the two tuned bagging ensembles.
voting = VotingClassifier(
    estimators=[('bag_svc', best_bag_svc), ('bag_dt', best_bag_dt)],
    voting='soft',
)
voting.fit(X_train, y_train)
voting_path = os.path.join(OUT_DIR, 'voting_ensemble.joblib')
joblib.dump(voting, voting_path)

# Evaluate and save summary
models = {'DecisionTree': best_dt, 'SVC': best_svc, 'Bagging_SVC': best_bag_svc, 'Bagging_DT': best_bag_dt, 'Voting': voting}
rows = []
# Test-set predictions are cached per model so the confusion-matrix loop below
# does not have to run every (potentially slow) model a second time.
test_predictions = {}
for name, model in models.items():
    y_pred = model.predict(X_test)
    test_predictions[name] = y_pred
    rows.append({
        'model': name,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        # Mean seconds per full-test-set predict call, averaged over 30 runs.
        'inf_time_s': measure_inference_time(model, X_test, repeats=30),
    })
results = pd.DataFrame(rows).set_index('model')
print("\n--- Model Performance Summary ---")
print(results)
results.to_csv(os.path.join(OUT_DIR, 'lab6_results_summary.csv'))

# Save confusion matrices and accuracy bar
for name, y_pred in test_predictions.items():
    plot_confusion(y_test, y_pred, f'Confusion: {name}', out_path=os.path.join(OUT_DIR, f'confusion_{name}.png'))

plt.figure(figsize=(8,4))
plt.bar(results.index, results['accuracy'])
# Keep the original 0.8-1.0 window when every model fits inside it, but lower
# the floor when a model scores below 0.8 so its bar is not cut off.
acc_floor = min(0.8, max(0.0, results['accuracy'].min() - 0.05))
plt.title('Test set accuracy by model'); plt.ylabel('Accuracy'); plt.ylim(acc_floor, 1.0)
plt.xticks(rotation=30)
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'accuracy_bar.png'))
plt.show()


# --- 4. Save Run Notes ---
# The note contains a non-ASCII dash, so the encoding is fixed explicitly
# instead of depending on the platform default.
notes_path = os.path.join(OUT_DIR, 'lab6_run_notes.txt')
with open(notes_path, 'w', encoding='utf-8') as f:
    f.write('LAB6 Colab run — GridSearch and Ensemble learning. Check saved models and figures in this folder.\n')
print("\nSaved run notes to", notes_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Outputs will be saved to: /content/drive/MyDrive/Lab6_results
Graphviz installed
Successfully loaded /content/drive/MyDrive/UNSW.csv.csv


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Final Task

### Subtask:
Summarize the performance of all trained models and confirm that all outputs (models, plots, and a summary CSV) have been saved to the designated output directory in Google Drive.
