In [None]:
# Prints a fixed greeting; presumably a quick check that the kernel executes cells.
print("Hello")

In [None]:
import os

# Show the names of the entries currently present in /kaggle/working.
print(os.listdir("/kaggle/working"))


In [None]:
import os
import shutil


def remove_path(path):
    """Delete whatever exists at ``path``: a file, a symlink, or a directory tree.

    ``shutil.rmtree`` only works on real directories, so files and symlinks are
    routed to ``os.remove`` instead.

    Args:
        path (str): Filesystem path to delete.

    Returns:
        str: ``"directory"`` if a directory tree was removed, otherwise ``"file"``.

    Raises:
        FileNotFoundError: If nothing exists at ``path``.
    """
    if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path)
        return "directory"
    os.remove(path)
    return "file"


# Path to delete (the name ends in .pth, i.e. it is a file path, not a folder)
directory_to_delete = "/kaggle/working/best_model_mel.pth"

try:
    remove_path(directory_to_delete)
    print(f"Successfully deleted: {directory_to_delete}")
except FileNotFoundError:
    print(f"Path not found: {directory_to_delete}")
except Exception as e:
    print(f"Error deleting path: {e}")


In [None]:
import os

# Folder whose top-level files are removed; sub-folders are left in place
directory_to_delete = "/kaggle/working"

# Look at every entry once: regular files are deleted, everything else is only reported
for item in os.listdir(directory_to_delete):
    item_path = os.path.join(directory_to_delete, item)

    if not os.path.isfile(item_path):
        print(f"Skipped folder: {item_path}")
        continue

    os.remove(item_path)
    print(f"Deleted file: {item_path}")


In [None]:
import os
import shutil

# Where the split dataset is read from
source_directory = "/kaggle/input/split-dataset-2/Split_Dataset2"

# Where the copy is written
destination_directory = "/kaggle/working/Split_Dataset2"

# The destination root must exist before anything is mirrored into it
os.makedirs(destination_directory, exist_ok=True)

# Mirror the tree: recreate each visited folder, then copy its files one by one
for root, dirs, files in os.walk(source_directory):
    relative_path = os.path.relpath(root, source_directory)
    target_path = os.path.join(destination_directory, relative_path)
    os.makedirs(target_path, exist_ok=True)

    for file_name in files:
        source_path, destination_path = (
            os.path.join(root, file_name),
            os.path.join(target_path, file_name),
        )
        shutil.copy(source_path, destination_path)
        print(f"Copied: {source_path} to {destination_path}")

print(f"All files and folders have been successfully copied to {destination_directory}")


In [None]:
import os
from pydub import AudioSegment

# Folder that is walked for .wav files, and the folder that receives the segments.
# NOTE(review): no visible cell creates /kaggle/working/COPD_Levels (the copy cell
# writes Split_Dataset2) -- confirm where this input comes from.
input_directory = "/kaggle/working/COPD_Levels"
output_directory = "/kaggle/working/Preprocessed_COPD_Levels"

# Create the output root up front; exist_ok makes re-running the cell harmless
os.makedirs(output_directory, exist_ok=True)

# Function to segment audio files
def segment_audio(file_path, output_path, segment_duration=10*1000, overlap_duration=2*1000):
    """
    Split one audio file into fixed-length, overlapping WAV segments.

    Full windows are exported as ``<stem>_<start_s>_<end_s>.wav``. When those
    windows do not reach the end of the recording, one extra ``<stem>_final.wav``
    is exported containing the last ``segment_duration`` milliseconds (or the
    whole clip if it is shorter than one window). No ``_final`` file is written
    when the last full window already ends exactly at the end of the clip, since
    it would be an exact duplicate of that window.

    Args:
        file_path (str): Path to the input audio file.
        output_path (str): Existing directory that receives the segment files.
        segment_duration (int): Length of each segment in milliseconds (default 10 s).
        overlap_duration (int): Overlap between consecutive segments in
            milliseconds (default 2 s).

    Raises:
        ValueError: If ``segment_duration`` is not positive or
            ``overlap_duration >= segment_duration`` (the window would never advance).
    """
    step = segment_duration - overlap_duration
    if segment_duration <= 0 or step <= 0:
        raise ValueError(
            "segment_duration must be positive and larger than overlap_duration "
            f"(got segment_duration={segment_duration}, overlap_duration={overlap_duration})"
        )

    # Load the audio file; len() of the loaded audio is compared against millisecond offsets
    audio = AudioSegment.from_file(file_path)
    audio_length = len(audio)
    stem = os.path.splitext(os.path.basename(file_path))[0]

    covered_until = 0  # end (ms) of the last full window that was exported
    start = 0
    while start + segment_duration <= audio_length:
        end = start + segment_duration
        segment_name = f"{stem}_{start // 1000}_{end // 1000}.wav"
        audio[start:end].export(os.path.join(output_path, segment_name), format="wav")
        covered_until = end
        start += step  # Move forward, keeping `overlap_duration` of shared audio

    # Only export a tail segment when some audio past the last full window is uncovered
    if covered_until < audio_length:
        tail = audio[-segment_duration:]
        tail.export(os.path.join(output_path, f"{stem}_final.wav"), format="wav")

# Walk the input tree and segment every .wav file, mirroring the folder layout on output
for root, dirs, files in os.walk(input_directory):
    for file_name in files:
        if not file_name.endswith(".wav"):  # Adjust the file extension if needed
            continue

        # Same sub-path under the output root as the file has under the input root
        relative_path = os.path.relpath(root, input_directory)
        output_folder = os.path.join(output_directory, relative_path)
        os.makedirs(output_folder, exist_ok=True)

        input_file_path = os.path.join(root, file_name)
        segment_audio(input_file_path, output_folder)

print(f"Preprocessing complete! Segmented audio files are saved in: {output_directory}")


In [None]:
!pip install soundfile


In [None]:
import os
import librosa
import numpy as np
import soundfile as sf  # Import soundfile

def apply_pitch_scaling(audio_file, output_path, sr=22050, n_steps=2):
    """Pitch-shift one audio file and write the result as a WAV file.

    The output is named ``<stem>_pitch_<n_steps>.wav``. The stem is taken with
    ``os.path.splitext`` so the suffix is applied regardless of the input
    extension or its letter case; this keeps the +N and -N outputs of the same
    source from sharing a file name.

    Args:
        audio_file (str): Path of the audio file to load.
        output_path (str): Existing directory that receives the shifted file.
        sr (int): Sample rate passed to ``librosa.load`` (default 22050).
        n_steps (int): Shift in semitones passed to ``librosa.effects.pitch_shift``.
    """
    # Load the audio file at the requested sample rate
    y, sr = librosa.load(audio_file, sr=sr)
    # Shift the pitch by `n_steps` semitones
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    # Build the output name from the extension-less stem
    stem = os.path.splitext(os.path.basename(audio_file))[0]
    file_name = f"{stem}_pitch_{n_steps}.wav"
    sf.write(os.path.join(output_path, file_name), y_shifted, sr)

def apply_noise_addition(audio_file, output_path, noise_level=0.005, sr=22050):
    """Add zero-mean Gaussian noise to one audio file and write it as a WAV file.

    The output is named ``<stem>_noise_<noise_level>.wav``. The stem is taken
    with ``os.path.splitext`` so the suffix is applied regardless of the input
    extension or its letter case.

    Args:
        audio_file (str): Path of the audio file to load.
        output_path (str): Existing directory that receives the noisy file.
        noise_level (float): Standard deviation of the added noise (default 0.005).
        sr (int): Sample rate passed to ``librosa.load`` (default 22050).
    """
    # Load the audio file at the requested sample rate
    y, sr = librosa.load(audio_file, sr=sr)
    # Draw one noise value per sample and add it to the signal
    noise = np.random.normal(0, noise_level, y.shape)
    y_noisy = y + noise
    # Build the output name from the extension-less stem
    stem = os.path.splitext(os.path.basename(audio_file))[0]
    file_name = f"{stem}_noise_{noise_level}.wav"
    sf.write(os.path.join(output_path, file_name), y_noisy, sr)

input_dir = "/kaggle/working/Preprocessed_COPD_Levels"
output_dir = "/kaggle/working/Augmented_COPD_Levels"

# Pre-create the five expected class folders so they exist even if a level has no input
for level in ["COPD0", "COPD1", "COPD2", "COPD3", "COPD4"]:
    os.makedirs(os.path.join(output_dir, level), exist_ok=True)

# Apply augmentations to every class folder actually present in the input
for level in sorted(os.listdir(input_dir)):
    input_path = os.path.join(input_dir, level)
    if not os.path.isdir(input_path):
        continue  # stray file next to the class folders; nothing to list inside it

    output_path = os.path.join(output_dir, level)
    # Covers folders whose name is not in the fixed list above
    os.makedirs(output_path, exist_ok=True)

    for file in sorted(os.listdir(input_path)):
        if not file.lower().endswith(".wav"):
            continue  # only audio segments are augmented

        audio_file = os.path.join(input_path, file)

        # Apply pitch scaling (shift by +2 and -2 semitones)
        apply_pitch_scaling(audio_file, output_path, n_steps=2)
        apply_pitch_scaling(audio_file, output_path, n_steps=-2)

        # Apply noise addition (noise level = 0.005)
        apply_noise_addition(audio_file, output_path, noise_level=0.005)


In [None]:
! pip install librosa

In [None]:
import os
import librosa
import numpy as np
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
import cv2

class AudioFeatureExtractor:
    """Render spectrogram, mel-spectrogram and chromagram images from audio files.

    The STFT is computed with torch on ``device``; the mel and chroma filter
    banks come from librosa. Each feature is drawn with matplotlib, then
    re-read with OpenCV, converted to grayscale and resized to 224x224.
    """

    def __init__(self, sr=22050, device='cuda' if torch.cuda.is_available() else 'cpu'):
        """
        Args:
            sr (int): Sample rate audio is loaded at.
            device (str): Torch device the STFT runs on.
        """
        self.sr = sr
        self.device = device
        # Parameters for feature extraction
        self.n_fft = 2048
        self.hop_length = 512
        self.n_mels = 128
        self.n_chroma = 12

    def load_audio(self, audio_path):
        """Load an audio file at ``self.sr`` and return it as a float tensor on ``self.device``."""
        y, _ = librosa.load(audio_path, sr=self.sr)
        return torch.FloatTensor(y).to(self.device)

    def save_feature_image(self, feature, output_path, vmin=None, vmax=None):
        """Render ``feature`` to ``output_path`` as a 224x224 grayscale image.

        Args:
            feature: 2-D array passed to ``plt.imshow``.
            output_path (str): Image file to write (overwritten in place by the resize step).
            vmin, vmax: Optional colour-scale limits forwarded to ``plt.imshow``.

        Raises:
            IOError: If the rendered image cannot be read back from disk.
        """
        fig = plt.figure(figsize=(10, 4))
        try:
            plt.imshow(feature, aspect='auto', origin='lower', cmap='viridis', vmin=vmin, vmax=vmax)
            plt.axis('off')
            plt.tight_layout()
            plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if rendering or saving raised
            plt.close(fig)

        # Convert to grayscale and resize for consistency
        img = cv2.imread(output_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising
            raise IOError(f"Could not read back rendered image: {output_path}")
        img = cv2.resize(img, (224, 224))
        cv2.imwrite(output_path, img)

    def extract_features(self, audio_path, output_dir):
        """Extract and save all three feature images for a single audio file.

        Images are written to ``<output_dir>/spectrogram``,
        ``<output_dir>/melspectrogram`` and ``<output_dir>/chromagram``.

        Args:
            audio_path (str): Audio file to process.
            output_dir (str): Folder under which the three feature folders live.

        Returns:
            tuple: ``(spec_path, mel_path, chroma_path)`` of the written images.
        """
        # Create output directories if they don't exist
        feature_types = ['spectrogram', 'melspectrogram', 'chromagram']
        for feat_type in feature_types:
            os.makedirs(os.path.join(output_dir, feat_type), exist_ok=True)

        # Load audio
        y = self.load_audio(audio_path)

        # STFT on self.device; the window must live on the same device as the signal
        stft = torch.stft(
            y,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            window=torch.hann_window(self.n_fft).to(self.device),
            return_complex=True
        )

        # Magnitude spectrogram, moved back to NumPy for the librosa filter banks
        spectrogram = torch.abs(stft).cpu().numpy()

        base_filename = os.path.splitext(os.path.basename(audio_path))[0]

        # 1. Spectrogram in dB relative to its own maximum
        spec_db = librosa.amplitude_to_db(spectrogram, ref=np.max)
        spec_path = os.path.join(output_dir, 'spectrogram', f'{base_filename}_spec.png')
        self.save_feature_image(spec_db, spec_path)

        # 2. Mel spectrogram: project the magnitude spectrogram onto the mel filter bank
        mel_basis = librosa.filters.mel(sr=self.sr, n_fft=self.n_fft, n_mels=self.n_mels)
        mel_spec = np.dot(mel_basis, spectrogram)
        mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
        mel_path = os.path.join(output_dir, 'melspectrogram', f'{base_filename}_mel.png')
        self.save_feature_image(mel_spec_db, mel_path)

        # 3. Chromagram: honour the configured number of chroma bins
        chroma_basis = librosa.filters.chroma(sr=self.sr, n_fft=self.n_fft, n_chroma=self.n_chroma)
        chromagram = np.dot(chroma_basis, spectrogram)
        chroma_path = os.path.join(output_dir, 'chromagram', f'{base_filename}_chroma.png')
        self.save_feature_image(chromagram, chroma_path, vmin=0)

        return spec_path, mel_path, chroma_path

def process_dataset(input_dir, output_dir):
    """Run feature extraction over every class folder found in ``input_dir``.

    For each sub-directory of ``input_dir`` a folder of the same name is created
    under ``output_dir`` and each ``.wav`` file inside is passed to
    ``AudioFeatureExtractor.extract_features``. A failure on one file is printed
    and does not stop the run.
    """
    extractor = AudioFeatureExtractor()

    for level in os.listdir(input_dir):
        level_path = os.path.join(input_dir, level)
        if os.path.isdir(level_path):
            # Output folder mirrors the class folder name
            level_output = os.path.join(output_dir, level)
            os.makedirs(level_output, exist_ok=True)

            # Materialise the list so the progress bar knows its total
            audio_files = list(filter(lambda name: name.endswith('.wav'), os.listdir(level_path)))
            for audio_file in tqdm(audio_files, desc=f'Processing {level}'):
                audio_path = os.path.join(level_path, audio_file)
                try:
                    extractor.extract_features(audio_path, level_output)
                except Exception as e:
                    print(f"Error processing {audio_path}: {str(e)}")

# Entry point of this cell: extract feature images for the augmented audio.
if __name__ == "__main__":
    # Rebinds the module-level input_dir / output_dir names used by the augmentation cell
    input_dir = "/kaggle/working/Augmented_COPD_Levels"
    output_dir = "/kaggle/working/Features_Extracted_COPD_Levels"

    # Create main output directory (safe to re-run thanks to exist_ok)
    os.makedirs(output_dir, exist_ok=True)

    # Process every class folder under input_dir
    process_dataset(input_dir, output_dir)

In [None]:
import os
import shutil

# Folder to compress and the archive file it should end up in
folder_to_zip = "/kaggle/working/Augmented_COPD_Levels"
output_zip_file = "/kaggle/working/Augmented_COPD_Levels.zip"

# make_archive appends the format's extension itself, so it is given the
# target path without ".zip"; deriving it from output_zip_file keeps the
# declared output path and the real one in sync.
archive_base = os.path.splitext(output_zip_file)[0]
shutil.make_archive(archive_base, 'zip', folder_to_zip)

# The zip file is now available at /kaggle/working/Augmented_COPD_Levels.zip


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
import json
from tqdm import tqdm

class COPDDataset(Dataset):
    """Image dataset read from ``data_dir/COPD<n>/*.png``; the label is ``<n>``.

    Attributes:
        samples: List of ``(image_path, label)`` tuples in sorted, reproducible order.
        class_counts: Number of ``.png`` files found per class folder name.
    """

    def __init__(self, data_dir, transform=None):
        """
        Args:
            data_dir (str): Folder containing one sub-folder per class.
            transform: Optional callable applied to each loaded RGB image.
        """
        self.data_dir = data_dir
        self.transform = transform
        self.samples = []
        self.class_counts = {f'COPD{i}': 0 for i in range(5)}

        # Sorted listing makes the sample order independent of the filesystem
        for class_name in sorted(os.listdir(data_dir)):
            class_dir = os.path.join(data_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            try:
                label = int(class_name.replace('COPD', ''))
            except ValueError:
                # Folder name does not encode a class index; ignore it
                continue

            # List the folder once and use it for both the count and the samples
            png_names = sorted(f for f in os.listdir(class_dir) if f.endswith('.png'))
            self.class_counts[class_name] = self.class_counts.get(class_name, 0) + len(png_names)
            self.samples.extend((os.path.join(class_dir, name), label) for name in png_names)

    def __len__(self):
        """Number of images found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is RGB, transformed if requested."""
        img_path, label = self.samples[idx]
        # The context manager closes the file handle once the pixels are converted
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

class ExperimentTracker:
    """Holds the metrics of one run and writes them into a timestamped results folder."""

    def __init__(self, exp_name):
        """Create ``results_<exp_name>_<timestamp>`` and an empty metrics record."""
        self.exp_name = exp_name
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.results_dir = 'results_{}_{}'.format(self.exp_name, self.timestamp)
        os.makedirs(self.results_dir, exist_ok=True)

        # Per-epoch histories start as empty lists, one-off results as None
        history_keys = ('train_losses', 'train_accs', 'val_losses', 'val_accs')
        summary_keys = ('final_test_metrics', 'confusion_matrix', 'class_metrics', 'training_params')
        self.metrics = {key: [] for key in history_keys}
        self.metrics.update({key: None for key in summary_keys})

    def save_metrics(self):
        """Write ``self.metrics`` to ``metrics.json`` inside the results folder."""
        with open(os.path.join(self.results_dir, 'metrics.json'), 'w') as handle:
            json.dump(self.metrics, handle, indent=4)

    def plot_confusion_matrix(self, cm):
        """Draw ``cm`` as an annotated heatmap and save it as ``confusion_matrix.png``."""
        image_path = os.path.join(self.results_dir, 'confusion_matrix.png')
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.savefig(image_path)
        plt.close()

def train_model(model, train_loader, val_loader, criterion, optimizer, 
                exp_tracker, num_epochs=50, device='cuda'):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Mixed precision is enabled only when ``device`` is a CUDA device, and the
    loss is scaled with ``GradScaler`` so half-precision gradients do not
    underflow. Per-epoch loss and accuracy are appended to
    ``exp_tracker.metrics``; whenever validation accuracy improves, a
    checkpoint is written to ``exp_tracker.results_dir``.

    Args:
        model: Network to train (already on ``device``).
        train_loader, val_loader: Iterables of ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        exp_tracker: Object exposing ``metrics`` (dict of lists) and ``results_dir``.
        num_epochs (int): Number of epochs.
        device: Torch device (string or ``torch.device``).

    Returns:
        The model with the weights of the LAST epoch.
        NOTE(review): the best-validation weights are only saved to disk, not
        restored here -- confirm this is what callers expect.
    """
    print(f"Using device: {device}")
    best_val_acc = 0.0

    # autocast/GradScaler are only meaningful on CUDA; both are no-ops when disabled
    use_amp = torch.device(device).type == 'cuda'
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
    
    for epoch in tqdm(range(num_epochs), desc="Training Progress"):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            
            with torch.cuda.amp.autocast(enabled=use_amp):  # Mixed precision forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            
            # Scale the loss before backward, then unscale inside step()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        
        train_loss = running_loss / len(train_loader)
        train_acc = 100. * correct / total
        exp_tracker.metrics['train_losses'].append(train_loss)
        exp_tracker.metrics['train_accs'].append(train_acc)
        
        # Validation phase
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                running_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
        
        val_loss = running_loss / len(val_loader)
        val_acc = 100. * correct / total
        exp_tracker.metrics['val_losses'].append(val_loss)
        exp_tracker.metrics['val_accs'].append(val_acc)
        
        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        
        # Keep a checkpoint of the best validation accuracy seen so far
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
            }, os.path.join(exp_tracker.results_dir, 'best_resent_model_2.pth'))
    
    return model

def evaluate_model(model, test_loader, criterion, device, exp_tracker, num_classes=5):
    """Evaluate ``model`` on ``test_loader`` and record the results in ``exp_tracker``.

    The confusion matrix and the classification report are computed over the
    fixed label set ``0..num_classes-1``, so the matrix always has the same
    shape and the report does not fail when a class is missing from the test
    predictions and targets.

    Args:
        model: Trained network (already on ``device``).
        test_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Torch device the batches are moved to.
        exp_tracker: Object exposing ``metrics`` and ``plot_confusion_matrix(cm)``.
        num_classes (int): Number of classes, named ``COPD0..COPD<n-1>`` in the report.

    Returns:
        tuple: ``(test_acc, test_loss, cm)`` -- accuracy in percent, mean batch
        loss, and the confusion matrix.
    """
    model.eval()
    all_predictions = []
    all_targets = []
    test_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Evaluating"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
            all_predictions.extend(predicted.cpu().tolist())
            all_targets.extend(labels.cpu().tolist())
    
    # Calculate metrics
    class_labels = list(range(num_classes))
    class_names = [f'COPD{i}' for i in class_labels]
    test_acc = 100. * correct / total
    test_loss = test_loss / len(test_loader)
    cm = confusion_matrix(all_targets, all_predictions, labels=class_labels)
    class_metrics = precision_recall_fscore_support(all_targets, all_predictions, average='weighted')
    
    # Save results; plain floats keep the metrics JSON-serialisable
    exp_tracker.metrics['final_test_metrics'] = {
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        'precision': float(class_metrics[0]),
        'recall': float(class_metrics[1]),
        'f1_score': float(class_metrics[2])
    }
    exp_tracker.metrics['confusion_matrix'] = cm.tolist()
    exp_tracker.plot_confusion_matrix(cm)
    
    # Print detailed classification report
    print("\nClassification Report:")
    print(classification_report(all_targets, all_predictions,
                                labels=class_labels,
                                target_names=class_names,
                                zero_division=0))
    
    return test_acc, test_loss, cm

def main():
    """Run one ResNet50 experiment end to end: seed, load data, train, test, save metrics."""
    # Fixed seed for torch (CPU and CUDA) and NumPy
    seed = 42
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)

    exp_tracker = ExperimentTracker('COPD_classification')

    # Pick the device and let cuDNN choose its kernels by benchmarking
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # Hyper-parameters used below and recorded in the metrics
    batch_size = 32
    learning_rate = 0.001
    weight_decay = 0.01
    num_epochs = 50

    image_size = (224, 224)
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    # Training images are randomly flipped, rotated and colour-jittered
    train_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=norm_mean, std=norm_std)
    ])

    # Validation and test images are only resized and normalised
    val_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=norm_mean, std=norm_std)
    ])

    train_dataset = COPDDataset("/kaggle/working/Split_Dataset2/train", transform=train_transform)
    val_dataset = COPDDataset("/kaggle/working/Split_Dataset2/val", transform=val_transform)
    test_dataset = COPDDataset("/kaggle/working/Split_Dataset2/test", transform=val_transform)

    exp_tracker.metrics['dataset_stats'] = {
        'train_samples': len(train_dataset),
        'val_samples': len(val_dataset),
        'test_samples': len(test_dataset),
        'class_distribution': train_dataset.class_counts
    }

    def build_loader(dataset, shuffle):
        # All three loaders share everything except the dataset and the shuffle flag
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                          num_workers=4, pin_memory=True)

    train_loader = build_loader(train_dataset, True)
    val_loader = build_loader(val_dataset, False)
    test_loader = build_loader(test_dataset, False)

    # Pretrained ResNet50 with its final layer replaced by a 5-way classifier
    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 5)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    exp_tracker.metrics['training_params'] = {
        'model': 'ResNet50',
        'optimizer': 'AdamW',
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'batch_size': batch_size,
        'num_epochs': num_epochs
    }

    model = train_model(model, train_loader, val_loader, criterion, optimizer,
                        exp_tracker, num_epochs=num_epochs, device=device)

    # Test-set evaluation also fills the tracker's final metrics
    test_acc, test_loss, cm = evaluate_model(model, test_loader, criterion,
                                             device, exp_tracker)

    exp_tracker.save_metrics()

    print(f"\nExperiment complete. Results saved in: {exp_tracker.results_dir}")

if __name__ == '__main__':
    main()

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import os

class COPDSpectrogramDataset(Dataset):
    """Image dataset restricted to one spectrogram type, read from ``data_dir/COPD<n>/``.

    A file belongs to the requested type when its name contains that type's
    pattern and does NOT contain a longer pattern of another type that embeds
    it ('mel_spectrogram' embeds 'spectrogram'), so ``spec_type='spec'`` no
    longer picks up mel-spectrogram images as well.

    NOTE(review): the feature-extraction cell of this notebook writes files
    named ``*_spec.png`` / ``*_mel.png`` / ``*_chroma.png``, which none of these
    patterns match -- confirm the naming used in the dataset being loaded.
    """

    def __init__(self, data_dir, spec_type='mel', transform=None):
        """
        Args:
            data_dir (str): Directory path
            spec_type (str): One of 'mel', 'spec', or 'chroma'
            transform: torchvision transforms

        Raises:
            KeyError: If ``spec_type`` is not one of the supported names.
        """
        self.data_dir = data_dir
        self.transform = transform
        self.samples = []
        self.class_counts = {f'COPD{i}': 0 for i in range(5)}
        
        # Define file patterns for each spectrogram type
        spec_patterns = {
            'mel': 'mel_spectrogram',
            'spec': 'spectrogram',
            'chroma': 'chromogram'
        }
        self.pattern = spec_patterns[spec_type]
        # Patterns of OTHER types that contain this one as a substring
        shadowing = [p for p in spec_patterns.values()
                     if p != self.pattern and self.pattern in p]
        
        # Sorted listings make the sample order independent of the filesystem
        for class_name in sorted(os.listdir(data_dir)):
            class_dir = os.path.join(data_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            try:
                label = int(class_name.replace('COPD', ''))
            except ValueError:
                # Folder name does not encode a class index; ignore it
                continue

            for img_name in sorted(os.listdir(class_dir)):
                if self.pattern not in img_name:
                    continue
                if any(p in img_name for p in shadowing):
                    continue  # belongs to a different spectrogram type
                self.samples.append((os.path.join(class_dir, img_name), label))
                self.class_counts[class_name] = self.class_counts.get(class_name, 0) + 1
    
    def __len__(self):
        """Number of images of the selected type."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is RGB, transformed if requested."""
        img_path, label = self.samples[idx]
        # The context manager closes the file handle once the pixels are converted
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

def train_separate_models(base_data_dir, num_epochs=50):
    """Train one ResNet50 per spectrogram type ('mel', 'spec', 'chroma').

    For each type, the training set is read from ``<base_data_dir>/train`` and
    the validation set from ``<base_data_dir>/val``. Training images are
    randomly augmented; validation images are only resized and normalised so
    validation accuracy is deterministic and comparable across epochs. The
    weights with the best validation accuracy are saved to
    ``best_model_<spec_type>.pth`` in the current working directory.

    Args:
        base_data_dir (str): Folder containing the ``train`` and ``val`` splits.
        num_epochs (int): Epochs to train each model for.
    """
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    # Training transform: random flip/rotation as augmentation
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Validation transform: no randomness, otherwise the metric itself is noisy
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # Train separate models for each spectrogram type
    for spec_type in ['mel', 'spec', 'chroma']:
        print(f"\nTraining model for {spec_type} spectrograms...")
        
        # Create datasets for current spectrogram type
        train_dataset = COPDSpectrogramDataset(
            os.path.join(base_data_dir, 'train'),
            spec_type=spec_type,
            transform=transform
        )
        val_dataset = COPDSpectrogramDataset(
            os.path.join(base_data_dir, 'val'),
            spec_type=spec_type,
            transform=val_transform
        )
        
        print(f"Number of training samples: {len(train_dataset)}")
        print(f"Number of validation samples: {len(val_dataset)}")
        print("Class distribution:", train_dataset.class_counts)
        
        # Create dataloaders
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)
        
        # Fresh pretrained ResNet50 per type, with a 5-way classification head
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 5)
        model = model.to(device)
        
        # Training setup
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
        
        # Train model
        best_val_acc = 0.0
        for epoch in range(num_epochs):
            # Training phase
            model.train()
            train_loss = 0.0
            train_correct = 0
            train_total = 0
            
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                _, predicted = outputs.max(1)
                train_total += labels.size(0)
                train_correct += predicted.eq(labels).sum().item()
                train_loss += loss.item()
            
            # Validation phase
            model.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0
            
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    
                    _, predicted = outputs.max(1)
                    val_total += labels.size(0)
                    val_correct += predicted.eq(labels).sum().item()
                    val_loss += loss.item()
            
            # Calculate metrics
            train_acc = 100. * train_correct / train_total
            val_acc = 100. * val_correct / val_total
            
            print(f'Epoch {epoch+1}/{num_epochs}:')
            print(f'Train Loss: {train_loss/len(train_loader):.4f}, Train Acc: {train_acc:.2f}%')
            print(f'Val Loss: {val_loss/len(val_loader):.4f}, Val Acc: {val_acc:.2f}%')
            
            # Save best model
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'val_acc': val_acc,
                }, f'best_model_{spec_type}.pth')

def main():
    """Train the three per-spectrogram-type models on the dataset under ``base_data_dir``."""
    # NOTE(review): the copy cell of this notebook writes to
    # /kaggle/working/Split_Dataset2 and the other training cells read from it;
    # confirm that "Split_Dataset" (without the 2) is really the intended folder.
    base_data_dir = "/kaggle/working/Split_Dataset"
    train_separate_models(base_data_dir)

# Run when the cell is executed directly
if __name__ == "__main__":
    main()

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import json
from tqdm import tqdm
from datetime import datetime


class COPDDataset(Dataset):
    """Chromogram image dataset read from ``data_dir/COPD<n>/``; the label is ``<n>``.

    Only ``.png`` files whose name contains ``_chromogram`` are used.

    Attributes:
        samples: List of ``(image_path, label)`` tuples in sorted, reproducible order.
        class_counts: Number of selected images per class folder name.
    """

    def __init__(self, data_dir, transform=None):
        """
        Args:
            data_dir (str): Folder containing one sub-folder per class.
            transform: Optional callable applied to each loaded RGB image.
        """
        self.data_dir = data_dir
        self.transform = transform
        self.samples = []
        self.class_counts = {f'COPD{i}': 0 for i in range(5)}
        
        # Sorted listings make the sample order independent of the filesystem
        for class_name in sorted(os.listdir(data_dir)):
            class_dir = os.path.join(data_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            try:
                label = int(class_name.replace('COPD', ''))
            except ValueError:
                # Folder name does not encode a class index; ignore it
                continue

            for img_name in sorted(os.listdir(class_dir)):
                if '_chromogram' in img_name and img_name.endswith('.png'):
                    self.samples.append((os.path.join(class_dir, img_name), label))
                    self.class_counts[class_name] = self.class_counts.get(class_name, 0) + 1

    def __len__(self):
        """Number of chromogram images found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is RGB, transformed if requested."""
        img_path, label = self.samples[idx]
        # The context manager closes the file handle once the pixels are converted
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label


class ExperimentTracker:
    """Holds the metrics of one run and writes them into a timestamped results folder."""

    def __init__(self, exp_name):
        """Create ``results_<exp_name>_<timestamp>`` and an empty metrics record."""
        self.exp_name = exp_name
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.results_dir = 'results_{}_{}'.format(self.exp_name, self.timestamp)
        os.makedirs(self.results_dir, exist_ok=True)

        # Per-epoch histories start as empty lists, one-off results as None
        history_keys = ('train_losses', 'train_accs', 'val_losses', 'val_accs')
        summary_keys = ('final_test_metrics', 'confusion_matrix', 'class_metrics', 'training_params')
        self.metrics = {key: [] for key in history_keys}
        self.metrics.update({key: None for key in summary_keys})

    def save_metrics(self):
        """Write ``self.metrics`` to ``metrics.json`` inside the results folder."""
        with open(os.path.join(self.results_dir, 'metrics.json'), 'w') as handle:
            json.dump(self.metrics, handle, indent=4)

    def plot_confusion_matrix(self, cm):
        """Draw ``cm`` as an annotated heatmap and save it as ``confusion_matrix.png``."""
        image_path = os.path.join(self.results_dir, 'confusion_matrix.png')
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.savefig(image_path)
        plt.close()


def train_model(model, train_loader, val_loader, criterion, optimizer, 
                exp_tracker, num_epochs=50, device='cuda'):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Mixed precision is enabled only when ``device`` is a CUDA device, and the
    loss is scaled with ``GradScaler`` so half-precision gradients do not
    underflow. Per-epoch loss and accuracy are appended to
    ``exp_tracker.metrics``; whenever validation accuracy improves, a
    checkpoint is written to ``exp_tracker.results_dir``.

    Args:
        model: Network to train (already on ``device``).
        train_loader, val_loader: Iterables of ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        exp_tracker: Object exposing ``metrics`` (dict of lists) and ``results_dir``.
        num_epochs (int): Number of epochs.
        device: Torch device (string or ``torch.device``).

    Returns:
        The model with the weights of the LAST epoch.
        NOTE(review): the best-validation weights are only saved to disk, not
        restored here -- confirm this is what callers expect.
    """
    print(f"Using device: {device}")
    best_val_acc = 0.0

    # autocast/GradScaler are only meaningful on CUDA; both are no-ops when disabled
    use_amp = torch.device(device).type == 'cuda'
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
    
    for epoch in tqdm(range(num_epochs), desc="Training Progress"):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            
            with torch.cuda.amp.autocast(enabled=use_amp):  # Mixed precision forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            
            # Scale the loss before backward, then unscale inside step()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        
        train_loss = running_loss / len(train_loader)
        train_acc = 100. * correct / total
        exp_tracker.metrics['train_losses'].append(train_loss)
        exp_tracker.metrics['train_accs'].append(train_acc)
        
        # Validation phase
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                running_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
        
        val_loss = running_loss / len(val_loader)
        val_acc = 100. * correct / total
        exp_tracker.metrics['val_losses'].append(val_loss)
        exp_tracker.metrics['val_accs'].append(val_acc)
        
        print(f'Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        
        # Keep a checkpoint of the best validation accuracy seen so far
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
            }, os.path.join(exp_tracker.results_dir, 'best_resnet_model.pth'))
    
    return model


def evaluate_model(model, test_loader, criterion, device, exp_tracker, class_names=None):
    """Evaluate ``model`` on ``test_loader`` and record the results on ``exp_tracker``.

    Runs a single no-grad pass over the loader, then computes accuracy, mean
    per-batch loss, a confusion matrix and support-weighted
    precision/recall/F1. The results are stored in ``exp_tracker.metrics``
    under ``'final_test_metrics'`` and ``'confusion_matrix'``, the confusion
    matrix is handed to ``exp_tracker.plot_confusion_matrix`` and a
    classification report is printed.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.
        exp_tracker: Object exposing a ``metrics`` dict and a
            ``plot_confusion_matrix(cm)`` method.
        class_names: Optional display names, one per class index starting at
            0. Defaults to ``['COPD0', ..., 'COPD4']``.

    Returns:
        Tuple ``(test_acc, test_loss, cm)``: accuracy in percent, loss averaged
        over batches, and the confusion matrix with one row/column per entry
        of ``class_names``.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    if class_names is None:
        class_names = [f'COPD{i}' for i in range(5)]
    # Explicit label ids keep the confusion matrix and the report aligned with
    # class_names even when some class never occurs in the targets/predictions.
    class_labels = list(range(len(class_names)))

    model.eval()
    all_predictions = []
    all_targets = []
    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Evaluating"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            all_predictions.extend(predicted.cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

    if total == 0:
        # Fail with an actionable message instead of a bare ZeroDivisionError.
        raise ValueError("evaluate_model received no samples: test_loader is empty")

    test_acc = 100. * correct / total
    # Mean of the per-batch losses (not weighted by batch size).
    test_loss = test_loss / len(test_loader)
    cm = confusion_matrix(all_targets, all_predictions, labels=class_labels)
    class_metrics = precision_recall_fscore_support(all_targets, all_predictions, average='weighted')

    exp_tracker.metrics['final_test_metrics'] = {
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        'precision': class_metrics[0],
        'recall': class_metrics[1],
        'f1_score': class_metrics[2]
    }
    exp_tracker.metrics['confusion_matrix'] = cm.tolist()
    exp_tracker.plot_confusion_matrix(cm)

    print("\nClassification Report:")
    # Without `labels`, scikit-learn raises ValueError whenever the number of
    # classes actually observed differs from len(target_names).
    print(classification_report(all_targets, all_predictions,
                                labels=class_labels,
                                target_names=class_names))
    return test_acc, test_loss, cm


def main():
    """Train a ResNet50 COPD classifier and evaluate it on the test split.

    Steps: seed the RNGs, build the train/val/test datasets and loaders,
    fine-tune an ImageNet-pretrained ResNet50 with a 5-way head, reload the
    best-validation checkpoint written by ``train_model`` (when present),
    evaluate on the test split and persist all metrics via the experiment
    tracker.
    """
    # Single source of truth for the run configuration, so the values logged
    # in `training_params` can never drift from the values actually used.
    seed = 42
    num_classes = 5
    batch_size = 32
    num_workers = 4
    num_epochs = 50
    learning_rate = 0.001
    weight_decay = 0.01
    image_size = (224, 224)
    data_root = "/kaggle/working/Split_Dataset"
    # ImageNet channel statistics, matching the pretrained backbone.
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)

    exp_tracker = ExperimentTracker('COPD_classification')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE: cudnn autotuning favours speed; runs may not be bit-for-bit
    # reproducible despite the fixed seeds.
    torch.backends.cudnn.benchmark = True

    train_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=norm_mean, std=norm_std)
    ])

    # Validation and test share the same deterministic preprocessing.
    val_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=norm_mean, std=norm_std)
    ])

    train_dataset = COPDDataset(os.path.join(data_root, "train"), transform=train_transform)
    val_dataset = COPDDataset(os.path.join(data_root, "val"), transform=val_transform)
    test_dataset = COPDDataset(os.path.join(data_root, "test"), transform=val_transform)

    exp_tracker.metrics['dataset_stats'] = {
        'train_samples': len(train_dataset),
        'val_samples': len(val_dataset),
        'test_samples': len(test_dataset),
        'class_distribution': train_dataset.class_counts
    }

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=num_workers, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers, pin_memory=True)

    model = models.resnet50(pretrained=True)
    # Replace the ImageNet head with a fresh layer sized for our classes.
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate,
                                  weight_decay=weight_decay)

    exp_tracker.metrics['training_params'] = {
        'model': 'ResNet50',
        'optimizer': 'AdamW',
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'batch_size': batch_size,
        'num_epochs': num_epochs
    }

    model = train_model(model, train_loader, val_loader, criterion, optimizer,
                        exp_tracker, num_epochs=num_epochs, device=device)

    # train_model returns the weights from the final epoch, while the weights
    # with the best validation accuracy are only written to disk. Restore them
    # so the reported test metrics describe the selected model.
    # NOTE(review): assumes results_dir is unique per run, so the file cannot
    # be a leftover from an earlier experiment - confirm in ExperimentTracker.
    best_checkpoint_path = os.path.join(exp_tracker.results_dir, 'best_resnet_model.pth')
    if os.path.isfile(best_checkpoint_path):
        checkpoint = torch.load(best_checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        print(f"Loaded best checkpoint from epoch {checkpoint['epoch'] + 1} "
              f"(Val Acc: {checkpoint['val_acc']:.2f}%) for test evaluation")

    test_acc, test_loss, cm = evaluate_model(model, test_loader, criterion,
                                             device, exp_tracker)

    exp_tracker.save_metrics()
    print(f"\nExperiment complete. Results saved in: {exp_tracker.results_dir}")

# Entry point: run the full train/evaluate pipeline only when this code is
# executed as the main module, not when it is imported.
if __name__ == '__main__':
    main()