In [1]:
import os
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torchvision import models,transforms
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [2]:
# Function to read temperature values from a file
def read_temperature_file(filepath):
    try:
        with open(filepath, 'r') as file:
            lines = file.readlines()
            data = []
            for line in lines:
                # Clean and split the line
                line_data = line.strip().split()
                # Convert to float and append
                data.extend([float(val) for val in line_data])
            return np.array(data)
    except Exception as e:
        print(f"Error reading {filepath}: {e}")
        return None

In [3]:
# Function to load temperature data and labels from a directory
def load_data_from_directory(directory, label):
    data = []
    labels = []
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        if filepath.endswith('.txt'):
            temp_values = read_temperature_file(filepath)
            if temp_values is not None:
                data.append(temp_values)
                labels.append(label)
    return data, labels

In [4]:
training_healthy = '/kaggle/input/breast-cancer-temp-values/Breast_cancer_temp_values 2/Breast_cancer_temp_values/Single/Healthy/Training'
testing_healthy = '/kaggle/input/breast-cancer-temp-values/Breast_cancer_temp_values 2/Breast_cancer_temp_values/Single/Healthy/Testing'
training_sick = '/kaggle/input/breast-cancer-temp-values/Breast_cancer_temp_values 2/Breast_cancer_temp_values/Single/Sick/training'
testing_sick = '/kaggle/input/breast-cancer-temp-values/Breast_cancer_temp_values 2/Breast_cancer_temp_values/Single/Sick/Testing'

In [5]:
# Load data and labels
train_data_healthy, train_labels_healthy = load_data_from_directory(training_healthy, 0)
train_data_sick, train_labels_sick = load_data_from_directory(training_sick, 1)
test_data_healthy, test_labels_healthy = load_data_from_directory(testing_healthy, 0)
test_data_sick, test_labels_sick = load_data_from_directory(testing_sick, 1)

# Combine healthy and sick data
train_data = train_data_healthy + train_data_sick
train_labels = train_labels_healthy + train_labels_sick
test_data = test_data_healthy + test_data_sick
test_labels = test_labels_healthy + test_labels_sick

# Convert lists to numpy arrays
train_data = np.array(train_data)
train_labels = np.array(train_labels)
test_data = np.array(test_data)
test_labels = np.array(test_labels)

In [6]:
# Split the training data into training and validation sets (80-20 split)
train_data, val_data, train_labels, val_labels = train_test_split(train_data, train_labels, test_size=0.2, random_state=42)

print(f"Train data shape: {train_data.shape}")
print(f"Validation data shape: {val_data.shape}")
print(f"Test data shape: {test_data.shape}")

Train data shape: (37, 307200)
Validation data shape: (10, 307200)
Test data shape: (10, 307200)


In [7]:
# Define a PyTorch dataset
class TemperatureDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [8]:
# Create datasets and dataloaders
train_dataset = TemperatureDataset(train_data, train_labels)
val_dataset = TemperatureDataset(val_data, val_labels)
test_dataset = TemperatureDataset(test_data, test_labels)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [9]:
# Define a more complex neural network
class ComplexNN(nn.Module):
    def __init__(self, input_size):
        super(ComplexNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.drop1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.drop2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.drop3 = nn.Dropout(0.5)
        self.fc4 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.drop1(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.drop2(x)
        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.drop3(x)
        x = self.sigmoid(self.fc4(x))
        return x

# Initialize the model, loss function, and optimizer
input_size = train_data.shape[1]
model = ComplexNN(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

ComplexNN(
  (fc1): Linear(in_features=307200, out_features=256, bias=True)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drop1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drop2): Dropout(p=0.5, inplace=False)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (bn3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (drop3): Dropout(p=0.5, inplace=False)
  (fc4): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [10]:
# Train and evaluate the model
num_epochs = 20
best_val_accuracy = 0.0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for data, labels in train_dataloader:
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(data).squeeze()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    train_loss = running_loss / len(train_dataloader)

    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, labels in val_dataloader:
            data, labels = data.to(device), labels.to(device)
            outputs = model(data).squeeze()
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            predicted = (outputs > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_loss = val_loss / len(val_dataloader)
    val_accuracy = 100 * correct / total

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

    # Save the best model based on validation accuracy
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), 'best_model.pth')


Epoch 1/20, Train Loss: 0.6408, Val Loss: 20.0000, Val Accuracy: 80.00%
Epoch 2/20, Train Loss: 0.5735, Val Loss: 3.1885, Val Accuracy: 80.00%
Epoch 3/20, Train Loss: 0.5625, Val Loss: 2.7896, Val Accuracy: 80.00%
Epoch 4/20, Train Loss: 0.5749, Val Loss: 2.6181, Val Accuracy: 80.00%
Epoch 5/20, Train Loss: 0.5591, Val Loss: 2.3742, Val Accuracy: 80.00%
Epoch 6/20, Train Loss: 0.6106, Val Loss: 2.3548, Val Accuracy: 80.00%
Epoch 7/20, Train Loss: 0.5310, Val Loss: 2.1862, Val Accuracy: 80.00%
Epoch 8/20, Train Loss: 0.6066, Val Loss: 2.0877, Val Accuracy: 80.00%
Epoch 9/20, Train Loss: 0.5322, Val Loss: 1.9212, Val Accuracy: 80.00%
Epoch 10/20, Train Loss: 0.6086, Val Loss: 1.5571, Val Accuracy: 80.00%
Epoch 11/20, Train Loss: 0.4521, Val Loss: 1.3508, Val Accuracy: 80.00%
Epoch 12/20, Train Loss: 0.5680, Val Loss: 1.0360, Val Accuracy: 80.00%
Epoch 13/20, Train Loss: 0.4762, Val Loss: 0.8212, Val Accuracy: 80.00%
Epoch 14/20, Train Loss: 0.4763, Val Loss: 0.5891, Val Accuracy: 80.00%


In [11]:
# Load the best model
model.load_state_dict(torch.load('best_model.pth'))

# Evaluate on the test set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, labels in test_dataloader:
        data, labels = data.to(device), labels.to(device)
        outputs = model(data).squeeze()
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
test_accuracy = 100 * correct / total
print(f"Accuracy on test set: {test_accuracy:.2f}%")

Accuracy on test set: 60.00%
