In [None]:
!pip install gwu-nn
!pip install tqdm
!pip install medmnist


Collecting medmnist
  Downloading medmnist-2.2.3-py3-none-any.whl (22 kB)
Collecting fire (from medmnist)
  Downloading fire-0.5.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.3/88.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.5.0-py2.py3-none-any.whl size=116933 sha256=76eb4d23df08779d4a7e6b8cb5ce98cd19b6071790f9a71fde2a27f469c5e1c4
  Stored in directory: /root/.cache/pip/wheels/90/d4/f7/9404e5db0116bd4d43e5666eaa3e70ab53723e1e3ea40c9a95
Successfully built fire
Installing collected packages: fire, medmnist
Successfully installed fire-0.5.0 medmnist-2.2.3


In [None]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms
import medmnist
from medmnist import INFO, Evaluator

# Training hyper-parameters.
NUM_EPOCHS = 3
BATCH_SIZE = 128
lr = 0.001

# Key into medmnist.INFO that selects the dataset. It must be defined before
# the lookup below (it was missing, so this cell raised NameError on a fresh
# kernel). The rest of the notebook loads DermaMNIST, hence this flag.
data_flag = 'dermamnist'

# Metadata entry for the selected dataset.
info = INFO[data_flag]
task = info['task']
n_channels = info['n_channels']
n_classes = len(info['label'])

# Dataset class whose name is given by the metadata entry.
DataClass = getattr(medmnist, info['python_class'])


# Preprocess Images
1. Convert images to tensors
2. Normalize images

In [None]:
from medmnist import DermaMNIST
# Load the three DermaMNIST splits and keep their raw image / label arrays.
# download=True lets a fresh environment fetch the dataset archive instead of
# requiring that a local copy already exists.
train = DermaMNIST(split="train", download=True)
x_train = train.imgs
y_train = train.labels
test = DermaMNIST(split="test", download=True)
x_test = test.imgs
y_test = test.labels
val = DermaMNIST(split="val", download=True)
x_val = val.imgs
y_val = val.labels

# Summary of Data
We can take a look at the distribution of the test and training data, as well as the types of labels that exist in this dataset.

In [None]:
# Report the array shapes of every split: images first, then labels.
for split_name, images_arr, labels_arr in (
    ("train", x_train, y_train),
    ("test", x_test, y_test),
    ("val", x_val, y_val),
):
    print(f"x_{split_name} shape:", images_arr.shape)
    print(f"y_{split_name} shape:", labels_arr.shape)

x_train shape: (7007, 28, 28, 3)
y_train shape: (7007, 1)
x_test shape: (2005, 28, 28, 3)
y_test shape: (2005, 1)
x_val shape: (1003, 28, 28, 3)
y_val shape: (1003, 1)


# Visualize Dataset
It's important to understand what the data looks like in order to build a suitable architecture.

In [None]:
import skimage
from skimage.util import montage as skimage_montage
# montage() tiles a *stack* of images shaped (K, M, N[, C]). The original call
# passed the single image x_test[0], a 3-D array, so channel_axis=3 pointed
# past its last axis -- the AxisError recorded for this cell. Tile the first
# 100 test images instead. A channel axis is only declared when the stack
# itself is 4-D (colour images); grayscale stacks are 3-D and need none.
montage_arr = skimage_montage(x_test[:100], channel_axis=3 if x_test.ndim == 4 else None)


AxisError: ignored

# Building Model Architecture

In [None]:
import numpy as np
from gwu_nn.gwu_network import GWUNetwork
from gwu_nn.layers import Dense
from gwu_nn.activation_layers import RELU, Softmax
from sklearn.preprocessing import OneHotEncoder

In [None]:

# Preprocess the data
# One-hot encode the integer class labels. The encoder's default sparse result
# is densified with .toarray(); this avoids the `sparse=` keyword, which newer
# scikit-learn releases renamed to `sparse_output=`.
enc = OneHotEncoder(categories='auto')
y_train_encoded = enc.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_encoded = enc.transform(y_test.reshape(-1, 1)).toarray()
y_val_encoded = enc.transform(y_val.reshape(-1, 1)).toarray()

# Flatten every image into one feature vector and scale the pixel values.
# NOTE(review): dividing by 255 assumes 8-bit pixel data -- confirm the dtype
# of x_train before relying on the [0, 1] range.
x_train_flat = x_train.reshape(x_train.shape[0], -1) / 255.0
x_test_flat = x_test.reshape(x_test.shape[0], -1) / 255.0
x_val_flat = x_val.reshape(x_val.shape[0], -1) / 255.0

# Create a GWU Network
model = GWUNetwork()

# Width of the data flowing into the next layer. It is tracked as a plain int
# so that no layer attribute has to be read back.
# NOTE(review): the recorded run of this cell ended in an AttributeError whose
# origin is not visible here -- re-run to confirm it is gone.
output_size = x_train_flat.shape[1]

# Fully connected network (no convolution): Dense -> ReLU -> Dense -> ReLU ->
# Dense -> Softmax. Each activation is applied exactly once, by its own layer,
# matching the Dense + Sigmoid pattern used by the other gwu_nn cells in this
# notebook. The original also passed `activation=` to every Dense layer, which
# would have applied softmax twice on the output.
# NOTE(review): confirm against gwu_nn's Dense that omitting `activation`
# leaves the layer linear.
dense_layer1 = Dense(32, input_size=output_size)
model.add(dense_layer1)
output_size = 32

relu_layer1 = RELU()
model.add(relu_layer1)

dense_layer2 = Dense(64, input_size=output_size)
model.add(dense_layer2)
output_size = 64

relu_layer2 = RELU()
model.add(relu_layer2)

# One output unit per class found by the encoder, instead of a hard-coded 7.
n_outputs = y_train_encoded.shape[1]
dense_layer3 = Dense(n_outputs, input_size=output_size)
model.add(dense_layer3)
output_size = n_outputs

softmax_layer = Softmax()
model.add(softmax_layer)

# Compile the model
model.compile('cross_entropy', lr=0.001)

# Train the model
model.fit(x_train_flat, y_train_encoded, epochs=10, batch_size=32)

# Evaluate on the test set
loss = model.evaluate(x_test_flat, y_test_encoded)
print(f'Test Loss: {loss}')



AttributeError: ignored

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from gwu_nn.gwu_network import GWUNetwork
from gwu_nn.layers import Dense
from gwu_nn.activation_layers import Sigmoid

from scipy.special import logit

np.random.seed(8)
num_obs = 8000

# Create our features to draw from two distinct 2D normal distributions
x1 = np.random.multivariate_normal([0, 0], [[1, .75], [.75, 1]], num_obs)
x2 = np.random.multivariate_normal([3, 8], [[1, .25], [.25, 1]], num_obs)

# Stack our inputs into one feature space
X = np.vstack((x1, x2))
print(X.shape)

# Class 0 for the first blob, class 1 for the second (same order as X).
y = np.hstack((np.zeros(num_obs), np.ones(num_obs)))
print(y.shape)

# Lets randomly split things into training and testing sets so we don't cheat
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Create our model: one linear unit (2 inputs -> 1 output, with bias) followed
# by a single Sigmoid layer. Keyword arguments are used because the positional
# call Dense(2, 1, True, 'sigmoid') does not line up with the keyword form the
# other gwu_nn cells in this notebook use (output size first, then add_bias /
# activation / input_size); the recorded run of this cell ended in a KeyError.
# The sigmoid is applied once, by the Sigmoid layer, which is also what the
# decision-boundary formula below assumes.
# NOTE(review): confirm the parameter order against gwu_nn's Dense signature.
network = GWUNetwork()
network.add(Dense(1, add_bias=True, input_size=2))
network.add(Sigmoid())
network.compile('log_loss', 0.001)
network.fit(X_train, y_train, epochs=100)

# Scatter both classes; colours follow the stacking order of X.
colors = ['red'] * num_obs + ['blue'] * num_obs
plt.figure(figsize=(12, 8))
plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.5)
plt.xlabel('feature 1')
plt.ylabel('feature 2')
plt.title('Synthetic classes and learned decision boundary')

# Range of our X values
start_x1 = -5
end_x1 = 7

# Decision boundary: the line where bias + w0*x1 + w1*x2 == logit(0.5),
# solved for x2 at both ends of the x1 range.
weights = network.layers[0].weights.reshape(-1).tolist()
bias = network.layers[0].bias[0][0]
start_y = (bias + start_x1 * weights[0] - logit(0.5)) / -weights[1]
end_y = (bias + end_x1 * weights[0] - logit(0.5)) / -weights[1]
plt.plot([start_x1, end_x1], [start_y, end_y], color='grey')


(16000, 2)
(16000,)


KeyError: ignored

# Training the Model


In [None]:
# train
# NOTE(review): `model`, `train_loader`, `criterion` and `optimizer` are not
# defined in the cells visible here; they have to come from a PyTorch setup
# cell -- confirm before a Restart & Run All.

for epoch in range(NUM_EPOCHS):
    model.train()
    for inputs, targets in tqdm(train_loader):
        # forward + backward + optimize
        optimizer.zero_grad()
        outputs = model(inputs)

        if task == 'multi-label, binary-class':
            targets = targets.to(torch.float32)
        else:
            # reshape(-1) flattens the labels exactly like squeeze() did for a
            # normal batch, but still yields a 1-D tensor when the batch holds
            # a single sample (squeeze() would collapse it to 0-d).
            targets = targets.reshape(-1).long()
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

100%|██████████| 55/55 [00:16<00:00,  3.33it/s]
100%|██████████| 55/55 [00:16<00:00,  3.25it/s]
100%|██████████| 55/55 [00:16<00:00,  3.33it/s]


# Evaluating the model

In [None]:
# evaluation

def test(split):
    """Score the model on one split and print its AUC and accuracy.

    The model is put in eval mode, every batch of the matching loader is run
    through it without gradient tracking, and the softmax scores are handed to
    a medmnist ``Evaluator`` built for ``data_flag`` and ``split``.

    Ground-truth labels from the loader are not collected: the evaluator is
    given only the scores. NOTE(review): this presumes the loader yields
    samples in the order the evaluator expects (i.e. it is not shuffled) --
    confirm where the loaders are built.

    Args:
        split: ``'train'`` (uses ``train_loader_at_eval``) or ``'test'``
            (uses ``test_loader``).

    Raises:
        ValueError: if ``split`` is neither ``'train'`` nor ``'test'``.
            Previously any other value silently scored the test loader while
            the evaluator was built for the requested split.
    """
    if split == 'train':
        data_loader = train_loader_at_eval
    elif split == 'test':
        data_loader = test_loader
    else:
        raise ValueError("split must be 'train' or 'test', got %r" % (split,))

    model.eval()
    score_batches = []

    with torch.no_grad():
        for inputs, _ in data_loader:
            # Both task types used the same softmax over the class axis.
            score_batches.append(model(inputs).softmax(dim=-1))

    # An empty loader yields an empty score tensor, as before.
    y_score = torch.cat(score_batches, 0) if score_batches else torch.tensor([])
    y_score = y_score.detach().numpy()

    evaluator = Evaluator(data_flag, split)
    metrics = evaluator.evaluate(y_score)

    print('%s  auc: %.3f  acc:%.3f' % (split, *metrics))


# Report metrics on the training split first, then on the test split.
print('==> Evaluating ...')
for split_name in ('train', 'test'):
    test(split_name)


==> Evaluating ...
train  auc: 0.826  acc:0.670
test  auc: 0.826  acc:0.669


In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab runtime at a fixed mount point.
GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import os
# Folder on the mounted Drive that holds the MedicalMNIST images.
drive_root = '/content/gdrive/My Drive/'
folder_path = os.path.join(drive_root, 'MedicalMNIST')

In [None]:
# Get the names of all entries directly inside the folder. os.listdir returns
# both files and sub-folders; in the recorded run every entry was a class folder.
list_items = os.listdir(folder_path)

# Print the list of items
print(list_items)

['Hand', 'BreastMRI', 'AbdomenCT', 'CXR', 'ChestCT', 'HeadCT']


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense



# Function to load images and labels
def load_data(data_path):
    """Load every image stored in the class sub-folders of ``data_path``.

    Each immediate sub-folder of ``data_path`` is treated as one class and its
    name is used as the label of every file directly inside it. Folders and
    files are visited in sorted order so the result is reproducible between
    runs. Entries that are not regular files (nested directories) and files
    lying directly in ``data_path`` are skipped.

    Args:
        data_path: Directory containing one sub-folder per class.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays: the arrays returned by
        ``plt.imread`` and the matching class-folder names, in load order.
    """
    # Collect the (path, label) pairs first so the progress bar total is
    # exactly the number of files that will be read.
    samples = []
    for folder in sorted(os.listdir(data_path)):
        class_dir = os.path.join(data_path, folder)
        if not os.path.isdir(class_dir):
            continue
        for filename in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, filename)
            if os.path.isfile(img_path):
                samples.append((img_path, folder))

    images = []
    labels = []
    with tqdm(total=len(samples), desc="Loading images") as pbar:
        for img_path, label in samples:
            images.append(plt.imread(img_path))
            labels.append(label)
            pbar.update(1)  # Update progress bar
    return np.array(images), np.array(labels)

# Load data
# Reads every image under folder_path into memory: `images` holds the pixel
# arrays and `labels` the matching class-folder names. The recorded run of
# this cell was interrupted while reading from Drive.
images, labels = load_data(folder_path)


Loading images:   0%|          | 0/6 [00:00<?, ?it/s]
Loading images from Hand: 0it [00:00, ?it/s]
Loading images:  17%|█▋        | 1/6 [00:00<00:01,  2.96it/s]
Loading images from BreastMRI: 0it [00:00, ?it/s]
Loading images:  33%|███▎      | 2/6 [00:02<00:06,  1.56s/it]
Loading images from AbdomenCT:   0%|          | 0/330 [00:00<?, ?it/s][A
Loading images from AbdomenCT:   0%|          | 1/330 [02:11<12:03:28, 131.94s/it]
Loading images:  33%|███▎      | 2/6 [02:15<04:31, 67.98s/it]


KeyboardInterrupt: ignored

In [None]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from gwu_nn.gwu_network import GWUNetwork
from gwu_nn.layers import Dense
from gwu_nn.activation_layers import Sigmoid


# Target column and the feature columns fed to the network.
y_col = 'Survived'
x_cols = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
# Relative path, resolved against the kernel's working directory. The recorded
# run of this cell ended in FileNotFoundError, so the CSV was not found there.
df = pd.read_csv('examples/data/titanic_data.csv')
# Target reshaped into a single column (n_rows, 1).
y = np.array(df[y_col]).reshape(-1, 1)
orig_X = df[x_cols]

# Let's standardize our features
# NOTE(review): 'Sex' and 'Embarked' are presumably already numerically
# encoded in this CSV, otherwise scaling would fail -- confirm.
scaler = preprocessing.StandardScaler()
stand_X = scaler.fit_transform(orig_X)
X = stand_X

# Hold out 33% of the rows for testing; fixed random_state for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Two Dense layers (14 units, then 1 unit) followed by a Sigmoid layer,
# compiled with log loss and trained for 100 epochs.
network = GWUNetwork()
network.add(Dense(14, add_bias=True, input_size=X.shape[1]))
network.add(Dense(1, add_bias=True))
network.add(Sigmoid())
network.compile(loss='log_loss', lr=.01)
network.fit(X_train, y_train, batch_size=10, epochs=100)

# Network outputs for the held-out rows.
predictions = network.predict(X_test)

FileNotFoundError: ignored