<a href="https://colab.research.google.com/github/Kristin33/10605-mini/blob/main/10605_mini_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import sys
import argparse
import re
import time
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import _LRScheduler

from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
from keras.optimizers import RMSprop

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import Isomap, MDS, TSNE
import matplotlib.pyplot as plt


# from conf import settings
# from utils import get_network, get_training_dataloader, get_test_dataloader, WarmUpLR, \
#     most_recent_folder, most_recent_weights, last_epoch, best_acc_weights

# Data


In [2]:
# Load the CIFAR-100 and CIFAR-10 train/test splits, each into its own local
# root folder; download=True fetches the archive when it is not already there.
cifar100_train = torchvision.datasets.CIFAR100(
    root="cifar100", train=True, download=True)
cifar100_test = torchvision.datasets.CIFAR100(
    root="cifar100", train=False, download=True)
cifar10_train = torchvision.datasets.CIFAR10(
    root="cifar10", train=True, download=True)
cifar10_test = torchvision.datasets.CIFAR10(
    root="cifar10", train=False, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to cifar100/cifar-100-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting cifar100/cifar-100-python.tar.gz to cifar100
Files already downloaded and verified
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar10/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting cifar10/cifar-10-python.tar.gz to cifar10
Files already downloaded and verified


In [3]:
# Raw image arrays as exposed by the torchvision datasets.
train_data = cifar100_train.data # a numpy array
test_data = cifar100_test.data

# One-hot encode the integer class ids. The width is pinned to the dataset's
# class list instead of being inferred from the largest label present, so the
# train and test label matrices always have the same number of columns.
# (to_categorical accepts a 1-D label vector, so no expand_dims is needed.)
train_labels = np_utils.to_categorical(
    np.array(cifar100_train.targets), num_classes=len(cifar100_train.classes))
test_labels = np_utils.to_categorical(
    np.array(cifar100_test.targets), num_classes=len(cifar100_train.classes))

print("Training data shape: ", train_data.shape)
print("Testing data shape: ", test_data.shape)

Training data shape:  (50000, 32, 32, 3)
Testing data shape:  (10000, 32, 32, 3)


In [4]:
def transform(data):
  '''
  Transform the data such that each feature has zero mean and 1 std.

  The scaler is fitted on `data` itself, so the statistics used are those of
  the array passed in.

  Args:
    data (ndarray): must have shape (N, k), where k is the number of features.
  Returns:
    The standardized data produced by StandardScaler.fit_transform.
  '''
  scaler = StandardScaler()
  transformed_data = scaler.fit_transform(data)
  return transformed_data

def rescale(data):
  '''
  Divide `data` by 255 and return the result.
  Presumably meant for 8-bit image data, mapping it onto [0, 1] -- confirm
  against callers.
  '''
  scaled = data / 255
  return scaled

In [5]:
# Flatten each image into one feature vector. The row count is taken from the
# array and -1 lets NumPy infer the per-image length, so nothing about the
# dataset size or image resolution is hard-coded here.
train_data_flattened = train_data.reshape((train_data.shape[0], -1))
test_data_flattened = test_data.reshape((test_data.shape[0], -1))

# Transformation: divide the pixel values by 255.0.
transformed_train_data = train_data_flattened/255.0
transformed_test_data = test_data_flattened/255.0

# Dimensionality Reduction

In [6]:
# Target dimensionality: used as n_components by the PCA / Isomap / MDS cells
# and as the input width of the classifier's first Dense layer.
reduced_dimension = 2

## PCA

In [7]:
# Fit PCA on the training features only, then project both splits with the
# same components, timing the whole step.
time_start = time.time()
# random_state is fixed so repeated runs give the same projection: for data of
# this size scikit-learn may select its randomized SVD solver, which is
# otherwise seeded differently on every run.
pca = PCA(n_components=reduced_dimension, random_state=0)
pca.fit(transformed_train_data)
reduced_train_data_pca = pca.transform(transformed_train_data)
reduced_test_data_pca = pca.transform(transformed_test_data)
print ('PCA done! Time elapsed: {} seconds'.format(time.time()-time_start))
# explained_variance_ratio_ holds one fraction per kept component; their sum is
# the share of total variance retained (a value in [0, 1]).
print("Percentage of variance explained: ", sum(pca.explained_variance_ratio_))
# Note: When n_components=50, 85% of variance is explained.

PCA done! Time elapsed: 11.036816358566284 seconds
Percentage of variance explained:  0.42859692565946594


In [8]:
# 50-component PCA, fitted on the training features only; its output is the
# input to the Isomap / MDS / t-SNE cells.
# random_state is fixed so the projection (and everything derived from it) is
# reproducible if scikit-learn selects its randomized SVD solver.
pca50 = PCA(n_components=50, random_state=0)
pca50.fit(transformed_train_data)
reduced_train_data_pca50 = pca50.transform(transformed_train_data)
reduced_test_data_pca50 = pca50.transform(transformed_test_data)

## ISOMAP

In [None]:
# Isomap embedding of the 50-component PCA features, timed end to end.
time_start = time.time()
# The manifold is learned from every training row ...
isomap = Isomap(n_components=reduced_dimension).fit(reduced_train_data_pca50)
# ... but only the first 200 rows of each split are projected.
# NOTE(review): fitting on the full training set is likely very expensive in
# time and memory; the MDS / t-SNE cells only use a 200+200 subset -- confirm
# whether the fit was meant to use a subset as well.
reduced_train_data_isomap = isomap.transform(reduced_train_data_pca50[:200])
reduced_test_data_isomap = isomap.transform(reduced_test_data_pca50[:200])
print('Isomap done! Time elapsed: {} seconds'.format(time.time() - time_start))

In [None]:
# Plot the Isomap embedding of the training subset against the labels.
# NOTE(review): plot_distribution is not defined in the visible part of this
# notebook -- make sure its defining cell runs before this one.
# NOTE(review): reduced_train_data_isomap holds only the first 200 training
# rows while train_labels covers the whole training set -- confirm the helper
# aligns the two (or pass train_labels[:200]).
plot_distribution(reduced_train_data_isomap, train_labels)

## MDS

In [None]:
# MDS embedding of a small subset, timed end to end.
time_start = time.time()
# random_state is fixed so the (randomly initialised) embedding is reproducible.
mds = MDS(n_components=reduced_dimension, random_state=0)
# The first 200 train rows and first 200 test rows are stacked and embedded in
# one fit_transform call so that both subsets share a single embedding.
# No separate mds.fit(...) on the full training set is done beforehand:
# fit_transform re-fits from scratch, so such a fit would be discarded, and
# fitting on all training rows is likely very expensive.
reduced_all_data_mds = mds.fit_transform(
    np.vstack((reduced_train_data_pca50[:200, :], reduced_test_data_pca50[:200, :])))
# Split the joint embedding back into its train / test parts.
reduced_train_data_mds = reduced_all_data_mds[:200, :]
reduced_test_data_mds = reduced_all_data_mds[200:, :]
print ('MDS done! Time elapsed: {} seconds'.format(time.time()-time_start))

## T-SNE

In [1]:
# t-SNE embedding of a small subset, timed end to end.
# Requires the PCA-50 cell (and the imports / reduced_dimension cells) to have
# been run first in the same kernel.
time_start = time.time()
# Use the shared reduced_dimension setting, like the other reducers, instead of
# a separate hard-coded 2; random_state is fixed so the stochastic embedding is
# reproducible across runs.
tsne = TSNE(n_components=reduced_dimension, random_state=0)
# The first 200 train rows and first 200 test rows are stacked and embedded in
# one fit_transform call so that both subsets share a single embedding.
reduced_all_data_tsne = tsne.fit_transform(
    np.vstack((reduced_train_data_pca50[:200, :], reduced_test_data_pca50[:200, :])))
# Split the joint embedding back into its train / test parts.
reduced_train_data_tsne = reduced_all_data_tsne[:200, :]
reduced_test_data_tsne = reduced_all_data_tsne[200:, :]
print ('T-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

NameError: ignored

In [None]:
# Show the fitted estimator's n_iter_ attribute (presumably the number of
# optimisation iterations t-SNE actually ran -- see the scikit-learn docs).
print(tsne.n_iter_)

# Run Model

In [8]:
# Training hyperparameters used by the Keras model cells.
batch_size = 128  # passed to model.fit as batch_size
num_classes = train_labels.shape[-1]  # width of the one-hot label matrix; size of the output layer
epochs = 140  # passed to model.fit as epochs

In [9]:
# Fully connected classifier: four ReLU hidden layers (1024, 1024, 512, 256
# units) on top of the reduced_dimension-wide input, followed by a softmax
# layer with one unit per class.
model = Sequential([
    Dense(1024, activation='relu', input_shape=(reduced_dimension,)),
    Dense(1024, activation='relu'),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(num_classes, activation='softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1024)              3072      
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dense_2 (Dense)              (None, 512)               524800    
_________________________________________________________________
dense_3 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_4 (Dense)              (None, 100)               25700     
Total params: 1,734,500
Trainable params: 1,734,500
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Categorical cross-entropy matches the one-hot labels; RMSprop is used with
# its default settings and accuracy is reported each epoch.
model.compile(
    optimizer=RMSprop(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the shapes of the features and labels for both splits, one per line,
# as a sanity check before training.
print(
    reduced_train_data_pca.shape,
    train_labels.shape,
    reduced_test_data_pca.shape,
    test_labels.shape,
    sep="\n",
)

# Train on the 2-D PCA features; the test split is passed as validation data.
history = model.fit(
    reduced_train_data_pca,
    train_labels,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(reduced_test_data_pca, test_labels),
)

(50000, 2)
(50000, 100)
(10000, 2)
(10000, 100)
Epoch 1/140
Epoch 2/140
Epoch 3/140
Epoch 4/140
Epoch 5/140
Epoch 6/140
Epoch 7/140
Epoch 8/140
Epoch 9/140
Epoch 10/140
Epoch 11/140
Epoch 12/140
Epoch 13/140
Epoch 14/140
Epoch 15/140
Epoch 16/140
Epoch 17/140
Epoch 18/140
Epoch 19/140
Epoch 20/140
Epoch 21/140
Epoch 22/140
Epoch 23/140
Epoch 24/140
Epoch 25/140
Epoch 26/140
Epoch 27/140
Epoch 28/140
Epoch 29/140
Epoch 30/140
Epoch 31/140
Epoch 32/140
Epoch 33/140
Epoch 34/140
Epoch 35/140
Epoch 36/140
Epoch 37/140
Epoch 38/140
Epoch 39/140
Epoch 40/140
Epoch 41/140
Epoch 42/140
Epoch 43/140
Epoch 44/140
Epoch 45/140
Epoch 46/140
Epoch 47/140
Epoch 48/140
Epoch 49/140
Epoch 50/140
Epoch 51/140
Epoch 52/140
Epoch 53/140
Epoch 54/140
Epoch 55/140
Epoch 56/140
Epoch 57/140
Epoch 58/140
Epoch 59/140
Epoch 60/140
Epoch 61/140
Epoch 62/140
Epoch 63/140
Epoch 64/140
Epoch 65/140
Epoch 66/140
Epoch 67/140
Epoch 68/140
Epoch 69/140
Epoch 70/140
Epoch 71/140
Epoch 72/140
Epoch 73/140
Epoch 74/140