In [None]:
import random
from collections import deque              # dequeは知らない。
from tqdm import tqdm
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# PyTorch関係のモジュール
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision

# モジュール等の確認

## What is deque? ( a data structure)

In [None]:
## What is deque?

"""
In Python, a deque (short for "double-ended queue") is a data structure provided by the collections module.
It represents a queue with the ability to efficiently add or remove elements from both ends.
It combines features of stacks and queues, allowing for fast operations at both ends of the queue.
"""

from collections import deque

# Start from three elements, then grow the deque at both ends.
my_deque = deque([1, 2, 3])
my_deque.append(4)      # add to the right end
my_deque.appendleft(0)  # add to the left end
print(my_deque)  # Output: deque([0, 1, 2, 3, 4])


deque([0, 1, 2, 3, 4])


## What is tqdm? ( "taqaddum" in Arabic, which means "progress.")

In [None]:
## What is tqdm?
"""
tqdm is a popular Python library that
draws a fast, extensible progress bar for loops and other iterative tasks.
Its name comes from the Arabic word "taqaddum", which means "progress."

"""

from tqdm import tqdm
import time

# Demo of a slow loop wrapped in a progress bar.
### The loop below runs 10 times; `desc` sets the label shown next to the bar ("Processing").
### After every finished iteration the bar advances and the completion percentage is refreshed.

progress_bar = tqdm(range(10), desc="Processing")
for _ in progress_bar:
    # time.sleep comes from the standard-library `time` module and pauses the
    # program for the given number of seconds. Here each iteration waits one
    # second to stand in for real work, so the whole loop takes about 10 seconds.
    time.sleep(1)



Processing: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


## What is TSNE? (t-Distributed stochastic neighbor embedding (t-SNE) method )

In [None]:
## What is TSNE?

# t-distributed stochastic neighbor embedding (t-SNE)
# is an unsupervised machine learning technique for nonlinear dimensionality reduction, used to visualize high-dimensional data in a low-dimensional space.
# (https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)


## What is Dataset, DataLoader in torch?  --->まだよくわからん。。。

In [None]:
##
"""
In the PyTorch library, Dataset and DataLoader are classes
that help you manage and efficiently load data for training and inference in machine learning models.
"""



"""
1. A Dataset in PyTorch is an abstraction that represents a collection of data samples,
typically used to provide data to a machine learning model for training or evaluation.
It encapsulates the raw data and provides a consistent interface to access individual data points along with their corresponding labels (if applicable).
"""


from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Minimal map-style dataset that pairs each sample with its label.

    Args:
        data: Sized, indexable collection of samples.
        labels: Sized, indexable collection of labels aligned with ``data``
            (``labels[i]`` is the label of ``data[i]``).

    Raises:
        ValueError: If ``data`` and ``labels`` do not have the same length.
    """

    def __init__(self, data, labels):
        # Fail fast: a length mismatch would otherwise only surface later as an
        # IndexError in the middle of iteration, or as silently unused labels.
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, "
                f"got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.data[index], self.labels[index]






"""
2. A DataLoader is a utility class that helps you efficiently load and batch data from a dataset during training or evaluation.
It provides iterators to iterate through mini-batches of data,
which can significantly speed up training by reducing the overhead associated with loading data one sample at a time.
"""
import torch
from torch.utils.data import DataLoader

# Toy inputs so this example runs on a fresh kernel:
# 100 random samples with 8 features each, and one binary label per sample.
data = torch.randn(100, 8)
labels = torch.randint(0, 2, (100,))

# Create a dataset instance
dataset = CustomDataset(data, labels)

# Create a DataLoader
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Iterate through mini-batches
for batch_data, batch_labels in dataloader:
    # Perform training or evaluation with the batch data and labels.
    # With 100 samples and batch_size=32 the final batch holds only 4 samples,
    # because DataLoader keeps the incomplete last batch by default.
    pass


## What is torchvision?

In [None]:
"""
torchvision is a PyTorch library that
provides utility functions, datasets, and pre-trained models specifically designed for computer vision tasks.
It's built to make working with image data and neural networks in PyTorch more convenient
by offering various tools and resources tailored to vision-related tasks.

"""

In [None]:
# Here's a simplified example of using torchvision to load the CIFAR-10 dataset:
import torch  # needed for torch.utils.data.DataLoader below
import torchvision
import torchvision.transforms as transforms

# Data preprocessing and transformation
transform = transforms.Compose([
    # Convert the PIL image to a float tensor with values in [0, 1].
    transforms.ToTensor(),
    # Shift and scale each of the three channels from [0, 1] to [-1, 1].
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

# Load the CIFAR-10 training split (downloaded into ./data if not already present)
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)

# Create a DataLoader that yields shuffled mini-batches of 32 images
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)


# CIFAR10

## 疑問点

### What is np.stack()?

In [1]:
"""
np.stack() is a NumPy function that joins a sequence of arrays along a new axis.
It takes the arrays to combine and returns one array with an extra dimension,
inserted at the position given by `axis`.
"""
import numpy as np

array1, array2 = np.array([1, 2, 3]), np.array([4, 5, 6])

# axis=0 puts the new dimension first, so each input array becomes one row.
stacked_array = np.stack([array1, array2], axis=0)
print(stacked_array)


[[1 2 3]
 [4 5 6]]


In [2]:
## code

## download data

In [None]:
# Fetch the CIFAR-10 training split into the local 'data' directory
# (downloaded if it is not already there). No transform is passed here.
dataset = torchvision.datasets.CIFAR10(root='data', train=True, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 75576095.52it/s]


Extracting data/cifar-10-python.tar.gz to data


## 各物体クラスの画像の表示


In [None]:
# Sanity check: dataset summary and number of samples, a separator line,
# then the list of class names and how many classes there are.
print(dataset, len(dataset), sep="\n")
print("------------------------------------------------------------------------------------")
print(dataset.classes, len(dataset.classes), sep="\n")

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data
    Split: Train
50000
------------------------------------------------------------------------------------
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
10


In [None]:
#表示ずみの画像のラベルを保存する変数   ---->　setなの？
displayed_classes = set()
i=0

# 全ての
while  i <(len(dataset))   and   len(displayed_classes)<len(data_set.classes):

  img, label = dataset[i]
  if