In [1]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd

# Load MNIST dataset using torchvision
from torchvision import datasets, transforms
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
mnist_loader = torch.utils.data.DataLoader(dataset=mnist_dataset, batch_size=len(mnist_dataset))

# Extract data and labels
data, labels = next(iter(mnist_loader))
data = data.view(len(mnist_dataset), -1)

# Set the number of rows and columns
num_rows = labels.size(0)
num_columns = 9  # Assuming 9 additional random labels per row

# Create a shuffled copy of the labels for each row
shuffled_labels = torch.tensor([np.random.choice(np.setdiff1d(range(10), [label.item()]), num_columns, replace=False) for label in labels])

# Combine the original labels and shuffled labels
combined_labels = torch.cat((labels.view(-1, 1), shuffled_labels), dim=1)

# Convert to NumPy array and create a DataFrame
combined_labels_np = combined_labels.numpy()
df = pd.DataFrame(combined_labels_np, columns=['Original'] + [f'Random_{i+1}' for i in range(num_columns)])

# Print the DataFrame
print(df.head())

# Save the DataFrame to a CSV file
df.to_csv('mnist_labels_pytorch.csv', index=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 109065349.54it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 65514166.48it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 78265938.58it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 25000693.92it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






   Original  Random_1  Random_2  Random_3  Random_4  Random_5  Random_6  \
0         5         0         6         7         4         9         3   
1         0         2         3         9         1         8         4   
2         4         1         6         3         8         5         0   
3         1         0         4         5         9         6         8   
4         9         2         1         6         0         5         8   

   Random_7  Random_8  Random_9  
0         8         2         1  
1         6         5         7  
2         9         2         7  
3         2         7         3  
4         7         3         4  


  shuffled_labels = torch.tensor([np.random.choice(np.setdiff1d(range(10), [label.item()]), num_columns, replace=False) for label in labels])


In [3]:
import numpy as np
import pandas as pd
from torchvision import datasets, transforms

# Load MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# Extract data and labels
images = mnist_dataset.data.numpy()  # Convert PyTorch tensor to NumPy array
labels = mnist_dataset.targets.numpy()

# Reshape images to flatten them
num_images, height, width = images.shape
flattened_images = images.reshape(num_images, height * width)

# Create a DataFrame
df = pd.DataFrame({'Label': labels})
df['Flattened_Image'] = flattened_images.tolist()  # Convert to list for DataFrame compatibility

# Save to CSV file
df.to_csv('mnist_data.csv', index=False)

# Load data back from the CSV file
loaded_df = pd.read_csv('mnist_data.csv')

# Convert the 'Flattened_Image' column back to NumPy arrays
loaded_df['Flattened_Image'] = loaded_df['Flattened_Image'].apply(eval)

# Print the loaded DataFrame
print(loaded_df.head())

   Label                                    Flattened_Image
0      5  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1      0  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2      4  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
3      1  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
4      9  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...


In [2]:
import numpy as np
import pandas as pd

# Load the first CSV file
df1 = pd.read_csv('/home/aalmansour/source/lidc_slices/MNIST/mnist_data.csv')

# Load the second CSV file
df2 = pd.read_csv('/home/aalmansour/source/lidc_slices/MNIST/mnist_labels.csv')

# Merge the two DataFrames based on a common column (e.g., 'common_column')
merged_df = pd.merge(df1, df2, on='Label')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('merged_mnist_file.csv', index=False)