# Introduction to the Data Sets

> Written by Dr Daniel Buscombe, Northern Arizona University

> Part of a series of notebooks for image recognition and classification using deep convolutional neural networks

The data sets provided are hosted in an Amazon Web Services (AWS) S3 bucket.

In [None]:
import s3fs
# Create the S3 filesystem handle used by every later cell.
# anon=True asks s3fs for an anonymous connection (no AWS credentials supplied).
fs = s3fs.S3FileSystem(anon=True)
# List the top-level entries of the 'cdi-workshop' bucket.
fs.ls('cdi-workshop')

## Looking at file structure

There are two major subdirectories:
* 'imrecog_data': contains example data sets for image recognition
* 'semseg_data': contains example data sets for semantic segmentation

In [None]:
# List the entries under the image-recognition subdirectory.
fs.ls('cdi-workshop/imrecog_data')

In [None]:
# List the entries under the semantic-segmentation subdirectory.
fs.ls('cdi-workshop/semseg_data')

In [None]:
# Count how many entries the listing of the 'gc' training folder returns.
len(fs.ls('cdi-workshop/semseg_data/gc/train'))

In [None]:
# List the entries under the NWPU-RESISC45 test folder
# (later cells read from its 'airplane' and 'baseball_diamond' subfolders).
fs.ls('cdi-workshop/imrecog_data/NWPU-RESISC45/test/')

## Reading and displaying imagery

In [None]:
from imageio import imread
import matplotlib.pyplot as plt

In [None]:
# Fetch one example JPEG from the bucket and decode it with imageio.
with fs.open('cdi-workshop/imrecog_data/NWPU-RESISC45/test/airplane/airplane_700.jpg', 'rb') as jpg_file:
    image = imread(jpg_file, 'jpg')

# Draw the decoded image on a 10x10-inch figure (figure number 0).
# The trailing ';' keeps the notebook from echoing the object imshow returns.
plt.figure(num=0, figsize=(10, 10))
plt.imshow(image);

In [None]:
# List the NWPU-RESISC45 test folder again (same folder as the earlier listing, without the trailing slash).
fs.ls('cdi-workshop/imrecog_data/NWPU-RESISC45/test')

In [None]:
# Collect the paths in the baseball_diamond test folder that end in '.jpg',
# keeping at most the first ten of them.
names = [
    path
    for path in fs.ls('cdi-workshop/imrecog_data/NWPU-RESISC45/test/baseball_diamond')
    if path.endswith('.jpg')
][:10]

In [None]:
# Show the first images listed in `names` on a 3x3 grid, one image per panel.
# The figure is 15x15 inches, set directly when the grid is created.
fig, ax = plt.subplots(3, 3, figsize=(15, 15))
panels = ax.ravel()

# Pair each panel with a path instead of indexing names[i]: if the folder
# yields fewer than nine images the loop simply stops early rather than
# raising IndexError.
for axi, name in zip(panels, names):
    # Read the image inside the `with` so the remote file is closed before plotting.
    with fs.open(name, 'rb') as f:
        image = imread(f, 'jpg')
    axi.imshow(image)
    # Pixel coordinates are not informative here, so hide the tick marks.
    axi.set(xticks=[], yticks=[])

# Blank out any panels that did not receive an image.
for axi in panels[len(names):]:
    axi.axis('off')

## Read Labels

In [None]:
# List the entries under the 'gc' labels folder.
fs.ls('cdi-workshop/semseg_data/gc/labels')

In [None]:
# The label file is opened in binary mode ('rb'), so each line read from it is a bytes object.
with fs.open('cdi-workshop/semseg_data/gc/labels/labels.txt', 'rb') as label_file:
    raw_lines = label_file.readlines()

# Strip leading/trailing whitespace (including the line ending) from every raw line.
labels = [raw.strip() for raw in raw_lines]
print(labels)
# decode() converts the first bytes entry to a str before printing it.
print(labels[0].decode())

## Read binary data files

In [None]:
from scipy.io import loadmat

In [None]:
# Open one MATLAB .mat file from the bucket and hand the open file object to scipy's loadmat.
mat_path = 'cdi-workshop/semseg_data/ontario/test/A2014862_geotag_mres.mat'
with fs.open(mat_path) as mat_file:
    dat = loadmat(mat_file)

In [None]:
# Show the keys of the object returned by loadmat.
# NOTE(review): per the SciPy docs these should be the stored variable names plus metadata entries such as '__header__' — confirm against the output.
dat.keys()