## Data-Gen.ipynb
### This notebook extracts the data from all the zip files, converts the color images to grayscale, and stores the (thermal, grayscale) image-path pairs in a dataframe. Finally, the dataframe is pickled to disk for later use.

In [1]:
# Imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
import cv2

In [3]:
# Extract data from zip files
# ZIP_PATH holds the archives; every archive is unpacked into UNZIP_PATH.
ZIP_PATH = os.path.join("Dataset", "Zipped")
UNZIP_PATH = os.path.join("Dataset", "Unzipped")

# exist_ok=True makes the cell safe to re-run and also creates missing parents.
os.makedirs(UNZIP_PATH, exist_ok=True)

# sorted() gives a deterministic extraction order, which matters if two
# archives contain the same member name (the later one overwrites).
for zip_name in sorted(os.listdir(ZIP_PATH)):
    zip_file = os.path.join(ZIP_PATH, zip_name)
    # Skip anything that is not a zip archive (sub-folders, .DS_Store, ...)
    # instead of aborting the whole cell with BadZipFile / IsADirectoryError.
    if not zipfile.is_zipfile(zip_file):
        print(f"Skipping non-zip entry: {zip_file}")
        continue
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(UNZIP_PATH)

In [5]:
# Convert RGB images to grayscale
# Only directories whose name ends in 'b' are processed; each image in them is
# re-read as single-channel grayscale and written back over the original file.
for dir_ in os.listdir(UNZIP_PATH):
    dir_path = os.path.join(UNZIP_PATH, dir_)
    # A regular file whose name happens to end in 'b' must not be listed.
    if not dir_.endswith('b') or not os.path.isdir(dir_path):
        continue
    for img in os.listdir(dir_path):
        img_path = os.path.join(dir_path, img)
        gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None rather than raising;
        # passing None to cv2.imwrite would abort the whole conversion.
        if gray is None:
            print(f"Skipping unreadable image: {img_path}")
            continue
        cv2.imwrite(img_path, gray)

In [12]:
# Store thermal and corresponding grayscale image in a dataframe
# Directories ending in 'a' feed the 'Thermal' column and directories ending
# in 'b' feed the 'Grayscale' column. Pairing is purely positional: row i holds
# the i-th thermal path and the i-th grayscale path in sorted traversal order.
# NOTE(review): this presumably relies on each 'a' directory sorting ahead of
# its matching 'b' directory and on matching file names inside them - confirm.
img_pairs = {'Thermal': [], 'Grayscale': []}
for dir_ in sorted(os.listdir(UNZIP_PATH)):
    dir_path = os.path.join(UNZIP_PATH, dir_)
    # Stray regular files in UNZIP_PATH would make os.listdir(dir_path) raise.
    if not os.path.isdir(dir_path):
        continue
    for img in sorted(os.listdir(dir_path)):
        # Absolute paths are stored, so the resulting frame is machine-specific.
        img_path = os.path.join(os.path.abspath(dir_path), img)
        if dir_[-1] == 'a':
            img_pairs['Thermal'].append(img_path)
        elif dir_[-1] == 'b' and len(img_pairs['Grayscale']) < len(img_pairs['Thermal']):
            img_pairs['Grayscale'].append(img_path)
        else:
            # WARNING: destructive. Reached for surplus files in a 'b' directory
            # (no thermal image left to pair with) and for every file in a
            # directory whose name ends in neither 'a' nor 'b'.
            os.remove(img_path)

# pd.DataFrame needs equal-length columns; fail with the actual counts instead
# of pandas' generic length-mismatch ValueError.
n_thermal = len(img_pairs['Thermal'])
n_gray = len(img_pairs['Grayscale'])
if n_thermal != n_gray:
    raise ValueError(
        f"Unpaired images: {n_thermal} thermal vs {n_gray} grayscale"
    )

df = pd.DataFrame(img_pairs)
print(df.shape)
df.head()

(8544, 2)


Unnamed: 0,Thermal,Grayscale
0,/home/prateekd007/Desktop/Work/College/Sem-5/M...,/home/prateekd007/Desktop/Work/College/Sem-5/M...
1,/home/prateekd007/Desktop/Work/College/Sem-5/M...,/home/prateekd007/Desktop/Work/College/Sem-5/M...
2,/home/prateekd007/Desktop/Work/College/Sem-5/M...,/home/prateekd007/Desktop/Work/College/Sem-5/M...
3,/home/prateekd007/Desktop/Work/College/Sem-5/M...,/home/prateekd007/Desktop/Work/College/Sem-5/M...
4,/home/prateekd007/Desktop/Work/College/Sem-5/M...,/home/prateekd007/Desktop/Work/College/Sem-5/M...


In [7]:
# Store dataframe on disk (pickled) so later notebooks can reload it
PICKLE_PATH = "Pickles"
# to_pickle does not create missing directories; make sure the target exists.
os.makedirs(PICKLE_PATH, exist_ok=True)
df.to_pickle(os.path.join(PICKLE_PATH, "df.pkl"))