In [None]:
%config Application.iopub_data_rate_limit=10000000 #for increasing the buffer limit to upload images


In [None]:
#module import 

import tensorflow as tf
from __future__ import print_function

from PIL import Image
from multiprocessing import Pool
import warnings
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os


from keras.models import Model
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import GlobalAveragePooling2D
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras import backend as K
from keras.applications.imagenet_utils import decode_predictions
from keras.applications.imagenet_utils import preprocess_input
from sklearn.preprocessing import LabelEncoder , OneHotEncoder
from keras.optimizers import Adam

#from keras.applications.imagenet_utils import _obtain_input_shape # this will work for older versions of keras. 2.2.0 or before
#from keras.engine.topology import get_source_inputs

In [None]:
# Dummy Model Code

#our_model = tf.keras.Sequential([tf.keras.layers.Input(shape=(64,)),
                                #tf.keras.layers.Dense(1)])

#model creation from scratch

def galaxy(input_tensor=None,classes=2): 
    ''' this function will create a VGG16 convolutional neural network '''   
   
    img_rows, img_cols = 224, 224   # by default size is 224,224
    img_channels = 1

    img_dim = (img_rows, img_cols, img_channels)
   
    img_input = Input(shape=img_dim)
    
    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    
    # Classification block
    x = Flatten(name='flatten')(x)
    x = Dense(4096, activation='relu', name='fc1')(x)
    x = Dense(4096, activation='relu', name='fc2')(x)
    x = Dense(classes, activation='softmax', name='predictions')(x)

    # Create model.
   
     
    model = Model(inputs = img_input, outputs = x, name='galaxy')


    return model


In [None]:
 model = galaxy(classes = 2) #real and fake images of galaxies
model.summary()       

Model: "galaxy"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 1)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      640       
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0    

In [None]:
#mounting drive to access dataset
from google.colab import drive  
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
os.chdir('/content/drive/MyDrive/Group_Project_Data')
print(os.getcwd())

/content/drive/MyDrive/Group_Project_Data


In [None]:
#getting path to the dataset provided
#loaded dataset into googledrive and given the path
# there was a hidden folder '.lost Dir'which was created itself for keeping attributes and other information
#for dataset so it was not required for processing and training


dataset_path = '/content/drive/MyDrive/Group_Project_Data'

# List all subdirectories in the dataset
galaxy_types = [dir_name for dir_name in os.listdir(os.path.join(dataset_path, 'Train')) if not dir_name.startswith('.')]
print('Types of galaxies images found:', len(galaxy_types))
print(galaxy_types)#what kinds of images  are in this dataset



# List all subdirectories in the dataset
galaxy_typesvalid = [dir_name for dir_name in os.listdir(os.path.join(dataset_path, 'Valid')) if not dir_name.startswith('.')]
print('Types of galaxies images found in validation folder:', len(galaxy_types))
print(galaxy_typesvalid)#what kinds of images  are in this dataset



Types of galaxies images found: 2
['Fake', 'Real']
Types of galaxies images found in validation folder: 2
['Real', 'Fake']


In [None]:
#collecting all images that is REAL and FAKE galaxies into single list 
# for algorithm implementation


gal_images = []

for item in galaxy_types:
  all_images = os.listdir('/content/drive/MyDrive/Group_Project_Data/Train' + '/' +item)
  
  for image in all_images:
    gal_images.append((item, str('/content/drive/MyDrive/Group_Project_Data/Train' + '/' +item) + '/' + image))

print(gal_images)
print(len(gal_images)) # indicating the number of images in the list



gal_imagesvalid = []

for item in galaxy_typesvalid:
  all_images = os.listdir('/content/drive/MyDrive/Group_Project_Data/Valid' + '/' +item)
  
  for image in all_images:
    gal_imagesvalid.append((item, str('/content/drive/MyDrive/Group_Project_Data/Valid' + '/' +item) + '/' + image))

print(gal_imagesvalid)
print(len(gal_imagesvalid)) # indicating the number of images in the list







[]
0
[('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_610.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_602.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_603.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_61.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_600.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_601.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_597.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_593.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_596.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_595.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_594.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/Real/img_592.png'), ('Real', '/content/drive/MyDrive/Group_Project_Data/Valid/R

In [None]:
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Build a dataframe of images for Train folder   
galimages_df = pd.DataFrame(data=gal_images, columns=['galaxy type(train)', 'image'])
print(galimages_df.head())
print(galimages_df.tail())

# Build a dataframe of images for valid folder   
galimagesvalid_df = pd.DataFrame(data=gal_imagesvalid, columns=['galaxy type(valid)', 'image'])
print(galimagesvalid_df.head())
print(galimagesvalid_df.tail())


Empty DataFrame
Columns: [galaxy type(train), image]
Index: []
Empty DataFrame
Columns: [galaxy type(train), image]
Index: []
  galaxy type(valid)                                              image
0               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
1               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
2               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
3               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
4               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
    galaxy type(valid)                                              image
599               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
600               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
601               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
602               Real  /content/drive/MyDrive/Group_Project_Data/Vali...
603               Real  /content/drive/MyDrive/Group_Project_Data/Vali..

In [None]:
# Let's check how many samples for each category are present in Train folder
print("Total number of galaxy images in the dataset(train): ", len(galimages_df))

galaxies_count1 = galimages_df['galaxy type(train)'].value_counts()

print("galaxies in each category: ")
print(galaxies_count1)


# Let's check how many samples for each category are present in Valid folder

print("Total number of galaxy images in the dataset(valid): ", len(galimagesvalid_df))

galaxies_count2 = galimagesvalid_df['galaxy type(valid)'].value_counts()

print("galaxies in each category: ")
print(galaxies_count2)

Total number of galaxy images in the dataset(train):  0
galaxies in each category: 
Series([], Name: galaxy type(train), dtype: int64)
Total number of galaxy images in the dataset(valid):  604
galaxies in each category: 
Real    604
Name: galaxy type(valid), dtype: int64


In [None]:
!pip install opencv-python-headless --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
#resizing images for the algorithm implementation for Training dataset
import cv2

path_real = '/content/drive/MyDrive/Group_Project_Data/Train/Real'
path_fake = '/content/drive/MyDrive/Group_Project_Data/Train/Fake'
im_size = 224


images = []
labels = []
#process for real images
for f in os.listdir(path_real):
    if f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.png'): # check file extension
        img = Image.open(os.path.join(path_real, f))
        img = np.array(img.resize((im_size, im_size)))
        images.append(img)
        labels.append("Real")

# process fake images
for f in os.listdir(path_fake):
    if f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.png'): # check file extension
        img = Image.open(os.path.join(path_fake, f))
        img = np.array(img.resize((im_size, im_size)))
        images.append(img)
        labels.append("fake")


In [None]:
#resizing images for the algorithm implementation for Validation dataset
import cv2

path_real2 = '/content/drive/MyDrive/Group_Project_Data/Valid/Real'
path_fake2 = '/content/drive/MyDrive/Group_Project_Data/Valid/Fake'
im_size = 224


images2 = []
labels2 = []
#process for real images
for f in os.listdir(path_real2):
    if f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.png'): # check file extension
        img = Image.open(os.path.join(path_real2, f))
        img = np.array(img.resize((im_size, im_size)))
        images2.append(img)
        labels2.append("Real")

# process fake images
for f in os.listdir(path_fake2):
    if f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.png'): # check file extension
        img = Image.open(os.path.join(path_fake2, f))
        img = np.array(img.resize((im_size, im_size)))
        images2.append(img)
        labels2.append("fake")

In [None]:
#converting into arrays because algorithm understands array

images = np.array(images)

images = images.astype('float32') / 255.0   #pixel intensity 0-225 so dividing it
print(images.shape)

#converting into arrays because algorithm understands array(validation)

imagesvalid = np.array(images2)
imagesvalid = imagesvalid.astype('float32') / 255.0  #pixel intensity 0-225 so dividing it
imagesvalid.shape




(0,)


(1869, 224, 224)

In [None]:
#performing label and one hot enconding on the dataset to convert string data
# i.e galaxy type coloumn into 0's and 1's


y_tr=galimages_df['galaxy type(train)'].values


y_tr_labelencoder = LabelEncoder ()
y_tr = y_tr_labelencoder.fit_transform (y_tr)
indices = np.random.choice(len(y_tr), size=0, replace=False)
y_tr = np.delete(y_tr, indices)
print (y_tr)
print(y_tr.shape)


y_tes=galimagesvalid_df['galaxy type(valid)'].values


y_tes_labelencoder = LabelEncoder ()
y_tes = y_tes_labelencoder.fit_transform (y_tes)
indices = np.random.choice(len(y_tes), size=1, replace=False)
y_tes = np.delete(y_tes, indices)
print (y_tes)
print(y_tes.shape)




[]
(0,)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

y_tr = y_tr.reshape(-1, 1)
ct = ColumnTransformer([('one_hot_encoder', OneHotEncoder(), [0])])#Converted  scalar output into vector output 
                                                                   #where the correct class will be 1 and other will be 0
Y = ct.fit_transform(y_tr)
Y = Y.astype('float32') / 255.0 
print(Y.shape)

y_tes = y_tes.reshape(-1, 1)
CT = ColumnTransformer([('one_hot_encoder', OneHotEncoder(), [0])])
Y_T = CT.fit_transform(y_tes)
Y_T = Y_T.astype('float32') / 255.0
print(Y_T.shape)




(6034, 2)
(2000, 2)


In [None]:
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

images, Y = shuffle(images, Y, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(images, Y, test_size=0.05, random_state=415)

#inspect the shape of the training and testing.
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)


(5732, 224, 224)
(5732, 2)
(302, 224, 224)
(302, 2)


In [None]:
#training code
history = model.fit(x_train, y_train, epochs = 10, batch_size = 32, validation_data=(x_test,y_test))


In [None]:
preds = model.evaluate(test_x, test_y)
print("Loss = " + str(preds))

Loss = 0.6930264830589294


In [None]:
# Dummy Training Code

x_train = tf.random.normal((1024,64), dtype='float32')
y_train = tf.cast(tf.random.categorical(tf.math.log([[0.5, 0.5]]), 1024)[0], 'float32')

our_model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy())

our_model.fit(x_train, y_train, epochs=5, batch_size=64)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f7ce821bd30>

In [None]:
# Saving your model

our_model.save('/content/Group_X_Model_Trained')

In [None]:
# Test saved model

loaded_model = tf.keras.models.load_model('/content/Group_X_Model_Trained')
loaded_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 65
Trainable params: 65
Non-trainable params: 0
_________________________________________________________________


In [None]:
# zip the model
!zip -r /content/Group_X_Model_Trained.zip /content/Group_X_Model_Trained

  adding: content/Group_X_Model_Trained/ (stored 0%)
  adding: content/Group_X_Model_Trained/variables/ (stored 0%)
  adding: content/Group_X_Model_Trained/variables/variables.index (deflated 51%)
  adding: content/Group_X_Model_Trained/variables/variables.data-00000-of-00001 (deflated 54%)
  adding: content/Group_X_Model_Trained/saved_model.pb (deflated 85%)
  adding: content/Group_X_Model_Trained/assets/ (stored 0%)
  adding: content/Group_X_Model_Trained/keras_metadata.pb (deflated 77%)
