In [3]:
#CNN example pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class CNN(nn.Module):
    def __init__(self):
        super(CNN,super).__init__()
        self.conv1=nn.Conv2d(3,6,5)#input,output,m(mxm dimensions) kernel
        self.pool=nn.MaxPool2d(2,2)#reduce the size of the feature map to 2X2
        self.conv2=nn.Conv2d(6,16,5)#16 output channels
        self.fc1=nn.Linear(16*5*5,120)
        self.fc2=nn.Linear(120,84)
        self.fc3=nn.Linear(84,10)#10 output classes hence 10 neurons
    def forward(self,x):
        x=self.pool(F.relu(self.conv1(x)))
        x=self.pool(F.relu(self.conv2(x)))
        x=x.view(-1,16*5*5)#reshapes the data -1 lets the batch size be chosen by pytorch and 16*5*5 is the reshaped size
        x=F.relu(self.fc1(x))
        x=F.relu(self.fc2(x))
        return F.softmax(self.fc3(x),dim=1)#softmax converts the raw values to probabilities hence returning the probabilities for all classes

In [8]:
#training using the CIFAR-10 dataset (defining the data loaders)
import torchvision
import torchvision.transforms as transforms
import torch

transform=transforms.Compose([
    transforms.ToTensor(),#images to range [0,1]
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))#normalize to range [-1,1]
])

trainset=torchvision.datasets.CIFAR10(root='./data',train=True,download=True,transform=transform)# to dowload training dataset
trainloader=torch.utils.data.DataLoader(trainset,batch_size=4,shuffle=True,num_workers=2)#load the data in batches
testset=torchvision.datasets.CIFAR10(root='./data',train=False,download=True,transform=transform)#for test data
testloader=torch.utils.data.DataLoader(testset,batch_size=4,shuffle=True,num_workers=2)#dataloader for test data

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [13]:
#defining a CNN model using tensorflow
from tensorflow.keras.layers import Dense,Input,Conv2D,MaxPooling2D
from tensorflow.keras.models import Sequential
import tensorflow as tf
model=Sequential()
model.add(Input(shape=(32,32,3)))#32x32 pixels with 3 color channels
model.add(Conv2D(32,(3,3),activation='relu'))#kernel is 3x3 and there are 32 kernels
model.add(MaxPooling2D((2,2)))#2x2 dimensions for applying pooling 
model.add(Conv2D(64,(3,3),activation='relu'))#64 kernels kernels are 3x3
model.add(MaxPooling2D((2,2)))#2x2 dimensions for applying pooling 
model.add(Conv2D(64,(3,3),activation='relu'))

model.add(layers.Flatten())#flatten the output of the conv layers
model.add(Dense(64,activation='relu'))#added a fully connected layer
model.add(Dense(10))#output layer

#here we have 3 conv layers 1st has 32 kernels rest 2 have 64 kernels when we flatten the output then we created a fully connected layer and output layer
#for classification tasks we generally add 1 or 2 fully connected layer
#in a conv layer the ouytput is 3d hence we need to flatten it as the fully connected layer expects 1d input


In [None]:
#in pytorch 
import torch 
import torch.nn as nn
import torch.nn.functional as F
class net(nn.Module):
    def __init__(self):
        super(net,self).__init__()
        self.conv1=nn.Conv2d(3,32,3)#input channels,output channels,kernels size(3x3)
        self.pool=nn.MaxPool2d(2,2)#converts feature map to 2x2 matrix
        self.conv2=nn.Conv2d(32,64,3)#same as above
        self.pool=nn.MaxPool2d(2,2)
        self.fc1=nn.Linear(64*5*5,120)#input size , output channels also the size here should be 64*6*6 look into it
        self.fc2=nn.Linear(120,84)
        self.fc3=nn.Linear(84,10)
        
    def forward(self,x):
        x=self.pool(F.relu(self.conv1(x)))#applied activation relu function on the conv layer
        x=self.pool(F.relu(self.conv2(x)))
        print(x.shape())
        x=x.view(-1,64*5*5)#flatten the tensor from 64 channel of 5x5 feature map to 64*5*5    
        x=F.relu(self.fc1(x))
        x=F.relu(self.fc2(x))
        return F.softmax(self.fc3(x),dim=1)#applies softmax accross each row

net1=net()


In [4]:
#using a pretrained cnn in keras for image classification
from keras.applications.resnet50 import ResNet50,preprocess_input,decode_predictions
from keras.preprocessing import image#provides utilities to preprocess image data
import numpy as np
model=ResNet50(weights='imagenet')#load the model resnet with weights trained on ImageNet dataset
img=image.load_img('image.jpg',target_size=(224,224))#loads existing image and converts it into 224x224 image
x=image.img_to_array(img)#converts to numpy array
x=np.expand_dims(x,axis=0)#adds an extra dimension shape becomes = (1,height,width,channels)
x=preprocess_input(x)
prediction=model.predict(x)
print(decode_predictions(prediction,top=3)[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
[1m35363/35363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53us/step
[('n02099601', 'golden_retriever', np.float32(0.9884454)), ('n02094114', 'Norfolk_terrier', np.float32(0.002462029)), ('n02104029', 'kuvasz', np.float32(0.0017782694))]


In [None]:
#semantic segmentation using pytorch pretrained model
import torch
from torchvision import models,transforms
from PIL import Image
import numpy as np
model=models.segmentation.fcn_resnet101(pretrained=True)#load the pretrained model
inpimg=Image.open('image.jpg')
preprocess=transforms.Compose([#combines the preprocessing steps into a single pipeline
    transforms.ToTensor(),#converts from numpy array to pytorch tensor
    transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])#normalizes pixel values such that mean and std given
])
input_t=preprocess(inpimg)#starts the pipeline
inputbatch=input_t.unsqueeze(0)#adds batch dimension at 0th index
with torch.no_grad():
    output=model(inputbatch)
outputpred=output.argmax(0)#argmax returns the index of the highest value in the array

#visualizing the segmentation
pallete=torch.tensor([2**25-1,2**15-1,2**21-1])#color encoding values which will be used to give consistent colors for objects
colors=torch.as_tensor([i for i in range (21)])[:,None]*pallete #[:,None] reshapes the data to (21,1) for broadcasting and multiplies with the pallete
#so we end up with a psuedorandom color for the class label so that each class is a clearly different one

colors =(colors%255).numpy().astype("uint8")
r=Image.fromarray(outputpred.byte().cpu().numpy())#makes the output to be visualized
r.putpalette(colors)
r.show()



KeyboardInterrupt: 

Need to check out GANs for image generations

In [None]:
#face recognition general example using resnet pretrained model

import torch 
from torchvision import models,transforms
from PIL import Image
import numpy as np

model=models.resnet50(pretrained=True).eval()#eval switches model to eval mode disables dropouts so all neurons stay active

input=Image.open("myface.jpg")#i dont have this image here since its just a general example and nothing more
other=Image.open("otheface.jpg")#this too same as above

preprocess=transforms.Compose([
    transforms.Resize(256),#shortest size to 256
    transforms.CenterCrop(224),#224x224 center img cropped out
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])
])

inputtensor=preprocess(input)
inputbatch=inputtensor.unsqueeze(0)
othertensor=preprocess(other)
otherbatch=othertensor.unsqueeze(0)

with torch.no_grad():
    inputop=model(inputbatch)#face features as a tensor
    otherop=model(otherbatch)#face features as a tensor

KeyboardInterrupt: 

In [6]:
#exercises

#1 simple cnn on CIFAR

import tensorflow as tf
from tensorflow.keras import datasets,layers,models
import matplotlib.pyplot as plt

(trainimg,trainlabels),(testimg,testlables)=datasets.cifar10.load_data()
model=models.Sequential()
#below is the input layer
model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(32,32,3)))#output of this becomes (30,30,32) without padding
model.add(layers.MaxPooling2D((2,2)))#done to halve the dimension to (15,15,32) 32 diff feature maps
model.add(layers.Conv2D(64,(3,3),activation='relu'))
model.add(layers.MaxPooling2D((2,2)))#max pooling shrinks the size of data and preserves the important features
#now we add dense layers on the top
model.add(layers.Flatten())#the dimension become all the dimensions multiplied

model.add(layers.Dense(64,activation='relu'))
model.add(layers.Dense(10))#as those are the total number of classes

model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

history=model.fit(trainimg,trainlabels,epochs=10,validation_data=(testimg,testlables))


Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 12ms/step - accuracy: 0.2998 - loss: 3.0905 - val_accuracy: 0.5117 - val_loss: 1.3529
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.5314 - loss: 1.3055 - val_accuracy: 0.5572 - val_loss: 1.2275
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.5917 - loss: 1.1477 - val_accuracy: 0.5651 - val_loss: 1.2259
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.6302 - loss: 1.0516 - val_accuracy: 0.6048 - val_loss: 1.1279
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.6634 - loss: 0.9657 - val_accuracy: 0.6296 - val_loss: 1.0996
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.6901 - loss: 0.8934 - val_accuracy: 0.6506 - val_loss: 1.0445
Epoc

KeyboardInterrupt: 

In [7]:
testloss,testacc=model.evaluate(testimg,testlables,verbose=2)
print(testacc)

313/313 - 2s - 7ms/step - accuracy: 0.6491 - loss: 1.0852
0.6491000056266785


In [8]:
#to visualize
model.summary()

In [9]:
#plotting training and validation accuracy
plt.plot(history.history['accuracy'],label='accuracy')
plt.plot(history.history['val_accuracy'],label='val_accuracy')

NameError: name 'history' is not defined