# Libraries and function for preprocessing

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler

In [None]:
def scale_dataset(dataframe, oversample = False, scaler = None, random_state = None):
  """Standardise the feature columns of a frame and optionally oversample it.

  The last column of ``dataframe`` is treated as the label; every other
  column is treated as a feature.

  Args:
    dataframe: pandas DataFrame whose last column holds the class labels.
    oversample: when True, the scaled rows are resampled with
      RandomOverSampler so that minority-class rows are drawn repeatedly
      until the classes are balanced. Defaults to False.
    scaler: optional, already-fitted object exposing ``transform(X)``.
      When given, it is applied as-is and nothing is re-fitted, which lets
      validation/test splits be scaled with the statistics learned on the
      training split. When None (default), a new StandardScaler is fitted
      on this frame's own features.
    random_state: seed forwarded to RandomOverSampler so that the
      resampling is reproducible. Only used when ``oversample`` is True.

  Returns:
    A tuple ``(data, X, y)`` where ``X`` is the 2-D array of scaled
    features, ``y`` is the 1-D label array, and ``data`` is ``X`` with
    ``y`` appended as the last column.
  """
  X = dataframe.iloc[:, :-1].values
  y = dataframe.iloc[:, -1].values

  if scaler is None:
    # No scaler supplied: learn mean/variance from this frame.
    X = StandardScaler().fit_transform(X)
  else:
    # Reuse statistics that were fitted elsewhere (e.g. on the training split).
    X = scaler.transform(X)

  if oversample :
    ros = RandomOverSampler(random_state = random_state)
    X, y = ros.fit_resample(X, y)

  # y is reshaped into a column so it can be stacked next to the features.
  data = np.hstack((X, np.reshape(y, (-1,1))))

  return data, X, y

# Rice

Data : https://archive.ics.uci.edu/dataset/545/rice+cammeo+and+osmancik

In [None]:
# Column names for the headerless rice file, in file order; the last column
# is the class label. Each name is separated from its type suffix by a single
# space (two of the original names carried a TAB character instead).
cols = ["Area Integer", "Perimeter Real", "Major_Axis_Length Real",
        "Minor_Axis_Length Real", "Eccentricity Real", "Convex_Area Integer", "Extent Real", "Class" ]

In [None]:
# Load the rice measurements; the file has no header row, so the column
# names come from `cols`.
df = pd.read_csv("rice.csv", header=None, names=cols)


In [None]:
# Encode the label as 1 for "Cammeo" and 0 for every other value.
# NOTE: the column is overwritten in place, so running this cell a second
# time compares integers with "Cammeo" and turns every label into 0.
df['Class'] = df['Class'].eq("Cammeo").astype(int)
df['Class'].unique()

array([1, 0])

In [None]:
# Shuffle once with a fixed seed so the split is reproducible, then cut the
# rows 60% / 20% / 20% into train / validation / test.
# Plain .iloc slicing replaces np.split, which on a DataFrame goes through
# DataFrame.swapaxes (deprecated in pandas 2.x).
shuffled = df.sample(frac=1, random_state=42)
train_end = int(0.6 * len(shuffled))
valid_end = int(0.8 * len(shuffled))
train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]


In [None]:
# Standardise each split; only the training split is oversampled, so the
# validation and test splits keep their original class balance.
# NOTE(review): every call fits its own StandardScaler inside scale_dataset,
# so the three splits are standardised with different statistics.
# NOTE: train/valid/test are rebound from DataFrames to arrays here, so this
# cell cannot be re-run without re-running the split cell first.
train, Xtrain, ytrain = scale_dataset(train, oversample=True)
valid, Xvalid, yvalid = scale_dataset(valid, oversample=False)
test, Xtest, ytest = scale_dataset(test, oversample=False)

In [None]:
#KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [None]:
# Fit a 5-nearest-neighbours classifier on the training split and print
# per-class precision / recall / F1 on the test split.
knnModel = KNeighborsClassifier(n_neighbors=5).fit(Xtrain, ytrain)
ypredknn = knnModel.predict(Xtest)
print(classification_report(y_true=ytest, y_pred=ypredknn))

              precision    recall  f1-score   support

           0       0.92      0.90      0.91       428
           1       0.87      0.90      0.89       334

    accuracy                           0.90       762
   macro avg       0.90      0.90      0.90       762
weighted avg       0.90      0.90      0.90       762



In [None]:
#NB
from sklearn.naive_bayes import GaussianNB

In [None]:
# Fit a Gaussian naive Bayes classifier on the training split and print
# per-class precision / recall / F1 on the test split.
nbModel = GaussianNB().fit(Xtrain, ytrain)
yprednb = nbModel.predict(Xtest)
print(classification_report(y_true=ytest, y_pred=yprednb))

              precision    recall  f1-score   support

           0       0.91      0.94      0.93       428
           1       0.92      0.88      0.90       334

    accuracy                           0.92       762
   macro avg       0.92      0.91      0.91       762
weighted avg       0.92      0.92      0.92       762



In [None]:
#LR
from sklearn.linear_model import LogisticRegression

In [None]:
# Fit a logistic regression (library defaults) on the training split and
# print per-class precision / recall / F1 on the test split.
lrModel = LogisticRegression().fit(Xtrain, ytrain)
ypredlr = lrModel.predict(Xtest)
print(classification_report(y_true=ytest, y_pred=ypredlr))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93       428
           1       0.92      0.91      0.91       334

    accuracy                           0.93       762
   macro avg       0.92      0.92      0.92       762
weighted avg       0.93      0.93      0.93       762



In [None]:
#SVM
from sklearn.svm import SVC

In [None]:
# Fit a support vector classifier (library defaults) on the training split
# and print per-class precision / recall / F1 on the test split.
svModel = SVC().fit(Xtrain, ytrain)
ypredsv = svModel.predict(Xtest)
print(classification_report(y_true=ytest, y_pred=ypredsv))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93       428
           1       0.90      0.91      0.91       334

    accuracy                           0.92       762
   macro avg       0.92      0.92      0.92       762
weighted avg       0.92      0.92      0.92       762



# Breast Cancer

Data : https://archive.ics.uci.edu/dataset/15/breast+cancer+wisconsin+original

In [None]:
# Column names for the headerless data file, in file order:
# one identifier column, nine feature columns, then the class label.
cols = (
    ["Sample code number"]
    + [
        "Clump Thickness",
        "Uniformity of Cell Size",
        "Uniformity of Cell Shape",
        "Marginal Adhesion",
        "Single Epithelial Cell Size",
        "Bare Nuclei",
        "Bland Chromatin",
        "Normal Nucleoli",
        "Mitoses",
    ]
    + ["Class"]
)

In [None]:
# Load the headerless data file using the names in `cols`, then remove the
# identifier column so that only the features and the label remain.
# pop() returns the removed column, which is why the cell displays it.
df = pd.read_csv("breast-cancer-wisconsin.data", header=None, names=cols)
df.pop('Sample code number')

0      1000025
1      1002945
2      1015425
3      1016277
4      1017023
        ...   
678     776715
679     841769
680     888820
681     897471
682     897471
Name: Sample code number, Length: 683, dtype: int64

In [None]:
# Encode the label as 1 where the raw class code is 4 and 0 otherwise.
# NOTE: the column is overwritten in place, so running this cell a second
# time turns every label into 0.
df['Class'] = df['Class'].eq(4).astype(int)
df.head()

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,0
1,5,4,4,5,7,10,3,2,1,0
2,3,1,1,1,2,2,3,1,1,0
3,6,8,8,1,3,4,3,7,1,0
4,4,1,1,3,2,1,3,1,1,0


In [None]:
# Shuffle once with a fixed seed so the split is reproducible, then use the
# first 80% of the rows for training and the rest for testing.
# Plain .iloc slicing replaces np.split, which on a DataFrame goes through
# DataFrame.swapaxes (deprecated in pandas 2.x).
shuffled = df.sample(frac=1, random_state=42)
train_end = int(0.8 * len(shuffled))
train = shuffled.iloc[:train_end]
test = shuffled.iloc[train_end:]

In [None]:
# Standardise both splits; only the training split is oversampled.
# NOTE(review): each call fits its own StandardScaler inside scale_dataset,
# so train and test are standardised with different statistics.
# NOTE: train/test are rebound from DataFrames to arrays here, so this cell
# cannot be re-run without re-running the split cell first.
train, Xtrain, ytrain = scale_dataset(train, oversample=True)
test, Xtest, ytest = scale_dataset(test, oversample=False)

In [None]:
import tensorflow as tf

In [None]:
# Fully connected binary classifier: 9 input features -> two hidden ReLU
# layers of 128 units each -> one sigmoid output unit.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation="relu", input_shape=(9,)),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

# How the output layer and the loss have to be paired:
#   * 1 output unit   -> sigmoid + binary_crossentropy (used here)
#   * 2+ output units -> softmax + sparse_categorical_crossentropy
# Binary cross-entropy does not fit an output layer with more than one unit,
# and softmax needs more than one unit (over a single unit it always gives 1).
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.binary_crossentropy,
    metrics=["accuracy"],
)

In [None]:
# Train for 10 passes over the (oversampled) training split.
model.fit(x=Xtrain, y=ytrain, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7b736ffc3850>

In [None]:
# Score the trained model on the test split; the result is [loss, accuracy].
model.evaluate(x=Xtest, y=ytest)



[0.07647543400526047, 0.970802903175354]

In [None]:
# Number of training labels (after oversampling in scale_dataset).
ytrain.shape

(722,)

In [None]:
# Number of test labels (the test split is not oversampled).
ytest.shape

(137,)

# Fungi Image Processing

Data : https://archive.ics.uci.edu/dataset/773/defungi

In [None]:
import tensorflow as tf

In [None]:
!unzip /content/drive/MyDrive/defungi.zip  #Puts it in the content folder by default also

In [None]:
# Build a batched image dataset from the extracted 'defungi' folder.
# Labels are inferred from the sub-directory names and encoded as integers;
# every image is resized to 224x224 and delivered in batches of 32.
ds = tf.keras.utils.image_dataset_from_directory(
    directory='defungi',
    labels='inferred',
    label_mode='int',
    image_size=(224, 224),
    batch_size=32,
)

Found 9114 files belonging to 5 classes.


In [None]:
import tensorflow_datasets as tfds

In [None]:
# Display the dataset's element spec (batch shapes and dtypes).
ds

<_BatchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [None]:
# MaxPool2D is imported from the public tf.keras namespace; the private
# keras.layers.pooling.max_pooling2d module path is not a stable import
# location across Keras versions.
from tensorflow.keras.layers import MaxPool2D

# Making a CNN

# One softmax unit per class folder found by the dataset loader, so the
# output layer always matches the labels present in `ds`.
num_classes = len(ds.class_names)

model = tf.keras.Sequential([
    tf.keras.Input(shape = (224,224,3)),
    # The loader yields float pixels in [0, 255]; bring them into [0, 1]
    # before the first convolution.
    tf.keras.layers.Rescaling(1.0 / 255),
    # Three conv + max-pool stages, each halving the spatial resolution.
    tf.keras.layers.Conv2D(32,(3,3),activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Conv2D(64,(3,3),activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Conv2D(64,(3,3),activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    # Classification head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64,activation = 'relu'),
    tf.keras.layers.Dense(num_classes,activation = tf.keras.activations.softmax)
])


# Integer labels + softmax output -> sparse categorical cross-entropy.
model.compile(optimizer = 'adam',
              loss = tf.keras.losses.sparse_categorical_crossentropy,
              metrics = ['accuracy'])

In [None]:
# Train the CNN for 5 passes over the whole image dataset
# (no validation data is passed, so only training metrics are logged).
model.fit(x=ds, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7d601a2d6ce0>

## Pretrained CNN

In [None]:
IMG_SHAPE = (224, 224, 3)

# MobileNetV2 with ImageNet weights and without its classification top,
# used as a feature extractor.
base_model = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)

# Freeze the pretrained weights so that only the layers added on top train.
base_model.trainable = False

In [None]:
# Collapses each feature map produced by the base model to a single value,
# flattening its output for the dense layers that follow.
global_average_pooling = tf.keras.layers.GlobalAveragePooling2D()

# One softmax unit per class folder found by the dataset loader, so the
# output layer always matches the labels present in `ds`.
prediction_layer = tf.keras.layers.Dense(len(ds.class_names), activation = 'softmax')

In [None]:
# Frozen MobileNetV2 features -> global average pooling -> small dense head.
model_pretrained = tf.keras.Sequential([
    tf.keras.Input(shape = IMG_SHAPE),
    # MobileNetV2's ImageNet weights expect pixels scaled to [-1, 1], while the
    # dataset loader yields values in [0, 255]; rescale before the base model.
    tf.keras.layers.Rescaling(1.0 / 127.5, offset = -1.0),
    base_model,
    global_average_pooling,
    tf.keras.layers.Dense(64,activation = 'relu'),
    prediction_layer
])

In [None]:
# Integer labels + softmax output -> sparse categorical cross-entropy.
model_pretrained.compile(
    optimizer='adam',
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Train the head on top of the frozen base for 10 passes over the dataset
# (no validation data is passed, so only training metrics are logged).
model_pretrained.fit(x=ds, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7d601a3117e0>

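My own convnet achieves about 48 percent accuracy, while the pretrained convnet reaches up to 77 percent. These appear to be training accuracies, since no validation or test split is used in this section. So not too shabby :)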

# Drowsiness Detection

Data : https://www.kaggle.com/datasets/hazemfahmy/openned-closed-eyes

In [None]:
import tensorflow as tf

In [None]:
!unzip /content/drive/MyDrive/TrainingData/Drowsiness/TestSet.zip
!unzip /content/drive/MyDrive/TrainingData/Drowsiness/TrainingSet.zip

In [None]:
# Both splits are loaded with the same settings: labels inferred from the
# sub-directory names as integers, 224x224 images, batches of 32.
_loader_options = dict(
    labels='inferred',
    label_mode='int',
    batch_size=32,
    image_size=(224, 224),
)

train_ds = tf.keras.utils.image_dataset_from_directory('TrainingSet', **_loader_options)

test_ds = tf.keras.utils.image_dataset_from_directory('TestSet', **_loader_options)

Found 1704 files belonging to 2 classes.
Found 4232 files belonging to 2 classes.


In [None]:
# Display the training dataset's element spec (batch shapes and dtypes).
train_ds


<_BatchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [None]:
# CNN for the two-class open/closed-eye images.
model = tf.keras.Sequential([
    tf.keras.Input(shape = (224,224,3)),
    # The loader yields float pixels in [0, 255]; bring them into [0, 1]
    # before the first convolution.
    tf.keras.layers.Rescaling(1.0 / 255),
    # Three conv + max-pool stages, each halving the spatial resolution.
    tf.keras.layers.Conv2D(32,(3,3),activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Conv2D(64,(3,3),activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Conv2D(64,(3,3),activation = 'relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    # Classification head: one softmax unit per class.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64,activation = 'relu'),
    tf.keras.layers.Dense(2,activation = tf.keras.activations.softmax)
])


# Integer labels + softmax output -> sparse categorical cross-entropy.
model.compile(optimizer = 'adam',
              loss = tf.keras.losses.sparse_categorical_crossentropy,
              metrics = ['accuracy'])

In [None]:
# Train the CNN for 5 passes over the training images.
model.fit(x=train_ds, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7d601f652410>

In [None]:
# Score the CNN on the held-out test images; the result is [loss, accuracy].
model.evaluate(x=test_ds)



[0.20272010564804077, 0.9279300570487976]

WOW

In [None]:
IMG_SHAPE = (224, 224, 3)

# MobileNetV2 with ImageNet weights and without its classification top,
# used as a feature extractor.
base_model = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)

# Freeze the pretrained weights so that only the layers added on top train.
base_model.trainable = False

# Collapses each feature map produced by the base model to a single value,
# flattening its output for the dense layers that follow.
global_average_pooling = tf.keras.layers.GlobalAveragePooling2D()

# Two softmax units: one per class.
prediction_layer = tf.keras.layers.Dense(units=2, activation='softmax')

In [None]:
# Frozen MobileNetV2 features -> global average pooling -> small dense head.
model_pretrained = tf.keras.Sequential([
    tf.keras.Input(shape = IMG_SHAPE),
    # MobileNetV2's ImageNet weights expect pixels scaled to [-1, 1], while the
    # dataset loader yields values in [0, 255]; rescale before the base model.
    tf.keras.layers.Rescaling(1.0 / 127.5, offset = -1.0),
    base_model,
    global_average_pooling,
    tf.keras.layers.Dense(64,activation = 'relu'),
    prediction_layer
])

# Integer labels + softmax output -> sparse categorical cross-entropy.
model_pretrained.compile(optimizer = 'adam',
              loss = tf.keras.losses.sparse_categorical_crossentropy,
              metrics = ['accuracy'])

In [None]:
# Train the head on top of the frozen base for 5 passes over the training images.
model_pretrained.fit(x=train_ds, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7d601f9274f0>

In [None]:
# Score the pretrained model on the held-out test images; the result is
# [loss, accuracy].
model_pretrained.evaluate(x=test_ds)



[0.1874774545431137, 0.929347813129425]

Not bad. My own convnet is almost as good as the pretrained one: 92.8% vs 92.9% test accuracy.