<a href="https://colab.research.google.com/github/Jasleen8801/medical-prescription-recognition/blob/main/medical.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
from IPython.display import HTML, display

def set_css():
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))
get_ipython().events.register('pre_run_cell', set_css)

In [26]:
!pip install opencv-python
!pip install opencv-python-headless
!apt-get install -y xvfb x11-utils
!pip install pyvirtualdisplay
!pip install wget
!pip install emnist

Reading package lists... Done
Building dependency tree       
Reading state information... Done
x11-utils is already the newest version (7.7+5).
xvfb is already the newest version (2:1.20.13-1ubuntu1~20.04.8).
0 upgraded, 0 newly installed, 0 to remove and 15 not upgraded.


# **Approach 1**

In [27]:
# Import necessary libraries
import numpy as np
from PIL import ImageGrab, ImageTk, Image
from google.colab import files
import tkinter as tk
from google.colab.patches import cv2_imshow
from IPython.display import display, Javascript
import cv2
import time
import wget
import pandas as pd
from scipy.io import loadmat
import emnist
from scipy.optimize import minimize

In [28]:
# Functions for training the neural network
def neural_network(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lamb):
  # weights split to Theta1 and Theta2
  Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1), order='F')
  Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        (num_labels, hidden_layer_size + 1), order='F')

  # Forward Propagation
  m = X.shape[0]  # no of training examples
  one_matrix = np.ones((m,1))
  X = np.append(one_matrix, X, axis=1) # Adding bias unit to first layer by appending ones to the left of X
  a1 = X # Activation of 1st layer = X
  z2 = np.dot(X, Theta1.transpose()) # Product of X and transpose of theta
  a2 = 1 / (1 + np.exp(-z2)) # Apply the sigmoid function to z2 to compute the activation of the second layer
  one_matrix = np.ones((m,1))
  a2 = np.append(one_matrix, a2, axis=1)
  z3 = np.dot(a2, Theta2.transpose())
  a3 = 1 / (1 + np.exp(-z3))

  # Changing y labels into vectors of bool
  y_vect = np.zeros((m, num_labels))
  for i in range(m):
    y_vect[i, int(y[i])] = 1

  # Calculating cost function
  J = (1 / m) * (np.sum(np.sum(-y_vect * np.log(a3) - (1 - y_vect) * np.log(1 - a3)))) + (lamb / (2 * m)) * (
                np.sum(np.sum(Theta1[:, 1:] ** 2)) + np.sum(np.sum(Theta2[:, 1:] ** 2)))

  # Backpropagation
  Delta3 = 3 - y_vect # error in o/p layer
  Delta2 = np.dot(Delta3, Theta2) * a2 * (1-a2) # error in hidden layer
  Delta2 = Delta2[:, 1:] # Remove the error for the bias unit in the hidden layer

  # Gradient Computation
  Theta1[:,0] = 0
  Theta1_grad = (1/m) * np.dot(Delta2.transpose(), a1) + (lamb/m) * Theta1
  Theta2[:,0] = 0
  Theta2_grad = (1/m) * np.dot(Delta3.transpose(), a2) + (lamb/m) * Theta2
  grad = np.concatenate((Theta1_grad.flatten(), Theta2_grad.flatten()))

  return J, grad

In [39]:
# Function for predicting the output
def predict(Theta1, Theta2, X):
    m = X.shape[0]
    one_matrix = np.ones((m, 1))
    X = np.append(one_matrix, X, axis=1)
    z2 = np.dot(X, Theta1.transpose())
    a2 = 1 / (1 + np.exp(-z2))
    one_matrix = np.ones((m, 1))
    a2 = np.append(one_matrix, a2, axis=1)
    z3 = np.dot(a2, Theta2.transpose())
    a3 = 1 / (1 + np.exp(-z3))
    p = np.argmax(a3, axis=1)
    return p


In [30]:
def initialise(a, b):
  epsilon = 0.15
  c = np.random.rand(a, b+1)*(2*epsilon)-epsilon
  return c

In [31]:
# Function to capture screen portion
def capture_screen(x, y, width, height):
  display(Javascript("""
    const sleep = (milliseconds) => {
      return new Promise(resolve => setTimeout(resolve, milliseconds))
    }
    async function capture(x, y, width, height) {
      let kernel = IPython.notebook.kernel;
      let result = await google.colab.kernel.invokeFunction('notebook.capture', [
        x, y, width, height
      ], {});
      let img_data = result.data['application/octet-stream'];
      const urlCreator = window.URL || window.webkitURL;
      const imageURL = urlCreator.createObjectURL(new Blob([img_data], {type; "image/jpeg"}));
      const img = document.createElement('img');
      img.src = imageURL;
      img.width = width;
      img.height = height;
      document.body.appendChild(img);
      await sleep(1000);
      img.remove();
    }
    capture({}, {}, {}, {})
  """))

In [32]:
# function to recognise prescription
def recognize_prescription():
  x = 100
  y = 100
  width = 500
  height = 500

  capture_screen(x, y, width, height)

  img = cv2.imread("screenshot.jpg")
  cv2.imshow(img)


In [33]:
wget.download('https://www.kaggle.com/datasets/crawford/emnist?select=emnist-balanced-train.csv', 'emnist-balanced-train.csv')
wget.download('https://www.kaggle.com/datasets/crawford/emnist?select=emnist-balanced-test.csv', 'emnist-balanced-test.csv')

'emnist-balanced-test (1).csv'

In [46]:
# data1 = pd.read_csv('emnist-balanced-train.csv')
# data2 = pd.read_csv('emnist-balanced-test.csv')
X_train, y_train = emnist.extract_training_samples('balanced')
X_test, y_test = emnist.extract_test_samples('balanced')
print(X_train.shape)
print(X_test.shape)

print("Data type:", X_train.dtype)
print("Minimum value:", np.min(X_train))
print("Maximum value:", np.max(X_train))

(112800, 28, 28)
(18800, 28, 28)
Data type: uint8
Minimum value: 0
Maximum value: 255


In [47]:
X_train = X_train.reshape((X_train.shape[0], -1), order='F')
X_test = X_test.reshape((X_test.shape[0], -1), order='F')

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

X_train = X_train / 255.0
X_test = X_test / 255.0

for i in range(len(X_train)):
  ex = X_train[i,:].reshape((28,28), order='F')  # Use 'F' for column-major order
  X_train[i,:] = ex.flatten()

for i in range(len(X_test)):
  ex = X_test[i,:].reshape((28,28), order='F')  # Use 'F' for column-major order
  X_test[i,:] = ex.flatten()


In [48]:
y_train = y_train.flatten()
y_test = y_test.flatten()

In [49]:
m = X_train.shape[0]
input_layer_size = 784
hidden_layer_size = 130
num_labels = 47

# randomly initialising thetas
initial_theta1 = initialise(hidden_layer_size, input_layer_size)
initial_theta2 = initialise(num_labels, hidden_layer_size)

# unrolling parameters into a single column vector
initial_nn_params = np.concatenate((initial_theta1.flatten(), initial_theta2.flatten()))
maxiter = 800
lambda_reg = 0.1
myargs = (input_layer_size, hidden_layer_size, num_labels, X_train, y_train, lambda_reg)

# calling minimize function to minimize cost function and to train weights
results = minimize(neural_network, x0=initial_nn_params, args=myargs, options={
    'disp': True,
    'maxiter': maxiter
}, method="L-BFGS-B", jac=True)

nn_params = results["x"]

# Weights are split back to theta1 and theta2
Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, input_layer_size + 1))  # shape = (100, 785)
Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                    (num_labels, hidden_layer_size + 1))  # shape = (10, 101)

pred = predict(Theta1, Theta2, X_test)
print(f'Test Set Accuracy: {(np.mean(pred == y_test) * 100)}')

pred = predict(Theta1, Theta2, X_train)
print(f'Training Set Accuracy: {(np.mean(pred == y_train) * 100)}')

tp = 0
for i in range(len(pred)):
  if pred[i] == y_train[i]:
    tp += 1
fp = len(y_train) - tp
print(f'Precision: {tp/(tp+fp)}')

np.savetxt('Theta1.txt', Theta1, delimiter=',')
np.savetxt('Theta2.txt', Theta2, delimiter=',')

  a3 = 1 / (1 + np.exp(-z3))
  J = (1 / m) * (np.sum(np.sum(-y_vect * np.log(a3) - (1 - y_vect) * np.log(1 - a3)))) + (lamb / (2 * m)) * (
  J = (1 / m) * (np.sum(np.sum(-y_vect * np.log(a3) - (1 - y_vect) * np.log(1 - a3)))) + (lamb / (2 * m)) * (
  a2 = 1 / (1 + np.exp(-z2)) # Apply the sigmoid function to z2 to compute the activation of the second layer
  a2 = 1 / (1 + np.exp(-z2))
  a3 = 1 / (1 + np.exp(-z3))


Test Set Accuracy: 2.127659574468085
Training Set Accuracy: 2.127659574468085
Precision: 0.02127659574468085


In [45]:
# Gradient Descent
m = X_train.shape[0]
input_layer_size = 784
hidden_layer_size = 130
num_labels = 47

initial_theta_1 = initialise(hidden_layer_size, input_layer_size)
initial_theta_2 = initialise(num_labels, hidden_layer_size)

initial_nn_params - np.concatenate((initial_theta1.flatten(), initial_theta2.flatten()))

alpha = 0.01
num_iterations = 100
lambda_reg = 0.1

for i in range(num_iterations):
  cost, grad = neural_network(initial_nn_params, input_layer_size, hidden_layer_size, num_labels, X_train, y_train, lambda_reg)
  initial_nn_params -= alpha * grad

Theta1 = np.reshape(initial_nn_params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, input_layer_size + 1))
Theta2 = np.reshape(initial_nn_params[hidden_layer_size * (input_layer_size + 1):],
                    (num_labels, hidden_layer_size + 1))

pred = predict(Theta1, Theta2, X_test)
print(f'Test Set Accuracy: {(np.mean(pred == y_test) * 100)}')

pred = predict(Theta1, Theta2, X_train)
print(f'Training Set Accuracy: {(np.mean(pred == y_train) * 100)}')

tp = 0
for i in range(len(pred)):
    if pred[i] == y_train[i]:
        tp += 1
fp = len(y_train) - tp
print(f'Precision: {tp / (tp + fp)}')

np.savetxt('Theta1.txt', Theta1, delimiter=',')
np.savetxt('Theta2.txt', Theta2, delimiter=',')

  a3 = 1 / (1 + np.exp(-z3))
  J = (1 / m) * (np.sum(np.sum(-y_vect * np.log(a3) - (1 - y_vect) * np.log(1 - a3)))) + (lamb / (2 * m)) * (
  J = (1 / m) * (np.sum(np.sum(-y_vect * np.log(a3) - (1 - y_vect) * np.log(1 - a3)))) + (lamb / (2 * m)) * (
  a3 = 1 / (1 + np.exp(-z3))


Test Set Accuracy: 2.127659574468085
Training Set Accuracy: 2.127659574468085
Precision: 0.02127659574468085


# **Approach 2**

In [52]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import emnist
from scipy.optimize import minimize

# Functions for training the neural network
def neural_network(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lamb):
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1), order='F')
    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        (num_labels, hidden_layer_size + 1), order='F')

    m = X.shape[0]
    one_matrix = np.ones((m, 1))
    X = np.append(one_matrix, X, axis=1)
    a1 = X
    z2 = np.dot(X, Theta1.transpose())
    a2 = 1 / (1 + np.exp(-z2))
    a2 = np.append(one_matrix, a2, axis=1)
    z3 = np.dot(a2, Theta2.transpose())
    a3 = 1 / (1 + np.exp(-z3))

    y_vect = np.zeros((m, num_labels))
    for i in range(m):
        y_vect[i, int(y[i])] = 1

    J = (1 / m) * (np.sum(np.sum(-y_vect * np.log(a3) - (1 - y_vect) * np.log(1 - a3)))) + (lamb / (2 * m)) * (
                np.sum(np.sum(Theta1[:, 1:] ** 2)) + np.sum(np.sum(Theta2[:, 1:] ** 2)))

    Delta3 = a3 - y_vect
    Delta2 = np.dot(Delta3, Theta2) * a2 * (1 - a2)
    Delta2 = Delta2[:, 1:]

    Theta1[:, 0] = 0
    Theta1_grad = (1 / m) * np.dot(Delta2.transpose(), a1) + (lamb / m) * Theta1
    Theta2[:, 0] = 0
    Theta2_grad = (1 / m) * np.dot(Delta3.transpose(), a2) + (lamb / m) * Theta2
    grad = np.concatenate((Theta1_grad.flatten(), Theta2_grad.flatten()))

    return J, grad

def predict(Theta1, Theta2, X):
    m = X.shape[0]
    one_matrix = np.ones((m, 1))
    X = np.append(one_matrix, X, axis=1)
    z2 = np.dot(X, Theta1.transpose())
    a2 = 1 / (1 + np.exp(-z2))
    a2 = np.append(one_matrix, a2, axis=1)
    z3 = np.dot(a2, Theta2.transpose())
    a3 = 1 / (1 + np.exp(-z3))
    p = np.argmax(a3, axis=1)
    return p

def initialise(a, b):
    epsilon = 0.15
    c = np.random.rand(a, b+1) * (2 * epsilon) - epsilon
    return c

X_train, y_train = emnist.extract_training_samples('balanced')
X_test, y_test = emnist.extract_test_samples('balanced')

# Reshape and normalize the data
X_train = X_train.reshape((X_train.shape[0], -1), order='F')
X_test = X_test.reshape((X_test.shape[0], -1), order='F')

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

y_train = y_train.flatten()
y_test = y_test.flatten()

m = X_train.shape[0]
input_layer_size = X_train.shape[1]
hidden_layer_size = 130
num_labels = len(np.unique(y_train))

initial_theta1 = initialise(hidden_layer_size, input_layer_size)
initial_theta2 = initialise(num_labels, hidden_layer_size)

initial_nn_params = np.concatenate((initial_theta1.flatten(), initial_theta2.flatten()))

maxiter = 800
lambda_reg = 0.1
myargs = (input_layer_size, hidden_layer_size, num_labels, X_train, y_train, lambda_reg)

results = minimize(neural_network, x0=initial_nn_params, args=myargs, options={
    'disp': True,
    'maxiter': maxiter
}, method="L-BFGS-B", jac=True)

nn_params = results["x"]

Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, input_layer_size + 1), order='F')
Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                    (num_labels, hidden_layer_size + 1), order='F')

# Predict and evaluate accuracy
pred_test = predict(Theta1, Theta2, X_test)
accuracy_test = np.mean(pred_test == y_test) * 100

pred_train = predict(Theta1, Theta2, X_train)
accuracy_train = np.mean(pred_train == y_train) * 100

print(f'Test Set Accuracy: {accuracy_test}')
print(f'Training Set Accuracy: {accuracy_train}')

# Calculate precision
tp = np.sum(pred_train == y_train)
fp = len(y_train) - tp
precision = tp / (tp + fp)
print(f'Precision: {precision}')

# Save the learned parameters
np.savetxt('Theta1.txt', Theta1, delimiter=',')
np.savetxt('Theta2.txt', Theta2, delimiter=',')


Test Set Accuracy: 1.776595744680851
Training Set Accuracy: 1.702127659574468
Precision: 0.01702127659574468


# **Approach 3**

In [53]:
import numpy as np
from PIL import ImageGrab, ImageTk, Image
from google.colab import files
import tkinter as tk
from google.colab.patches import cv2_imshow
from IPython.display import display, Javascript
import cv2
import time
import wget
import pandas as pd
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import emnist

# Load MNIST data
X_train, y_train = emnist.extract_training_samples('mnist')
X_test, y_test = emnist.extract_test_samples('mnist')

# Preprocess the data
X_train = X_train.reshape(X_train.shape[0], -1) / 255.0
X_test = X_test.reshape(X_test.shape[0], -1) / 255.0
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Define the model architecture
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(784,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, batch_size=128, epochs=20, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Loss: 0.0703
Test Accuracy: 0.9861


# **Approach 4**

In [57]:
import numpy as np
import matplotlib.pyplot as plt
import urllib.request
import gzip

# Function to load MNIST data
def load_mnist():
    base_url = 'http://yann.lecun.com/exdb/mnist/'
    files = [
        ('train_images', 'train-images-idx3-ubyte.gz'),
        ('train_labels', 'train-labels-idx1-ubyte.gz'),
        ('test_images', 't10k-images-idx3-ubyte.gz'),
        ('test_labels', 't10k-labels-idx1-ubyte.gz')
    ]

    data = {}
    for name, filename in files:
        urllib.request.urlretrieve(base_url + filename, filename)
        with gzip.open(filename, 'rb') as f:
            if 'images' in name:
                data[name] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 784)
            else:
                data[name] = np.frombuffer(f.read(), np.uint8, offset=8)

    return data['train_images'], data['train_labels'], data['test_images'], data['test_labels']

# Load MNIST data
X_train, y_train, X_test, y_test = load_mnist()

# Normalize the data
X_train = X_train / 255.0
X_test = X_test / 255.0

# Convert labels to integers
y_train = y_train.astype(int)
y_test = y_test.astype(int)

# Define neural network architecture
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros(hidden_size)
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros(output_size)

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = np.maximum(0, self.z1)  # ReLU activation function
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        exp_scores = np.exp(self.z2)
        self.probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def backward(self, X, y, learning_rate):
        m = X.shape[0]
        one_hot = np.zeros((m, self.W2.shape[1]))
        one_hot[np.arange(m), y] = 1
        delta3 = self.probs - one_hot
        dW2 = np.dot(self.a1.T, delta3)
        db2 = np.sum(delta3, axis=0)
        delta2 = np.dot(delta3, self.W2.T) * (self.a1 > 0)
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def predict(self, X):
        self.forward(X)
        return np.argmax(self.probs, axis=1)

# Set hyperparameters
input_size = X_train.shape[1]
hidden_size = 64  # Reduced number of hidden units
output_size = 10
learning_rate = 0.01
epochs = 20
batch_size = 128

# Create the neural network
nn = NeuralNetwork(input_size, hidden_size, output_size)

# Training loop
for epoch in range(epochs):
    # Shuffle the training data
    permutation = np.random.permutation(X_train.shape[0])
    X_train = X_train[permutation]
    y_train = y_train[permutation]

    # Mini-batch training
    for i in range(0, X_train.shape[0], batch_size):
        batch_X = X_train[i:i+batch_size]
        batch_y = y_train[i:i+batch_size]

        # Forward and backward propagation
        nn.forward(batch_X)
        nn.backward(batch_X, batch_y, learning_rate)

    # Print the training loss and accuracy for each epoch
    train_pred = nn.predict(X_train)
    train_acc = np.mean(train_pred == y_train)
    print(f'Epoch {epoch+1}/{epochs} - Training Accuracy: {train_acc:.4f}')

# Evaluate the model on the test set
test_pred = nn.predict(X_test)
test_acc = np.mean(test_pred == y_test)
print(f'Test Accuracy: {test_acc:.4f}')


Epoch 1/20 - Training Accuracy: 0.9169
Epoch 2/20 - Training Accuracy: 0.9389
Epoch 3/20 - Training Accuracy: 0.9500
Epoch 4/20 - Training Accuracy: 0.9485
Epoch 5/20 - Training Accuracy: 0.9514
Epoch 6/20 - Training Accuracy: 0.9589
Epoch 7/20 - Training Accuracy: 0.9587
Epoch 8/20 - Training Accuracy: 0.9619
Epoch 9/20 - Training Accuracy: 0.9637
Epoch 10/20 - Training Accuracy: 0.9645
Epoch 11/20 - Training Accuracy: 0.9630
Epoch 12/20 - Training Accuracy: 0.9644
Epoch 13/20 - Training Accuracy: 0.9632
Epoch 14/20 - Training Accuracy: 0.9690
Epoch 15/20 - Training Accuracy: 0.9678
Epoch 16/20 - Training Accuracy: 0.9687
Epoch 17/20 - Training Accuracy: 0.9682
Epoch 18/20 - Training Accuracy: 0.9750
Epoch 19/20 - Training Accuracy: 0.9654
Epoch 20/20 - Training Accuracy: 0.9695
Test Accuracy: 0.9564
