In [2]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
data_path = "/content/drive/My Drive/Handwriting_dataset/"


In [4]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
import tensorflow as tf


In [5]:
input_dir = "/content/drive/My Drive/Handwriting_dataset/"
output_dir = "/content/drive/My Drive/processed_characters"

In [50]:
if not os.path.exists(output_dir):
  os.makedirs(output_dir)

def preprocess_images():
  for img_name in os.listdir(input_dir):
    img_path = os.path.join(input_dir, img_name)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    _,img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours,_ = cv2.findContours(img_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for i,contour in enumerate(contours):
      x,y,w,h = cv2.boundingRect(contour)
      char_img = img_bin[y:y+h, x:x+w]
      char_img = cv2.resize(char_img,(28,28))
      char_img = cv2.copyMakeBorder(char_img,4,4,4,4,cv2.BORDER_CONSTANT,value=0)

      char_file = os.path.join(output_dir,f"{img_name.split('.')[0]}_{i}.png")
      cv2.imwrite(char_file,char_img)

preprocess_images()

In [51]:
print(os.listdir(output_dir))

['Capitals_1.png', 'Capitals_3.png', 'Capitals_9.png', 'Capitals_0.png', 'Capitals_4.png', 'Capitals_6.png', 'Capitals_5.png', 'Capitals_2.png', 'Capitals_8.png', 'Capitals_7.png', 'Capitals_10.png', 'Capitals_18.png', 'Capitals_14.png', 'Capitals_11.png', 'Capitals_16.png', 'Capitals_13.png', 'Capitals_15.png', 'Capitals_12.png', 'Capitals_17.png', 'Capitals_19.png', 'Capitals_21.png', 'Capitals_22.png', 'Capitals_24.png', 'Capitals_23.png', 'Capitals_20.png', 'Capitals_25.png', 'Capitals_28.png', 'Capitals_27.png', 'Capitals_26.png', 'Capitals_29.png', 'Capitals_32.png', 'Capitals_31.png', 'Capitals_30.png', 'Capitals_33.png', 'Capitals_34.png', 'Capitals_35.png', 'Capitals_36.png', 'Capitals_37.png', 'Capitals_38.png', 'Capitals_40.png', 'Capitals_39.png', 'Capitals_41.png', 'Capitals_42.png', 'Capitals_44.png', 'Capitals_43.png', 'Capitals_45.png', 'Capitals_46.png', 'Capitals_47.png', 'Capitals_49.png', 'Capitals_48.png', 'Capitals_50.png', 'Capitals_51.png', 'Capitals_52.png', 'C

In [52]:
def load_data(data_dir):
  images = []
  for img_name in os.listdir(data_dir):
    img_path = os.path.join(data_dir, img_name)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = img/127.5 - 1.0
    images.append(img.reshape(36,36,1))
  return np.array(images)

data = load_data(output_dir)


In [53]:
import cv2
import numpy as np
import os
import string
from matplotlib import pyplot as plt

input_path = "/content/drive/My Drive/Handwriting_dataset/Capitals.jpg"
output_folder = "/content/labeled_handwriting"
os.makedirs(output_folder, exist_ok=True)

# Load and preprocess the image
img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
_, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Find character contours
contours, _ = cv2.findContours(img_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Sort contours left to right
contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[0])

# Save characters as A–Z
labels = list(string.ascii_uppercase)
for i, contour in enumerate(contours[:26]):
    x, y, w, h = cv2.boundingRect(contour)
    char_img = img_bin[y:y+h, x:x+w]
    char_img = cv2.resize(char_img, (36, 36))
    filename = os.path.join(output_folder, f"{labels[i]}.png")
    cv2.imwrite(filename, char_img)

print("Labeled and saved all uppercase characters.")


Labeled and saved all uppercase characters.


In [54]:
input_path = "/content/drive/My Drive/Handwriting_dataset/Smalls.jpg"
output_folder = "/content/labeled_handwriting"
os.makedirs(output_folder, exist_ok=True)

# Load and preprocess
img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
_, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Find and sort contours
contours, _ = cv2.findContours(img_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[0])

# Save as a–z
labels = list(string.ascii_lowercase)
for i, contour in enumerate(contours[:26]):
    x, y, w, h = cv2.boundingRect(contour)
    char_img = img_bin[y:y+h, x:x+w]
    char_img = cv2.resize(char_img, (36, 36))
    filename = os.path.join(output_folder, f"{labels[i]}.png")
    cv2.imwrite(filename, char_img)

print("Lowercase characters saved.")


Lowercase characters saved.


In [55]:
input_path = "/content/drive/My Drive/Handwriting_dataset/nums_chars.jpg"
output_folder = "/content/labeled_handwriting"
os.makedirs(output_folder, exist_ok=True)

# Load and binarize
img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
_, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Detect and sort contours
contours, _ = cv2.findContours(img_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[0])

# Labels: 0–9 followed by common punctuation (modify based on your image)
labels = list("0123456789.,!?;:-()[]")

for i, contour in enumerate(contours[:len(labels)]):
    x, y, w, h = cv2.boundingRect(contour)
    char_img = img_bin[y:y+h, x:x+w]
    char_img = cv2.resize(char_img, (36, 36))
    filename = os.path.join(output_folder, f"{labels[i]}.png")
    cv2.imwrite(filename, char_img)

print("Numbers and symbols saved.")


Numbers and symbols saved.


In [56]:
import tensorflow as tf
from tensorflow.keras import layers

In [57]:
#Generator model
def build_generator():
  model=tf.keras.Sequential([
      layers.Dense(128,activation='relu',input_shape=(100,)),
      layers.Dense(256,activation='relu'),
      layers.Dense(36*36,activation='tanh'),
      layers.Reshape((36,36,1))
  ])
  return model

In [58]:
#Discriminator model
def build_discriminator():
  model = tf.keras.Sequential([
      layers.Flatten(input_shape=(36,36,1)),
      layers.Dense(256,activation='relu'),
      layers.Dense(128,activation='relu'),
      layers.Dense(1,activation='sigmoid')
  ])
  return model

In [59]:

generator = build_generator()
discriminator = build_discriminator()

In [60]:
discriminator.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
generator.compile(loss='binary_crossentropy',optimizer='adam')

In [61]:
gan = tf.keras.Sequential([generator,discriminator])
gan.compile(loss='binary_crossentropy',optimizer='adam')

In [62]:
x_train = load_data(output_dir)

In [63]:
def train_gan(epochs=1000, batch_size=32):
  for epoch in range(epochs):
    idx= np.random.randint(0,x_train.shape[0],batch_size)
    real_images = x_train[idx]
    fake_images = generator.predict(np.random.randn(batch_size,100))

    d_loss_real = discriminator.train_on_batch(real_images, np.ones((batch_size,1)))
    d_loss_fake = discriminator.train_on_batch(fake_images, np.zeros((batch_size,1)))
    d_loss =0.5*np.add(d_loss_real,d_loss_fake)

    g_loss = gan.train_on_batch(np.random.randn(batch_size,100),np.ones((batch_size,1)))

    if epoch % 100 == 0:
      print(f"Epoch {epoch}: D Loss = {d_loss[0]}, G Loss = {g_loss}")
train_gan()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
Epoch 0: D Loss = 0.5363556146621704, G Loss = 0.806812047958374
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step

In [64]:
from transformers import pipeline

qa_model = pipeline("text-generation",model="gpt2")

def generate_answer(question):
  return qa_model(question,max_length=100)[0]["generated_text"]

question = "What is AI?"
answer = generate_answer(question)
print(answer)

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


What is AI?

AI is an emerging idea which aims to solve our problems as intelligently as possible and is based on theoretical evidence and experimental evidence.

In short AI works by detecting human behavior, rather than by making predictions about human behavior. The science behind AI has always been fascinating, with a huge range of potential applications in various fields from medicine, aerospace, finance, telecommunications and even military. Here are a few of the most interesting things that have appeared in recently published research articles


In [65]:
#cv2_imshow(final_handwritting)

In [74]:
def generate_handwritten_text(text, char_img_dir, output_image_path="handwritten_output.png"):
    text = text.strip()
    canvas_width = 20
    canvas_height = 36
    for char in text:
        if char == ' ':
            canvas_width += 30
            continue
        filename = f"{char}.png"
        img_path = os.path.join(char_img_dir, filename)
        if not os.path.exists(img_path):
            continue
        char_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    canvas = np.ones((canvas_height, canvas_width), dtype=np.uint8) * 255
    x_offset = 10

    for char in text:
        if char == ' ':
            x_offset += 30
            continue
        filename = f"{char}.png"
        img_path = os.path.join(char_img_dir, filename)
        if not os.path.exists(img_path):
            continue
        char_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        h, w = char_img.shape
        canvas[0:h, x_offset:x_offset+w] = char_img
        x_offset += w + 5

    cv2.imwrite(output_image_path, canvas)
    plt.imshow(canvas, cmap='gray')
    plt.axis('off')
    plt.show()
