#### ***OCR BUILDING PART***

#### ***IMPORT DEPENDENCIES***

In [2]:
import numpy as np
import pandas as pd
import math
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard

In [1]:
!pip install psutil

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import psutil
print(psutil.cpu_percent())
print(psutil.virtual_memory().percent)

13.8
84.2


#### ***LOADING DATA AND PREPROCESSING***

In [4]:
df = pd.read_csv('alphabets.csv', header = 0)
df = df[df['label'].str.isalpha() & df['label'].str.isupper()]
df = df.reset_index(drop=True)

  df = pd.read_csv('alphabets.csv', header = 0)


In [5]:
df.shape

(371391, 785)

In [3]:
labels = df.iloc[:,0]
alpha_df = df.iloc[:,1:]
alpha_df=alpha_df.astype(float)
data = alpha_df.to_numpy()

In [4]:
#Reshaping to make proper array
num_images = data.shape[0]
images = data.reshape((num_images,28, 28, 1))

#Normalize to range [0,1]
images = images/255.0

In [48]:
images[37899][17][27][0]

0.0

#### ***MAKING OUR MODEL***

In [5]:
def cnn_model(shape, num_labels) :
    model = Sequential()
    model.add(Conv2D(32,(3,3), activation='relu', padding='same', input_shape=shape))
    model.add(MaxPooling2D((2,2)))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model
shape = (28, 28, 1)
num_labels = 26 #Alphabets
model = cnn_model(shape, num_labels)

#Prepare tensorboard callback
tensorboard_callback = TensorBoard(log_dir='./logs')

#We have a work of making proper labels
label_mapping = {chr(i+65) : i for i in range(26)}
numerical_labels = np.array([label_mapping[label] for label in labels])
f_labels = to_categorical(numerical_labels, num_classes=num_labels)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


#### ***TRAINING AND EVALUATING OUR MODEL***

In [9]:
x_train, x_test, y_train, y_test = train_test_split(images, f_labels, test_size=0.2, random_state=42)

In [None]:
training = model.fit(x_train, y_train, epochs=5, batch_size=32, verbose=1, callbacks=[tensorboard_callback])

In [13]:
loss, accuracy = model.evaluate(x_test, y_test)
print(loss, accuracy)

[1m2322/2322[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9901 - loss: 0.0374
0.0379519984126091 0.9899433255195618


### ***SENTIMENT ANALYSIS PART***

#### ***USING MULTINOMIALNB***

In [30]:
from sklearn.feature_extraction.text import CountVectorizer
df = pd.read_csv('sentiment_analysis_dataset.csv')

#df['line'] = df['line'].apply(preprocess)
#Vectorize the text data
vectorizer = CountVectorizer()
x = vectorizer.fit_transform(df['line'])

#Encode labels
y = df['sentiment'].map({'Happy':1, 'Neutral':0, 'Angry':-1})
x

<30x272 sparse matrix of type '<class 'numpy.int64'>'
	with 555 stored elements in Compressed Sparse Row format>

In [31]:
#Splitting the data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [32]:
from sklearn.naive_bayes import MultinomialNB
model_ = MultinomialNB()
model_.fit(x_train, y_train)

In [33]:
from sklearn.metrics import accuracy_score, classification_report
#Prediction
y_pred = model_.predict(x_test)

#Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=['angry','neutral','happy'])

print(accuracy, report)

0.6666666666666666               precision    recall  f1-score   support

       angry       0.50      0.50      0.50         2
     neutral       1.00      1.00      1.00         2
       happy       0.50      0.50      0.50         2

    accuracy                           0.67         6
   macro avg       0.67      0.67      0.67         6
weighted avg       0.67      0.67      0.67         6



#### ***OWN MODEL***

In [34]:
import nltk
from nltk.corpus import stopwords
from string import punctuation

def clean_text(text):
    # Download stopwords if not already downloaded (comment out if you have them)
    #nltk.download('stopwords')
    # Get stopwords in English
    stop_words = set(stopwords.words('english'))
    # Combine punctuation and lowercase for case-insensitivity
    punc_lower = set(punctuation.lower())
    # Remove stop words and punctuation
    filtered_words = [word for word in text.lower().split() if word not in stop_words and word not in punc_lower]
    # Join the filtered words back into a string
    return " ".join(filtered_words)

# Example usage
#text = "This is to remove stop words. Will it?."
#cleaned_text = remove_stopwords_punctuation(text)
#print(cleaned_text)


In [35]:
def features(data) :
    word_count = {}
    data = data.lower()
    #print(data)
    for word in data.split(' ') :
        #print()
        if word in word_count :
            word_count[word] += 1
        else :
            word_count[word] = 1
    return word_count

In [36]:
df['line'] = df['line'].apply(clean_text)
df['word_count'] = df['line'].apply(features)
len(df['word_count'])

30

In [37]:
#Train-test-split
x_train, x_test, y_train, y_test = train_test_split(df['line'], y, test_size = 0.2, random_state=42) 

In [38]:
count = np.zeros(3)
p_class = np.zeros(3)
for x in y :
    count[x] += 1
for x in y :
    p_class[x] = count[x]/np.sum(count)
p_class

array([0.33333333, 0.33333333, 0.33333333])

In [39]:
import math
#This was as sample to do
def naive_bayes(data, num_label):
    p_features_class = {}
    word_count = features(data)
    sum_ = sum(word_count.values())
    for word, value in word_count.items():
        p_features_class[word] = value/sum_
    product = math.prod(p_features_class.values())
    p_class_feature = [product*p_class[x] for x in range(num_label)]
    f_label = np.argmax(p_class_feature)
    return f_label

In [17]:
new_data = "This is a nice day!"  # Replace with your new data
predicted_label = naive_bayes(new_data, 3)
print("Predicted label:", predicted_label)

Predicted label: 0


#### ***Training***

In [40]:
def train_naive_bayes(X_train, y_train, num_label):
    p_features_class = {}
    for data, label in zip(X_train, y_train):
        word_count = features(data)
        sum_ = sum(word_count.values())
        for word, value in word_count.items():
            if word in p_features_class:
                p_features_class[word][label] = p_features_class[word].get(label, 0) + value/sum_
            else:
                p_features_class[word] = {k: 0 for k in range(num_label)}
                p_features_class[word][label] = value/sum_
    return p_features_class

p_features_class = train_naive_bayes(x_train, y_train, 3)

#### ***Evaluating***

In [41]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def predict_naive_bayes(X_test, p_features_class, num_label, smoothing=1):
    y_pred = []
    for data in X_test:
        word_count = features(data)
        sum_ = sum(word_count.values()) + smoothing * len(word_count)
        p_class_feature = [1 for _ in range(num_label)]
        for word, value in word_count.items():
            if word in p_features_class:
                for j in range(num_label):
                    p_class_feature[j] *= p_features_class[word].get(j, 0)
            else:
                for j in range(num_label):
                    p_class_feature[j] *= smoothing / num_label
        f_label = np.argmax(p_class_feature)
        y_pred.append(f_label)
    return y_pred

y_pred = predict_naive_bayes(x_test, p_features_class, 3)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.3333333333333333
Precision: 0.25
Recall: 0.3333333333333333
F1-score: 0.27777777777777773


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


#### ***APPLYING THIS ON TARGET IMAGES***

In [None]:
# List of image file paths
image_files = ['line_1.png', 'line_2.png', 'line_3.png', 'line_4.png', 'line_5.png', 'line_6.png']

for image_path in image_paths:
    # Load the image using OpenCV
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # Check if the image is loaded successfully
    if img is None:
        print("Error loading image")
    else:
        # Resize the image to 28x28 pixels
        img = cv2.resize(img, (28, 28))
        
        # Convert the image to a numpy array
        img_array = np.array(img)
        
        # Reshape the image array to match the model input shape
        img_array = img_array.reshape((1, 28, 28, 1))
        
        # Normalize the image array to range [0,1]
        img_array = img_array / 255.0
        
        # Make predictions
        pred_text = model.predict(img_array)

        # Get the index of the highest probability
        pred_index = np.argmax(pred_text)
   
       # Convert the index to the corresponding alphabet
        pred_text = chr(pred_index + 65)
        
        # Print the predicted text
        print("Predicted text for image", image_path, ":", pred_text)

In [None]:
y_pred = predict_naive_bayes(pred_index, p_features_class, 3)

In [None]:
target_labels = df.read_csv('target_labels.csv')
y_target = target_labels['sentiment'].map({'Happy':1, 'Neutral':0, 'Angry':-1})
accuracy = accuracy_score(y_target, y_pred)
precision = precision_score(y_target, y_pred, average='weighted')
recall = recall_score(y_target, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)