## Import Libraries

In [107]:
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, load_model
from keras.layers import Dense, Embedding, LSTM
from keras.utils import to_categorical
from pickle import dump, load
import numpy as np
import PyPDF2
import random
import spacy
import keras
import re

## Open and Convert PDF File to Python String

In [108]:
f = open('neural network.pdf', 'rb')
pdf_reader = PyPDF2.PdfReader(f)

In [109]:
# Extract text in the pdf file 
text  = ''
for i in pdf_reader.pages:
    text += i.extract_text()

print(text)

 
 
      
         ISSN: 2277 -3754   
    ISO 9001:2008 Certified  
International Journal of Engineering and Innovative Technology (IJEIT)  
Volume 2, Issue 1, July 2012  
 189 
Introduction to Artificial Neural Network  
A.D.Dongare, R.R.Kharde, Amit D.Kachare   
Abstract : - This paper presents an emergence of an Artificial 
Neural Network (ANN) as a tool for analysis of different 
parameters of a system. An Artificial Neural Network (ANN) is 
an information -processing paradigm that is inspired by the way 
biological nervous systems such as brain, process information. 
ANN consists of multiple layers of simple processing elements 
called as neurons. The neuron performs two functions, namel y, 
collection of inputs & generation of an output. Use of ANN 
provides overview of the theory, learning rules, and applications 
of the most important neural network models, definitions and 
style of Computation . The mathematical model of network 
throws the light on the concept of inputs, we

In [110]:
nlp = spacy.load('en_core_web_lg')
nlp.max_length = 1198623

## Data / Text Cleaning

In [118]:
text_list = text.split('\n')
print('Number of sentences:', len(text_list), end='\n\n')
[i for i in text_list]

Number of sentences: 562



[' ',
 ' ',
 '      ',
 '         ISSN: 2277 -3754   ',
 '    ISO 9001:2008 Certified  ',
 'International Journal of Engineering and Innovative Technology (IJEIT)  ',
 'Volume 2, Issue 1, July 2012  ',
 ' 189 ',
 'Introduction to Artificial Neural Network  ',
 'A.D.Dongare, R.R.Kharde, Amit D.Kachare   ',
 'Abstract : - This paper presents an emergence of an Artificial ',
 'Neural Network (ANN) as a tool for analysis of different ',
 'parameters of a system. An Artificial Neural Network (ANN) is ',
 'an information -processing paradigm that is inspired by the way ',
 'biological nervous systems such as brain, process information. ',
 'ANN consists of multiple layers of simple processing elements ',
 'called as neurons. The neuron performs two functions, namel y, ',
 'collection of inputs & generation of an output. Use of ANN ',
 'provides overview of the theory, learning rules, and applications ',
 'of the most important neural network models, definitions and ',
 'style of Computation 

In [119]:
def clean_text(txt: list[str]):
    temp = []
    for i in txt:
        if not i.isspace():
            temp.append(i.strip().lower())
    return temp

In [120]:
text_list = clean_text(text_list)
print('Number of sentences:', len(text_list), end='\n\n')
text_list

Number of sentences: 525



['issn: 2277 -3754',
 'iso 9001:2008 certified',
 'international journal of engineering and innovative technology (ijeit)',
 'volume 2, issue 1, july 2012',
 '189',
 'introduction to artificial neural network',
 'a.d.dongare, r.r.kharde, amit d.kachare',
 'abstract : - this paper presents an emergence of an artificial',
 'neural network (ann) as a tool for analysis of different',
 'parameters of a system. an artificial neural network (ann) is',
 'an information -processing paradigm that is inspired by the way',
 'biological nervous systems such as brain, process information.',
 'ann consists of multiple layers of simple processing elements',
 'called as neurons. the neuron performs two functions, namel y,',
 'collection of inputs & generation of an output. use of ann',
 'provides overview of the theory, learning rules, and applications',
 'of the most important neural network models, definitions and',
 'style of computation . the mathematical model of network',
 'throws the light on th

In [121]:
def filter_sentences(txt: list[str]) -> list[str]:

    temp = []
    
    for i in txt:
        if i.lower() == 'the authors would like to acknowledge & thanks to dr.':
            break
        
        patterns = [
            r"^[iv]+.",
            r"issn:.*",
            r"iso.*certified$",
            r"international journal of engineering and innovative technology \(ijeit\)",
            r"volume \d, issue \d, .+",
            r"^\S+$",
            r"^fig\.\s?\d+.+",
            r"^table\s?\d+\..+",
            r"^[a-z]+\.",
            r"^\d+ \..+",
            r"^\d+\. .+",
        ]
        
        if any(re.search(pattern, i, re.IGNORECASE) for pattern in patterns):
            continue
        
        temp.append(i)
    
    return temp


In [122]:
text_list = filter_sentences(text_list)
print('Number of sentences:', len(text_list), end='\n\n')
text_list

Number of sentences: 344



['abstract : - this paper presents an emergence of an artificial',
 'neural network (ann) as a tool for analysis of different',
 'parameters of a system. an artificial neural network (ann) is',
 'an information -processing paradigm that is inspired by the way',
 'biological nervous systems such as brain, process information.',
 'ann consists of multiple layers of simple processing elements',
 'called as neurons. the neuron performs two functions, namel y,',
 'collection of inputs & generation of an output. use of ann',
 'provides overview of the theory, learning rules, and applications',
 'of the most important neural network models, definitions and',
 'style of computation . the mathematical model of network',
 'throws the light on the concept of inputs, weights, summing',
 'function, activation function & outputs. then ann helps to',
 'decide the type of learning for  adjustment of weights with',
 'change in parameters. finally the analysis of a system is',
 'completed by ann impleme

In [123]:
def replace_patterns(txt):
    patterns = [
        r'in\sfig\.\s\d',
        r'fig\.\s\d',
        r'\s?\[\s?\d\s?]\s?',
    ]
    for pattern in patterns:
        txt = re.sub(pattern, '', txt)

    return txt

def replace_patterns_in_sentences(sentences):
    new_sentences = []
    for sentence in sentences:
        new_sentence = replace_patterns(sentence)
        new_sentences.append(new_sentence)
    return new_sentences

In [124]:
text_list = replace_patterns_in_sentences(text_list)
text_list

['abstract : - this paper presents an emergence of an artificial',
 'neural network (ann) as a tool for analysis of different',
 'parameters of a system. an artificial neural network (ann) is',
 'an information -processing paradigm that is inspired by the way',
 'biological nervous systems such as brain, process information.',
 'ann consists of multiple layers of simple processing elements',
 'called as neurons. the neuron performs two functions, namel y,',
 'collection of inputs & generation of an output. use of ann',
 'provides overview of the theory, learning rules, and applications',
 'of the most important neural network models, definitions and',
 'style of computation . the mathematical model of network',
 'throws the light on the concept of inputs, weights, summing',
 'function, activation function & outputs. then ann helps to',
 'decide the type of learning for  adjustment of weights with',
 'change in parameters. finally the analysis of a system is',
 'completed by ann impleme