In [1]:
from simpletransformers.ner import NERModel
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import pickle, emoji, string, pandas as pd

In [22]:
# Restore the previously saved model object from disk (presumably a
# simpletransformers NERModel, given the import and the later predict call).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model artifacts that come from a trusted source.
with open('model/NER_Model_pickle_v4', 'rb') as file:
	model = pickle.load(file)

In [5]:
# Load the first CPU IOB-tagged token table and preview it.
# NOTE(review): `data` is reassigned by the other load cells in this notebook,
# so whichever load cell ran last decides what data_preps() receives.
data = pd.read_csv('datasets/cpu-iob-ner.csv', encoding='latin1')
data.head()

Unnamed: 0,Sentence #,Word,Tag
0,Sentence 1,Snapdragon,B-performance
1,Sentence 1,855+,I-performance
2,Sentence 2,Snapdragon,B-performance
3,Sentence 2,835,I-performance
4,Sentence 3,Snapdragon,B-performance


In [20]:
# Load the second CPU IOB-tagged token table and preview it.
# NOTE(review): this overwrites whatever `data` held before this cell ran.
data = pd.read_csv('datasets/cpu-iob-ner-2.csv', encoding='latin1')
data.head()

Unnamed: 0,Sentence #,Word,Tag
0,Sentence 1,snapdragon,B-performance
1,Sentence 1,630,I-performance
2,Sentence 2,snapdragon,B-performance
3,Sentence 2,435,I-performance
4,Sentence 3,helio,B-performance


In [31]:
# Load the general IOB-tagged token table and preview it.
# NOTE(review): this overwrites whatever `data` held before this cell ran.
data = pd.read_csv('datasets/iob-ner.csv', encoding='latin1')
data.head()

Unnamed: 0,Sentence #,Word,Tag
0,Sentence 1,using,O
1,Sentence 1,6,O
2,Sentence 1,month,O
3,Sentence 1,14k,O
4,Sentence 1,purchased,O


In [6]:
def data_preps(data):
	"""Convert a raw IOB token table into feature and label objects.

	The input frame is copied first, so the caller's DataFrame is left
	untouched and the function can be called repeatedly on the same frame
	(the previous in-place rename made a second call fail with a KeyError
	on "Sentence #").

	Parameters
	----------
	data : pandas.DataFrame
		Must contain the columns "Sentence #", "Word" and "Tag".

	Returns
	-------
	X : pandas.DataFrame
		Columns "sentence_id" (integer code per distinct "Sentence #" value)
		and "words".
	Y : pandas.Series
		The upper-cased tags, named "labels", row-aligned with X.
	"""
	prepared = data.copy()

	# LabelEncoder assigns codes in sorted order of the distinct values, so the
	# ids identify sentences but do not follow their numeric order
	# ("Sentence 10" sorts before "Sentence 2").
	prepared["Sentence #"] = LabelEncoder().fit_transform(prepared["Sentence #"])
	prepared = prepared.rename(
		columns={"Sentence #": "sentence_id", "Word": "words", "Tag": "labels"}
	)
	prepared["labels"] = prepared["labels"].str.upper()

	# Rows with a missing value in any column are discarded, after encoding, so
	# the sentence ids are the same as before this function stopped mutating
	# its argument.
	prepared = prepared.dropna()

	X = prepared[["sentence_id","words"]]
	Y = prepared["labels"]

	return X, Y

In [32]:
# Split the currently loaded token table into features (X: sentence_id, words)
# and targets (Y: labels).
X, Y = data_preps(data)

In [3]:
# Shared resources used by clean_text().
# NOTE(review): stopwords.words() and the WordNet lemmatizer need the NLTK
# "stopwords" and "wordnet" corpora to be installed locally — confirm setup.
ENGLISH_STOPWORDS = stopwords.words('english')  # list of English stopwords
PUNCTUATION = string.punctuation  # single string of ASCII punctuation characters
LEMMATIZER = WordNetLemmatizer()

In [4]:
def clean_text(text):
  """Normalize raw text into a space-separated string of cleaned tokens.

  Steps, in order: lowercase, strip emoji, tokenize with word_tokenize,
  drop English stopwords, lemmatize, then drop any token that is found in
  PUNCTUATION or is not purely alphanumeric (so hyphenated tokens such as
  "front-facing" are removed as well).

  Parameters
  ----------
  text : str
    The raw text to clean.

  Returns
  -------
  str
    The surviving lemmatized tokens joined by single spaces.
  """
  lowered = emoji.replace_emoji(text.lower(), replace='')

  kept = []
  for token in word_tokenize(lowered):
    # Stopwords are filtered before lemmatization, on the raw token.
    if token in ENGLISH_STOPWORDS:
      continue
    lemma = LEMMATIZER.lemmatize(token)
    # Both checks run on the lemmatized form.
    if lemma in PUNCTUATION or not lemma.isalnum():
      continue
    kept.append(lemma)

  return ' '.join(kept)

In [80]:
# But not the Honor Magic V2, though. It remains special, like really special, for both foldable fans and the smartphone realm in general. Sure, it might be running a generation-old Snapdragon 8 Gen 2 chipset at this point, but that's about the only non-current and non-revolutionary thing about the Magic V2.
# The island replaces Apple’s familiar and oft-reviled notch; it’s where the front camera and the Face ID system live since they’ve got to take up some space on the front of the display. Here’s the thing about the notch, though: after a few minutes of using it, it all but disappears.
# display lcd much good oled clearly see difference compare low quality oled even real cheapest set oled display beat iphone 11 touch corner display sometime work instantly always face issue u connect sound quality lag app
# The camera is the real showstopper here and rightfully, so. The retractable main camera is thoroughly interesting with its unique mechanical design, sure, but we cannot wait to test its 1" sensor, one that makes use of sensor-shift stabilization, variable aperture lens and laser-assisted AF. Next is a 50MP telephoto camera for 3.5x optical zoom and up to 10x hybrid zoom, one that can do macro photos from as close as 5cm away! The 40MP ultrawide camera promises really wide field of view with its 13mm aperture lens, and it is AF-capable.	
# tensor
# Active sample input: a raw review paragraph that is cleaned and then fed to
# the model in the next cell.
txt = '''
Apple pulled off some unexpected surprises with the iPhone 14 Pro: there had been lots of solid rumors about the company switching from putting the front-facing camera and Face ID system in a pill-shaped cutout instead of the familiar notch, but the new “Dynamic Island” alert system came out of nowhere. And while it was getting clearer that Apple would have to follow the industry in using bigger camera sensors eventually, Apple went even further and rebooted its entire computational photography system as the Photonic Engine.
'''
# `txt` is overwritten with its cleaned form; the raw paragraph is only
# recoverable by re-running this cell.
txt = clean_text(txt)
print(txt)

apple pulled unexpected surprise iphone 14 pro lot solid rumor company switching putting camera face id system cutout instead familiar notch new dynamic island alert system came nowhere getting clearer apple would follow industry using bigger camera sensor eventually apple went even rebooted entire computational photography system photonic engine


In [81]:
# Tag the cleaned text. predict() is given a one-element list, so the tags for
# this text are in prediction[0] (a list of single-entry {token: tag} dicts).
prediction, model_output = model.predict([txt])

  0%|          | 0/1 [00:00<?, ?it/s]

Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

In [124]:
# Show the per-token tags predicted for the single input text.
prediction[0]

[{'apple': 'O'},
 {'pulled': 'O'},
 {'unexpected': 'O'},
 {'surprise': 'O'},
 {'iphone': 'O'},
 {'14': 'O'},
 {'pro': 'O'},
 {'lot': 'O'},
 {'solid': 'O'},
 {'rumor': 'O'},
 {'company': 'O'},
 {'switching': 'O'},
 {'putting': 'O'},
 {'camera': 'B-CAMERA'},
 {'face': 'B-FEATURE'},
 {'id': 'I-FEATURE'},
 {'system': 'O'},
 {'cutout': 'O'},
 {'instead': 'O'},
 {'familiar': 'O'},
 {'notch': 'O'},
 {'new': 'O'},
 {'dynamic': 'O'},
 {'island': 'O'},
 {'alert': 'O'},
 {'system': 'O'},
 {'came': 'O'},
 {'nowhere': 'O'},
 {'getting': 'O'},
 {'clearer': 'O'},
 {'apple': 'O'},
 {'would': 'O'},
 {'follow': 'O'},
 {'industry': 'O'},
 {'using': 'O'},
 {'bigger': 'O'},
 {'camera': 'B-CAMERA'},
 {'sensor': 'B-FEATURE'},
 {'eventually': 'O'},
 {'apple': 'O'},
 {'went': 'O'},
 {'even': 'O'},
 {'rebooted': 'O'},
 {'entire': 'O'},
 {'computational': 'O'},
 {'photography': 'B-CAMERA'},
 {'system': 'O'},
 {'photonic': 'O'},
 {'engine': 'O'}]

In [137]:
# Flatten the single-entry {token: tag} dicts in prediction[0] into two
# space-separated strings: all tokens in `word`, all tags in `tag`.
word = ' '.join(token for entry in prediction[0] for token in entry)

tag = ' '.join(label for entry in prediction[0] for label in entry.values())

In [135]:
# Build one row per predicted token straight from prediction[0].
# The space-joined `word` / `tag` strings cannot be used here: passing only
# scalar values to pd.DataFrame raises
# "ValueError: If using all scalar values, you must pass an index".
token_tags = [pair for entry in prediction[0] for pair in entry.items()]
re_train = pd.DataFrame(token_tags, columns=['words', 'labels'])
# All tokens come from the same input text, so they share one sentence id.
re_train.insert(0, 'sentence_id', 1)
re_train.head()

Unnamed: 0,sentence_id,words,labels
0,1,apple,O
1,1,pulled,O
2,1,unexpected,O
3,1,surprise,O
4,1,iphone,O
