In [2]:
import warnings
warnings.filterwarnings("ignore")

import folium
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import os
import re
import contractions
from textblob import TextBlob
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from tqdm import tqdm
# Fetch the NLTK resources used by this notebook.
# NOTE(review): punkt / wordnet are presumably what TextBlob's tokenisation and
# lemmatisation in clean_text rely on — confirm against the TextBlob docs.
nltk.download('punkt')
nltk.download('wordnet')
# The stopwords corpus has to be present before stopwords.words() is called;
# without this download a fresh environment raises LookupError on the line below.
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words=set(stopwords.words('english'))

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
# Load the saved Keras model from disk. compile=False is passed, so the model is
# loaded without being compiled; the cells visible here only call model.predict on it.
model = load_model("models/ConvolutionalLongShortTermMemory_model.h5",compile=False)

In [4]:
# Class names, looked up by the argmax index of the model output in the prediction cell.
# NOTE(review): the order presumably has to match the label encoding used at
# training time — confirm before editing this list.
class_labels = ['age', 'ethnicity', 'gender', 'not_cyberbullying', 'religion']

In [7]:
import pickle

# Restore the pickled object that is used below as the tokenizer
# (tok.texts_to_sequences).
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open("models/tokens.pkl", "rb") as token_file:
    tok = pickle.load(token_file)

In [8]:
def clean_text(text):
    """Normalise one raw text sample before it is tokenised.

    The steps run in this fixed order: expand contractions, strip the
    emoticons ``:)``, ``:(`` and ``:P``, drop @mentions, #hashtags and
    http(s) URLs, remove every character that is neither a word character
    nor whitespace, collapse whitespace runs, lemmatize each word with
    TextBlob and finally lowercase the result.

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    str
        The cleaned, lemmatized, lowercased text.
    """
    # Expand contractions first, while the apostrophes are still present.
    expanded = contractions.fix(text)

    # (pattern, replacement) pairs, applied strictly top to bottom.
    emoticon_pattern = '|'.join([r':\)', r':\(', r':P'])
    substitutions = (
        (emoticon_pattern, ''),    # character emoticons
        (r'@\w+', ''),             # mentions (@)
        (r'#\w+', ''),             # hashtags (#)
        (r'https?://\S+', ''),     # URLs (http and https)
        (r'[^\w\s]', ''),          # anything that is not a word char or whitespace
        (r'\s+', ' '),             # whitespace runs -> single space
    )
    stripped = expanded
    for pattern, replacement in substitutions:
        stripped = re.sub(pattern, replacement, stripped)
    stripped = stripped.strip()

    # Lemmatize word by word, re-join into a sentence, then lowercase.
    lemmas = (word.lemmatize() for word in TextBlob(stripped).words)
    return " ".join(lemmas).lower()

---

In [9]:
# Relative path of the CSV file holding the user-supplied samples to classify.
user_input_path = "user_input/test_input_5.csv"

In [10]:
# Load the input samples; later cells read the City, Lat, Lng and Text columns.
df = pd.read_csv(user_input_path)
# Preview the first ten rows.
df.head(10)

Unnamed: 0,City,Lat,Lng,Text
0,Pakri,25.5876,85.158,"Been giving her the silent treatment, it was s..."
1,Hunasamaranhalli,13.1435,77.62,Dumb ass niggers with no heart ' fuck ya'll ...
2,Hesarghatta,13.1391,77.4783,Black is a color . African American is a Cultu...
3,Bommayapālaiyam,11.9922,79.8499,Noo fuck youu !!! &amp; Stay Mad with your mad...
4,Gundūr,10.7339,78.7184,@KhaledHamaki He beheaded 600 Jewish prisoners...
5,Punādih,25.5484,85.2649,So attacking children because they are born wh...
6,Harilādih,23.63,86.35,@alaan_wtf ese tambien lo deberia de estar vie...
7,Alāwalpur,25.4958,85.2021,@TripleCeez004 woot
8,Mādnāikanhalli,13.0626,77.4642,the stupidest thing in the world is when a str...
9,Kādiganahalli,13.1687,77.6283,@CHEEEKABOO LOL I don't give a fuck you dumb b...


In [11]:
# (rows, columns) of the loaded frame.
df.shape

(25, 4)

In [12]:
# Run every raw text through clean_text, with a tqdm progress bar over the rows.
cleaned_samples = [clean_text(raw_text) for raw_text in tqdm(df['Text'].values)]

100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:02<00:00,  9.13it/s]


In [13]:
# Encode each cleaned text on its own: token ids from the tokenizer, then
# padded / truncated at the end to a fixed length of 40, giving one
# single-row array per sample.
useful_data = [
    pad_sequences(
        sequences=tok.texts_to_sequences([cleaned_text]),
        maxlen=40,
        padding="post",
        truncating="post",
    )
    for cleaned_text in cleaned_samples
]

In [14]:
# Inspect the first five encoded samples.
useful_data[:5]

[array([[2299,  527,  994,   21, 1114, 2263,   34, 2498,   72,  251,  101,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0]]),
 array([[   7,   19,    8,  569,   94,  589,    3, 6086,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0]]),
 array([[ 25, 127,  68, 540, 617, 298, 127,  68, 378,   8,  76, 317, 453,
         206, 202,   7,   3,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0]]),
 array([[6717,    3, 6155,   26,  429,  165,  569,  206,  165,   19,    3,
           55,    7,   19,    8,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,   

# Model Prediction

In [15]:
# Predict every sample with ONE batched model.predict call instead of one call
# per sample: the per-sample loop pays the predict() set-up cost for each row
# and prints a progress bar for each call.
RESULTS = []

# Guard: np.vstack raises on an empty list, so skip prediction when there is no input.
if len(useful_data) > 0:
    # Each entry of useful_data is a single-row array; stacking them yields one
    # (n_samples, sequence_length) batch.
    batch = np.vstack(useful_data)
    predictions = np.asarray(model.predict(batch, verbose=0))
    # Flatten each sample's scores so the argmax matches the original
    # per-sample np.argmax(prediction), then map the index to its label.
    per_sample_scores = predictions.reshape(len(batch), -1)
    RESULTS = [class_labels[class_idx] for class_idx in np.argmax(per_sample_scores, axis=1)]

In [16]:
# Attach the predicted label to each input row; RESULTS was built in the same
# order as the rows of df.
df["RESULT"] = RESULTS
df.head()

Unnamed: 0,City,Lat,Lng,Text,RESULT
0,Pakri,25.5876,85.158,"Been giving her the silent treatment, it was s...",not_cyberbullying
1,Hunasamaranhalli,13.1435,77.62,Dumb ass niggers with no heart ' fuck ya'll ...,ethnicity
2,Hesarghatta,13.1391,77.4783,Black is a color . African American is a Cultu...,ethnicity
3,Bommayapālaiyam,11.9922,79.8499,Noo fuck youu !!! &amp; Stay Mad with your mad...,ethnicity
4,Gundūr,10.7339,78.7184,@KhaledHamaki He beheaded 600 Jewish prisoners...,religion


In [17]:
# Marker colour per row: "green" when the prediction is not_cyberbullying,
# "red" for every other predicted class.
df['COLORS'] = df['RESULT'].eq('not_cyberbullying').map({True: "green", False: "red"})
df.head()

Unnamed: 0,City,Lat,Lng,Text,RESULT,COLORS
0,Pakri,25.5876,85.158,"Been giving her the silent treatment, it was s...",not_cyberbullying,green
1,Hunasamaranhalli,13.1435,77.62,Dumb ass niggers with no heart ' fuck ya'll ...,ethnicity,red
2,Hesarghatta,13.1391,77.4783,Black is a color . African American is a Cultu...,ethnicity,red
3,Bommayapālaiyam,11.9922,79.8499,Noo fuck youu !!! &amp; Stay Mad with your mad...,ethnicity,red
4,Gundūr,10.7339,78.7184,@KhaledHamaki He beheaded 600 Jewish prisoners...,religion,red


In [19]:
# Build the tooltip text "CITY:<name> | RESULT:<label>" for every row, in row order.
COMEPLETE_DETAILS = [
    f"CITY:{city_name} | RESULT:{label}"
    for city_name, label in zip(df["City"].values, df["RESULT"].values)
]
df['INFOS'] = COMEPLETE_DETAILS

In [20]:
# Check that the INFOS column was added.
df.head()

Unnamed: 0,City,Lat,Lng,Text,RESULT,COLORS,INFOS
0,Pakri,25.5876,85.158,"Been giving her the silent treatment, it was s...",not_cyberbullying,green,CITY:Pakri | RESULT:not_cyberbullying
1,Hunasamaranhalli,13.1435,77.62,Dumb ass niggers with no heart ' fuck ya'll ...,ethnicity,red,CITY:Hunasamaranhalli | RESULT:ethnicity
2,Hesarghatta,13.1391,77.4783,Black is a color . African American is a Cultu...,ethnicity,red,CITY:Hesarghatta | RESULT:ethnicity
3,Bommayapālaiyam,11.9922,79.8499,Noo fuck youu !!! &amp; Stay Mad with your mad...,ethnicity,red,CITY:Bommayapālaiyam | RESULT:ethnicity
4,Gundūr,10.7339,78.7184,@KhaledHamaki He beheaded 600 Jewish prisoners...,religion,red,CITY:Gundūr | RESULT:religion


In [21]:
# Tooltip text of the row labelled 0.
df.loc[0, 'INFOS']

'CITY:Pakri | RESULT:not_cyberbullying'

In [22]:
# World map centred on a fixed coordinate at a low zoom level.
world_all_cities_colored = folium.Map(
    location=[13.133932434766733, 16.103938729508073],
    zoom_start=2,
)

# One marker per row: placed at (Lat, Lng), coloured by COLORS, with the INFOS
# text as its tooltip.
marker_fields = zip(df['Lat'], df['Lng'], df['INFOS'], df['COLORS'])
for lat, lng, info, colour in marker_fields:
    marker_icon = folium.Icon(color=colour, prefix='fa', icon='circle')
    folium.Marker(
        location=[lat, lng],
        tooltip=info,
        icon=marker_icon,
    ).add_to(world_all_cities_colored)

# Last expression of the cell: render the map inline.
world_all_cities_colored

---