### HATE SPEECH RECOGNITION PROJECT

#### INSTALL REQUIRED LIBRARIES

In [1]:
!pip install pandas
!pip install numpy
!pip install -U scikit-learn
!pip install scikit-learn
!pip install nltk



#### IMPORTING REQUIRED LIBRARIES

In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import nltk
import re
nltk.download('stopwords')
from nltk.corpus import stopwords
stopWord = set(stopwords.words('english'))
stemmer = nltk.SnowballStemmer("english")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\rosha\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


#### READING THE DATASET

In [3]:
os.chdir(r"C:\Users\rosha\OneDrive\Desktop\Hate Speech Recognition\Input Files")
data = pd.read_csv('labeled_data.csv')
data

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...
...,...,...,...,...,...,...,...
24778,25291,3,0,2,1,1,you's a muthaf***in lie &#8220;@LifeAsKing: @2...
24779,25292,3,0,1,2,2,"you've gone and broke the wrong heart baby, an..."
24780,25294,3,0,3,0,1,young buck wanna eat!!.. dat nigguh like I ain...
24781,25295,6,0,6,0,1,youu got wild bitches tellin you lies


#### PREVIEWING THE DATASET

In [4]:
print(data.head())

   Unnamed: 0  count  hate_speech  offensive_language  neither  class  \
0           0      3            0                   0        3      2   
1           1      3            0                   3        0      1   
2           2      3            0                   3        0      1   
3           3      3            0                   2        1      1   
4           4      6            0                   6        0      1   

                                               tweet  
0  !!! RT @mayasolovely: As a woman you shouldn't...  
1  !!!!! RT @mleew17: boy dats cold...tyga dwn ba...  
2  !!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...  
3  !!!!!!!!! RT @C_G_Anderson: @viva_based she lo...  
4  !!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...  


#### CATEGORIZATION OF COMMENTS AS HATE, OFFENSIVE OR NONE

In [5]:
data['labels'] = data['class'].map({0:"Hate Speech", 1:"Offensive Speech", 2:"Neither Hate nor Offensive"})

In [6]:
data.columns

Index(['Unnamed: 0', 'count', 'hate_speech', 'offensive_language', 'neither',
       'class', 'tweet', 'labels'],
      dtype='object')

#### SELECTING NECESSARY COLUMNS

In [7]:
data_1 = data[["tweet","labels"]]

In [8]:
print(data_1)

                                                   tweet  \
0      !!! RT @mayasolovely: As a woman you shouldn't...   
1      !!!!! RT @mleew17: boy dats cold...tyga dwn ba...   
2      !!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...   
3      !!!!!!!!! RT @C_G_Anderson: @viva_based she lo...   
4      !!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...   
...                                                  ...   
24778  you's a muthaf***in lie &#8220;@LifeAsKing: @2...   
24779  you've gone and broke the wrong heart baby, an...   
24780  young buck wanna eat!!.. dat nigguh like I ain...   
24781              youu got wild bitches tellin you lies   
24782  ~~Ruffled | Ntac Eileen Dahlia - Beautiful col...   

                           labels  
0      Neither Hate nor Offensive  
1                Offensive Speech  
2                Offensive Speech  
3                Offensive Speech  
4                Offensive Speech  
...                           ...  
24778            Offensive 

In [9]:
data_1.head()

Unnamed: 0,tweet,labels
0,!!! RT @mayasolovely: As a woman you shouldn't...,Neither Hate nor Offensive
1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...,Offensive Speech
2,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...,Offensive Speech
3,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...,Offensive Speech
4,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...,Offensive Speech


#### TEXT PREPROCESSING AND CLEANING (NLP STEPS)

In [10]:
def clean (text):
    text = str(text).lower()
    text = re. sub('[.?]','', text)
    text = re.sub('https?://\S+|www.\S+', '', text)
    text = re.sub('<.?>+', '', text)
    text = re.sub(r'[^\w\s]','',text)
    text = re.sub('\n', '', text)
    text = re.sub('\w\d\w', '', text)
    text = [word for word in text.split(' ') if word not in stopWord]
    text = " ".join(text)
    text = [stemmer.stem(word) for word in text.split(' ')]
    return text

data["tweet_cleaned"] = data["tweet"].apply(clean)
    

#### PREPARING DATA FOR MODELING

In [11]:
x = np.array(data["tweet_cleaned"])
y = np.array(data["labels"])
print(x)
print(y)

[list(['', 'rt', 'mayasolov', 'woman', 'shouldnt', 'complain', 'clean', 'hous', 'amp', 'man', 'alway', 'take', 'trash'])
 list(['', 'rt', 'mlee', 'boy', 'dat', 'coldtyga', 'dwn', 'bad', 'cuffin', 'dat', 'hoe', '1st', 'place'])
 list(['', 'rt', 'urkindofbrand', 'dawg', 'rt', 'babif', 'ever', 'fuck', 'bitch', 'start', 'cri', 'confus', 'shit'])
 ...
 list(['young', 'buck', 'wanna', 'eat', 'dat', 'nigguh', 'like', 'aint', 'fuckin', 'dis'])
 list(['youu', 'got', 'wild', 'bitch', 'tellin', 'lie'])
 list(['ruffl', '', 'ntac', 'eileen', 'dahlia', '', 'beauti', 'color', 'combin', 'pink', 'orang', 'yellow', 'amp', 'white', 'coll', ''])]
['Neither Hate nor Offensive' 'Offensive Speech' 'Offensive Speech' ...
 'Offensive Speech' 'Offensive Speech' 'Neither Hate nor Offensive']


#### JOINING TOKENIZED WORDS FOR TEXT PROCESSING AND TRAINING IN MODEL

In [12]:
x = [' '.join(tokens) for tokens in x]

In [13]:
y = [' '.join(tokens) for tokens in y]

#### INITIALIZING COUNT VECTORIZER (TEXT --> NUMERICAL REPRESENTATION BASED ON WORD COUNTS FOR TRAINING)

In [14]:
cv = CountVectorizer()

In [15]:
X = cv.fit_transform(x)

In [16]:
X_train, X_test, Y_train, Y_test = train_test_split(X,y,test_size=0.33, random_state=42)

#### CREATING DECISION TREE MODEL

In [17]:
model = DecisionTreeClassifier()

#### TRAINING DECISION TREE MODEL

In [18]:
model.fit(X_train,Y_train)

#### PREDICTING WITH DECISION TREE

In [19]:
y_pred = model.predict(X_test)

#### CREATING ACCURACY OF CLASSIFICATION MODEL

In [20]:
from sklearn.metrics import accuracy_score
print(accuracy_score(Y_test, y_pred))

0.8792028365325835


In [21]:
accuracy = (accuracy_score(Y_test, y_pred))*100
print("Your accuracy of classification model is: ", accuracy)

Your accuracy of classification model is:  87.92028365325835


In [22]:
i = "You are too bad and I do not like your attitude"
i = cv.transform([i]).toarray()
print(model.predict((i)))

['N e i t h e r   H a t e   n o r   O f f e n s i v e']


#### MAKING USER FRIENDLY

In [23]:
user_text = input("Enter your comment or message: ")

Enter your comment or message:  Science Is Coooooooooooool :)


In [24]:
user_text1 = cv.transform([user_text]).toarray()
result = model.predict(user_text1)

In [25]:
print("You typed: ", user_text)

if (result=='H a t e   S p e e c h'):
    print("Category:", result)
    print("Hate is a heavy burden to bear, let kindness be your foundation")
    print("I hope you stop spreading hate comments from now on")

elif (result=='O f f e n s i v e   S p e e c h'):
    print("Category:", result)
    print("The best messages are those that inspire, not offend")
    print("Your words can either build up or tear down; make them count")
    print("I hope you refrain from sharing offensive messages moving forward")

elif (result=='N e i t h e r   H a t e   n o r   O f f e n s i v e'):
    print("Category:", result)
    print("Neither hate nor offensive words can ever create a better tomorrow")
    print("Good Job!! Spread kindness over hate or offensive messages")

You typed:  Science Is Coooooooooooool :)
Category: ['N e i t h e r   H a t e   n o r   O f f e n s i v e']
Neither hate nor offensive words can ever create a better tomorrow
Good Job!! Spread kindness over hate or offensive messages
