   # Sarcasm Detection with Machine Learning

In [67]:
import pandas as pd
import numpy as np
data = pd.read_json("Sarcasm.json", lines=True)
print(data.head())

                                        article_link  \
0  https://www.huffingtonpost.com/entry/versace-b...   
1  https://www.huffingtonpost.com/entry/roseanne-...   
2  https://local.theonion.com/mom-starting-to-fea...   
3  https://politics.theonion.com/boehner-just-wan...   
4  https://www.huffingtonpost.com/entry/jk-rowlin...   

                                            headline  is_sarcastic  
0  former versace store clerk sues over secret 'b...             0  
1  the 'roseanne' revival catches up to our thorn...             0  
2  mom starting to fear son's web series closest ...             1  
3  boehner just wants wife to listen, not come up...             1  
4  j.k. rowling wishes snape happy birthday in th...             0  


### The “is_sarcastic” column in this dataset contains the labels that we have to predict for the task of sarcasm detection. It contains binary values as 1 and 0, where 1 means sarcastic and 0 means not sarcastic. So for simplicity, I will transform the values of this column as “sarcastic” and “not sarcastic” instead of 1 and 0:

In [68]:
#data["is_sarcastic"] = data["is_sarcastic"].map({0: "Not Sarcasm", 1: "Sarcasm"})
#print(data.head())

In [73]:
data["is_sarcastic"] = data["is_sarcastic"].apply(lambda x:1 if x=="Sarcasm" else 0)

In [74]:
data

Unnamed: 0,article_link,headline,is_sarcastic
0,https://www.huffingtonpost.com/entry/versace-b...,former versace store clerk sues over secret 'b...,0
1,https://www.huffingtonpost.com/entry/roseanne-...,the 'roseanne' revival catches up to our thorn...,0
2,https://local.theonion.com/mom-starting-to-fea...,mom starting to fear son's web series closest ...,0
3,https://politics.theonion.com/boehner-just-wan...,"boehner just wants wife to listen, not come up...",0
4,https://www.huffingtonpost.com/entry/jk-rowlin...,j.k. rowling wishes snape happy birthday in th...,0
...,...,...,...
26704,https://www.huffingtonpost.com/entry/american-...,american politics in moral free-fall,0
26705,https://www.huffingtonpost.com/entry/americas-...,america's best 20 hikes,0
26706,https://www.huffingtonpost.com/entry/reparatio...,reparations and obama,0
26707,https://www.huffingtonpost.com/entry/israeli-b...,israeli ban targeting boycott supporters raise...,0


# import data set and extrect dependent and independent variables

# split the data into traning set and test set¶

In [75]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(data.headline,data.is_sarcastic)

In [77]:
from sklearn.feature_extraction.text import CountVectorizer
v=CountVectorizer()
x_train_count=v.fit_transform(x_train.values)
#x_train_count.toarray()[:3]

## Now I will be using the Bernoulli Naive Bayes algorithm to train a model for the task of sarcasm detection

In [78]:

from sklearn.naive_bayes import BernoulliNB

In [79]:
from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB()
model.fit(x_train_count,y_train)

MultinomialNB()

###  score of model

In [82]:
x_count=v.transform(x_test)
model.score(x_count,y_test)

1.0

## Now let’s use a sarcastic text as input to test whether our machine learning model detects sarcasm or not:

In [81]:
user = input("Enter a Text: ")
data = v.transform([user]).toarray()
output = model.predict(data)
print(output)

Enter a Text: boehner just wants wife to listen, not come up...	
[0]
