# SVM Built-in model for Sarcasm Detection

### Import required libraries

In [1]:
import numpy as np
from math import exp
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.metrics import classification_report

### Read in and split data

In [2]:
# Load the headline CSV, then carve out train/test partitions.
dataset = pd.read_csv('Pre-processed_Sarcasm_Headlines_Dataset.csv')
headlines = dataset['headline']
labels = dataset['is_sarcastic']

# test_size=0.25 holds out a quarter of the rows; the fixed random_state
# makes the shuffle (and therefore the split) repeatable across runs.
x_1, x_2, y_1, y_2 = train_test_split(
    headlines,
    labels,
    test_size=0.25,
    random_state=100,
)

### Vectorize dataset

In [3]:
# TF-IDF configuration, spelled out one option per line.
tfidf_options = {
    'min_df': 5,           # ignore terms found in fewer than 5 documents
    'max_df': 0.8,         # ignore terms found in more than 80% of documents
    'sublinear_tf': True,  # use 1 + log(tf) instead of raw term frequency
    'use_idf': True,
}
vectorizer = TfidfVectorizer(**tfidf_options)

# The vocabulary and IDF weights are learned from the training headlines only;
# the test headlines are merely projected onto that fitted vocabulary.
X_1 = vectorizer.fit_transform(x_1)
X_2 = vectorizer.transform(x_2)

### Convert the split data into DataFrames

In [4]:
def _as_single_column_frame(values, column):
    """Return `values` as a one-column DataFrame with a fresh 0..n-1 index.

    Parameters
    ----------
    values : pandas.Series, pandas.DataFrame or 1-D array-like
        The data to wrap. A DataFrame is accepted so that this cell can be
        re-executed after the conversion has already happened.
    column : str
        Name of the single column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        A frame whose only column is `column` and whose index is a RangeIndex.
    """
    if isinstance(values, pd.DataFrame):
        # Cell was already run once: just re-select the column.
        frame = values[[column]]
    else:
        # Go through a NumPy array instead of `values[:, np.newaxis]`:
        # multi-dimensional indexing on a Series is rejected by pandas >= 2.0.
        frame = pd.DataFrame({column: np.asarray(values)})
    # Discard the shuffled row labels inherited from train_test_split.
    return frame.reset_index(drop=True)


x_1 = _as_single_column_frame(x_1, 'headline')
x_2 = _as_single_column_frame(x_2, 'headline')

y_1 = _as_single_column_frame(y_1, 'is_sarcastic')
y_2 = _as_single_column_frame(y_2, 'is_sarcastic')

### Fit SVM and predict on test data

In [5]:
# Train a linear-kernel SVC on the TF-IDF training matrix, then label the
# TF-IDF test matrix with it.
train_labels = y_1['is_sarcastic']
classifier = svm.SVC(kernel='linear')
classifier.fit(X_1, train_labels)
prediction = classifier.predict(X_2)

### Print model statistics

In [6]:
# Overall accuracy of the predictions against the test labels.
print('Accuracy: ')
accuracy = accuracy_score(y_2, prediction)
print(accuracy)

# Per-class precision/recall/F1/support; with output_dict=True the report is
# a dict keyed by the string form of each class label.
scores = classification_report(y_2['is_sarcastic'], prediction, output_dict=True)
for caption, label in (('positive: ', '1'), ('negative: ', '0')):
    print(caption, scores[label])

Accuracy: 
0.82869122491764
positive:  {'precision': 0.8018169582772544, 'recall': 0.8110959836623554, 'f1-score': 0.806429780033841, 'support': 2938}
negative:  {'precision': 0.8502428494333514, 'recall': 0.8425133689839572, 'f1-score': 0.8463604619930164, 'support': 3740}
