In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split



In [2]:
train = pd.read_csv("preprocessed_train_data.csv", index_col = 0)
test = pd.read_csv("preprocessed_test_data.csv", index_col = 0)

trained_tweets = train['keyword']+train['text']
test_tweets = test['keyword']+test['text']

In [3]:
state = 12  
test_size = 0.30  
  
X_train, X_test, y_train, y_test = train_test_split(trained_tweets, train['target'],  
    test_size=test_size, random_state=state)

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

word_vectorizer = TfidfVectorizer(
    analyzer='word',
    stop_words='english',
    ngram_range=(1, 3),
    lowercase=True,
    min_df=5,
    max_features=30000)

char_vectorizer = TfidfVectorizer(
    analyzer='char',
    stop_words='english',
    ngram_range=(3, 6),
    lowercase=True,
    min_df=5,
    max_features=50000)

vectorizer = FeatureUnion([('word_vectorizer', word_vectorizer),  ('char_vectorizer', char_vectorizer)])
vectorizer.fit(X_train)

X_train_vectors = vectorizer.transform(X_train).toarray()
X_test_vectors = vectorizer.transform(X_test).toarray()
print(X_train_vectors.shape, X_test_vectors.shape)

(5329, 44132) (2284, 44132)


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

######## Linear Kernel

svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train_vectors, y_train)

y_pred = svclassifier.predict(X_test_vectors)


print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

In [None]:
######## Polynomial Kernal

svclassifier = SVC(kernel='poly', degree=8)
svclassifier.fit(X_train_vectors, y_train)

y_pred = svclassifier.predict(X_test_vectors)


print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

In [None]:
######## Gaussian Kernal

svclassifier = SVC(kernel='rbf')
svclassifier.fit(X_train_vectors, y_train)

y_pred = svclassifier.predict(X_test_vectors)


print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

In [None]:
######## Sigmoid Kernal

svclassifier = SVC(kernel='sigmoid')
svclassifier.fit(X_train_vectors, y_train)

y_pred = svclassifier.predict(X_test_vectors)


print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))