In [None]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn import metrics
import numpy as np

 
    

def SVM(categories, remove=()):
    """Train a linear SVM text classifier on 20 Newsgroups and print its test-set scores.

    A ``CountVectorizer`` -> ``TfidfTransformer`` -> ``SGDClassifier(loss='hinge')``
    pipeline is fitted on the ``train`` subset and evaluated on the ``test``
    subset, both restricted to ``categories``.

    Parameters
    ----------
    categories : list of str
        Newsgroup names, forwarded unchanged to ``fetch_20newsgroups``.
    remove : tuple of str, default=()
        Forwarded to ``fetch_20newsgroups(remove=...)`` for both subsets.
        The default leaves the posts untouched, so existing callers get the
        same results as before. Passing ``('headers', 'footers', 'quotes')``
        strips newsgroup metadata, which scikit-learn's dataset guide
        recommends for more realistic scores.

    Returns
    -------
    None
        The accuracy, per-class report and confusion matrix are printed.

    Notes
    -----
    Rows of the report and of the confusion matrix follow the order of
    ``twenty_test.target_names``, which can differ from the order in which
    ``categories`` was supplied.
    """
    # Identical fetch settings for both subsets so the integer targets agree.
    fetch_kwargs = dict(categories=categories, shuffle=True, random_state=42, remove=remove)

    twenty_train = fetch_20newsgroups(subset='train', **fetch_kwargs)

    text_clf = Pipeline([
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        # Hinge loss + L2 penalty: a linear SVM trained with SGD.
        ('clf', SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, random_state=42)),
    ])
    text_clf.fit(twenty_train.data, twenty_train.target)

    # evaluate on test set
    twenty_test = fetch_20newsgroups(subset='test', **fetch_kwargs)
    predicted = text_clf.predict(twenty_test.data)

    # Pin the label order explicitly so report rows and matrix rows/columns
    # always correspond to target_names, even if a class is never predicted.
    label_ids = np.arange(len(twenty_test.target_names))

    print("*** SVM Model ***")
    print("Newsgroup Categories : ", categories )
    print("Accuracy : {}%".format(np.mean(predicted == twenty_test.target)*100))
    print(metrics.classification_report(twenty_test.target, predicted, labels=label_ids, target_names=twenty_test.target_names))
    print("Confusion Matrix : \n", metrics.confusion_matrix(twenty_test.target, predicted, labels=label_ids))

    

In [None]:
# Newsgroups to classify; passed to SVM() in the next cell.
categories = [
    'sci.med',
    'sci.space',
    'sci.electronics',
]

In [None]:
# Train on the selected newsgroups and print the test-set evaluation.
SVM(categories=categories)

*** SVM Model ***
Newsgroup Categories :  ['sci.med', 'sci.space', 'sci.electronics']
Accuracy : 93.99830938292477%
                 precision    recall  f1-score   support

sci.electronics       0.89      0.97      0.93       393
        sci.med       0.95      0.91      0.93       396
      sci.space       0.98      0.94      0.96       394

       accuracy                           0.94      1183
      macro avg       0.94      0.94      0.94      1183
   weighted avg       0.94      0.94      0.94      1183

Confusion Matrix : 
 [[381   9   3]
 [ 32 360   4]
 [ 13  10 371]]
