In [1]:
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
X, y = load_svmlight_file("C:\\Users\\alayp\\Downloads\\a1a.txt")
X_dense = X.todense().A
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.3, random_state=0)

In [2]:
p = make_pipeline(StandardScaler(), GaussianNB())
p.fit(X_train, y_train)
Acc = p.score(X_test, y_test)
print(f"Using Gaussian Naive Bayes classifier Accuracy is: {Acc:.4f}")

Using Gaussian Naive Bayes classifier Accuracy is: 0.3568


In [3]:
dtP = make_pipeline(StandardScaler(), DecisionTreeClassifier(random_state=0))

calculationMetrics = {
    'decisiontreeclassifier__criterion': ['gini', 'entropy'],
    'decisiontreeclassifier__max_depth': [10, 50, 100]
}

gridS = GridSearchCV(dtP, calculationMetrics, cv=5)
gridS.fit(X_train, y_train)

print("Best parameters for Decision Tree:", gridS.best_params_)
print(f"Best training accuracy: {gridS.best_score_:.4f}")

Acc = gridS.best_estimator_.score(X_test, y_test)
print(f"Using Decision Tree Classifier accuracy is: {Acc:.4f}")

Best parameters for Decision Tree: {'decisiontreeclassifier__criterion': 'gini', 'decisiontreeclassifier__max_depth': 10}
Best training accuracy: 0.7863
Using Decision Tree Classifier accuracy is: 0.8112


In [4]:
svmP = make_pipeline(StandardScaler(), SVC(random_state=0))

svmParameters = {
    'svc__kernel': ['linear', 'poly', 'rbf'],
    'svc__degree': [2, 3],
    'svc__gamma': [0.001, 0.1, 2]
}

svmGridS = GridSearchCV(svmP, svmParameters, cv=5)
svmGridS.fit(X_train, y_train)

print("Best parameters for SVM:", svmGridS.best_params_)
print(f"Best training accuracy: {svmGridS.best_score_:.4f}")

BestModel = svmGridS.best_estimator_
Acc = BestModel.score(X_test, y_test)
print(f"Using SVM Classifier accuracy is: {Acc:.4f}")

Best parameters for SVM: {'svc__degree': 2, 'svc__gamma': 0.001, 'svc__kernel': 'linear'}
Best training accuracy: 0.8299
Using SVM Classifier accuracy is: 0.8050


In [5]:
# "Which classifier and parameter values gave the highest accuracy?"
# "The SVM Classifier gives highest and best accuracy"