In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, ShuffleSplit, GridSearchCV
from sklearn import svm
from sklearn import metrics

In [3]:
data = pd.read_csv('Datasets/College.csv')
data.head()

Unnamed: 0,Private,Apps,Accept,Enroll,Top10perc,Top25perc,F.Undergrad,P.Undergrad,Outstate,Room.Board,Books,Personal,PhD,Terminal,S.F.Ratio,perc.alumni,Expend,Grad.Rate
0,Yes,1660,1232,721,23,52,2885,537,7440,3300,450,2200,70,78,18.1,12,7041,60
1,Yes,2186,1924,512,16,29,2683,1227,12280,6450,750,1500,29,30,12.2,16,10527,56
2,Yes,1428,1097,336,22,50,1036,99,11250,3750,400,1165,53,66,12.9,30,8735,54
3,Yes,417,349,137,60,89,510,63,12960,5450,450,875,92,97,7.7,37,19016,59
4,Yes,193,146,55,16,44,249,869,7560,4120,800,1500,76,72,11.9,2,10922,15


In [4]:
labelencoder = LabelEncoder()
data['Private'] = labelencoder.fit_transform(data['Private'])
data.head()

Unnamed: 0,Private,Apps,Accept,Enroll,Top10perc,Top25perc,F.Undergrad,P.Undergrad,Outstate,Room.Board,Books,Personal,PhD,Terminal,S.F.Ratio,perc.alumni,Expend,Grad.Rate
0,1,1660,1232,721,23,52,2885,537,7440,3300,450,2200,70,78,18.1,12,7041,60
1,1,2186,1924,512,16,29,2683,1227,12280,6450,750,1500,29,30,12.2,16,10527,56
2,1,1428,1097,336,22,50,1036,99,11250,3750,400,1165,53,66,12.9,30,8735,54
3,1,417,349,137,60,89,510,63,12960,5450,450,875,92,97,7.7,37,19016,59
4,1,193,146,55,16,44,249,869,7560,4120,800,1500,76,72,11.9,2,10922,15


In [5]:
X = data.iloc[:, 1:]
y = data['Private']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=2020)

In [7]:
model_svm = svm.LinearSVC()
model_svm.fit(X_train, y_train)
y_pred = model_svm.predict(X_test)

In [8]:
print("\nAccuracy Score\n")
print(f"{metrics.accuracy_score(y_test, y_pred)*100:0.2f}%")


Accuracy Score

90.60%


In [9]:
scaler_df = StandardScaler().fit_transform(X)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(scaler_df, y, test_size=0.30, random_state=2020)

In [11]:
model_svc = svm.LinearSVC()
model_svc.fit(X_train, y_train)

LinearSVC()

In [12]:
y_pred = model_svc.predict(X_test)

In [13]:
print("\nAccuracy Score\n")
print(f"{metrics.accuracy_score(y_test, y_pred)*100:0.2f}%")


Accuracy Score

94.44%


In [14]:
parameter_candidates = [
    {'C': [1, 10, 100, 1000], 'kernel': ['poly']},
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

In [15]:
CV = ShuffleSplit()
clf = GridSearchCV(estimator=svm.SVC(max_iter=1000), param_grid=parameter_candidates, n_jobs=-1, cv=CV)
clf.fit(X_train, y_train)

# View the accuracy score
print(f'Best score: {clf.best_score_*100:0.2f}%')

# View the best parameters for the model found using grid search
print('Best C:', clf.best_estimator_.C)
print('Best Kernel:', clf.best_estimator_.kernel)
print('Best Gamma:', clf.best_estimator_.gamma)

Best score: 95.27%
Best C: 1000
Best Kernel: rbf
Best Gamma: 0.001
