Install RAPIDS

In [None]:
# Query the NVIDIA driver to confirm which GPU (if any) is attached to this runtime.
!nvidia-smi

In [None]:
# This fetches the RAPIDS-Colab install scripts and runs the pip installer, which also checks your GPU. Run this and the next cell only.
# Please read the output of this cell. If your Colab instance is not RAPIDS-compatible, it will warn you and give you remediation steps.
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/pip-install.py

Import required dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold,ParameterGrid
from sklearn.preprocessing import StandardScaler
import cudf as cd
import cupy as cp
from cuml.svm import SVC

In [None]:
# Read the preprocessed dataset from the working directory and report its dimensions.
df = pd.read_csv('processed.csv')
print('Data shape:', df.shape)

In [None]:
# Show every column next to its positional index (columns are addressed by position when encoding).
[(position, column) for position, column in enumerate(df.columns)]

One-hot encoding

In [None]:
# One-hot encode the column at position 2 and pass every other column through unchanged.
# sparse_threshold=0 forces a dense ndarray: with the default threshold the stacked result can come
# back as a SciPy sparse matrix, in which case the label slice below would be a sparse column
# instead of a dense 1-D vector and the later StandardScaler step would reject the features.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [2])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = ct.fit_transform(df)
# Encoded columns come first and passthrough columns are appended to their right,
# so the last column is taken as the output (target).
y = X[:, -1]
# Drop the first dummy column (redundant given the remaining dummies) and the output column.
X = X[:, 1:-1]
print(X.shape)
print(y.shape)

Defining functions (set your classifier here)

In [None]:
# Candidate values: powers of ten from 1e-3 to 1e3, plus a few extras.
C_values = list(np.logspace(-3, 3, 7)) + [200]
gamma_values = list(np.logspace(-3, 3, 7)) + ['scale', 'auto']
# gamma is the kernel coefficient for rbf/sigmoid/poly only; the linear kernel ignores it.
# Linear therefore gets its own entry with a single gamma value (the key is kept so that
# best_params always has the same shape) instead of being refit once per gamma value.
param_grid = [
    {'kernel': ['rbf', 'sigmoid'], 'C': C_values, 'gamma': gamma_values},
    {'kernel': ['linear'], 'C': C_values, 'gamma': ['scale']},
    {'kernel': ['poly'], 'C': C_values, 'gamma': gamma_values, 'degree': [3, 4, 5]},
]

# Hold-out split: the first fold of a shuffled, stratified K-fold serves as the train/test partition.
splits = list(StratifiedKFold(shuffle=True, random_state=0).split(X, y))
train_index, test_index = splits[0]
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
# Fit the scaler on the training part only, then apply the same transform to the test part.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

def GridSearch(X,y,param_grid,cv=5,random_state=0):
    """Exhaustively search ``param_grid`` and keep the combination with the best mean CV accuracy.

    Parameters
    ----------
    X : array indexable by integer index arrays
        Feature matrix. No scaling is applied inside this function.
    y : array indexable by integer index arrays
        Labels aligned with the rows of ``X``.
    param_grid : dict or list of dict
        Anything accepted by ``ParameterGrid``; each combination is forwarded
        to the classifier constructor as keyword arguments.
    cv : int, default 5
        Number of stratified folds.
    random_state : int, default 0
        Seed used both for shuffling the folds and for the classifier.

    Returns
    -------
    (best_params, best_score)
        The first combination reaching the highest mean test-fold accuracy and
        that accuracy. ``(None, -1)`` if the grid yields no combination.
    """
    # Build the folds once so every parameter combination is scored on identical splits.
    folds = list(StratifiedKFold(n_splits=cv, shuffle=True, random_state=random_state).split(X, y))
    best_score = -1
    best_params = None
    for comb in ParameterGrid(param_grid):
        # change your classifier here
        clf = SVC(random_state=random_state, max_iter=int(1e9), **comb)
        fold_acc = []
        for train_index, test_index in folds:
            clf.fit(X[train_index], y[train_index])
            fold_acc.append(accuracy_score(y[test_index], clf.predict(X[test_index])))
        mean_acc = np.mean(fold_acc)
        # Strict comparison: on ties the earliest combination is kept.
        if mean_acc > best_score:
            best_score = mean_acc
            best_params = comb
    return best_params, best_score

def cv_scores(X,y,model,cv=5,random_state=1):
    """Print the mean train and test accuracy of ``model`` over stratified K-fold CV.

    Each fold fits its own ``StandardScaler`` on the training part and applies it
    to the test part, so the held-out rows never influence the scaling statistics.

    Parameters
    ----------
    X : array indexable by integer index arrays
        Unscaled feature matrix.
    y : array indexable by integer index arrays
        Labels aligned with the rows of ``X``.
    model : estimator exposing ``fit`` and ``predict``
        Refit in place on every fold; after the call it holds the last fold's fit.
    cv : int, default 5
        Number of stratified folds.
    random_state : int, default 1
        Seed for shuffling the folds.

    Returns
    -------
    None
        The two mean accuracies are printed, not returned.
    """
    acc_test = []
    acc_train = []
    folds = StratifiedKFold(n_splits=cv, shuffle=True, random_state=random_state)
    for train_index, test_index in folds.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Scale with statistics from the training part only.
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)
        model.fit(X_train, y_train)
        acc_test.append(accuracy_score(y_test, model.predict(X_test)))
        acc_train.append(accuracy_score(y_train, model.predict(X_train)))
    print('Train acc:', np.mean(acc_train))
    print('Test acc:', np.mean(acc_test))

In [None]:
# Tune hyper-parameters on the scaled training portion only; the held-out test split is not used here.
best_params, best_score = GridSearch(X_train, y_train, param_grid)
print(best_params, best_score, sep='\n')