In [1]:
import numpy as np
import pandas as pd
import scipy as sp

from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.svm import SVC

In [2]:
trainData  = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/drug/train.csv'
testData  = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/drug/test.csv'
subData  = 'https://raw.githubusercontent.com/Datamanim/datarepo/main/drug/submission.csv'

In [3]:
train = pd.read_csv(trainData)
test = pd.read_csv(testData)
sub = pd.read_csv(subData)

In [4]:
X = train.drop(['Drug'], axis=1)
y = train['Drug']

In [5]:
X['Sex'] = LabelEncoder().fit_transform(X['Sex'])
X['BP'] = LabelEncoder().fit_transform(X['BP'])
X['Cholesterol'] = LabelEncoder().fit_transform(X['Cholesterol'])

X['Na_to_K'] = StandardScaler().fit_transform(X[['Na_to_K']])

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
pipe = Pipeline([('scaler', StandardScaler()),('model', RandomForestClassifier())])
params = [{'model__random_state':[1, 10, 100], 'model__n_jobs':[1, 10, 100]}]
cv = GridSearchCV(pipe, params).fit(X_train, y_train)
y_pred = cv.predict(X_test)
accuracy_score(y_test, y_pred)

1.0

In [8]:
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = rf.predict(X_test)
accuracy_score(y_test, y_pred)

1.0

In [9]:
ab = AdaBoostClassifier(random_state=42).fit(X_train, y_train)
y_pred = ab.predict(X_test)
accuracy_score(y_test, y_pred)

0.9375

In [10]:
gb = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
y_pred = gb.predict(X_test)
accuracy_score(y_test, y_pred)

1.0

In [11]:
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred = dt.predict(X_test)
accuracy_score(y_test, y_pred)

1.0

In [12]:
svc = SVC(gamma='auto').fit(X_train, y_train)
y_pred = svc.predict(X_test)
accuracy_score(y_test, y_pred)

0.5416666666666666

In [13]:
test['Sex'] = LabelEncoder().fit_transform(test['Sex'])
test['BP'] = LabelEncoder().fit_transform(test['BP'])
test['Cholesterol'] = LabelEncoder().fit_transform(test['Cholesterol'])
test['Na_to_K'] = StandardScaler().fit_transform(test[['Na_to_K']])

sub['0'] = ab.predict(test)
sub.to_csv('18061.csv', index=False)