In [1]:
import numpy as np
import pandas as pd
from google.colab import drive

# Mount Google Drive inside the Colab runtime; the CSV files read later in this
# notebook live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


# Import Libraries

In [2]:
# Show full cell contents in rendered frames instead of truncating long strings.
pd.options.display.max_colwidth = None

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load Data

In [36]:
# Single place to change when the dataset folder moves; both CSVs live in it.
DATA_DIR = '/content/drive/MyDrive/Classroom/GP 2023.02 IC/archive'

train = pd.read_csv(f'{DATA_DIR}/Training.csv')
test = pd.read_csv(f'{DATA_DIR}/Testing.csv')

In [4]:
# Report (rows, columns) for both frames; the second line describes the
# test frame, so it is labelled accordingly.
print(f"train shape - {train.shape}")
print(f"test shape - {test.shape}")

train shape - (4920, 134)
train shape - (42, 133)


In [5]:
# Per-column count of missing values in the training frame.
train.isna().sum()

itching                    0
skin_rash                  0
nodal_skin_eruptions       0
continuous_sneezing        0
shivering                  0
                        ... 
blister                    0
red_sore_around_nose       0
yellow_crust_ooze          0
prognosis                  0
Unnamed: 133            4920
Length: 134, dtype: int64

In [37]:
# 'Unnamed: 133' is entirely null (see the null counts above), so remove it.
# Rebinding with errors='ignore' keeps this cell safe to re-run: a second
# execution is a no-op instead of raising KeyError on the already-dropped column.
train = train.drop(columns='Unnamed: 133', errors='ignore')

In [7]:
# Preview the first five rows of the training frame.
train.head(n=5)

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


# Preprocess and Split Data

In [14]:
from sklearn.preprocessing import LabelEncoder

def label_encode_columns(df):
    """Replace categorical columns of ``df`` with integer label codes.

    A column is encoded when its dtype is ``object`` or ``category``, or when
    it is named ``'Gender'`` regardless of dtype. The encoder is re-fitted on
    each such column, so codes are assigned per column.

    The frame is modified in place and the same object is returned.
    """
    encoder = LabelEncoder()
    always_encode = ('Gender',)
    for name in df.columns:
        kind = df[name].dtype
        needs_encoding = kind == 'object' or kind == 'category' or name in always_encode
        if not needs_encoding:
            continue
        # fit_transform learns this column's classes and returns their codes
        df[name] = encoder.fit_transform(df[name])
    return df

In [12]:
def log_normalization(df):
    """Apply ``log(1 + x)`` to every column of ``df``.

    Each column is cast to float64 first and then overwritten with its
    ``np.log1p`` transform. The frame is modified in place and the same
    object is returned.
    """
    for name in df.columns:
        as_float = df[name].astype(np.float64)
        df[name] = np.log1p(as_float)
    return df

In [38]:
# Encode the target with ONE encoder fitted on the training labels and reuse it
# for the hold-out file, so a given prognosis maps to the same integer in both.
# Encoding each frame independently only agrees when both files happen to
# contain exactly the same set of labels; transform() raises on a label unseen
# in training instead of silently shifting the codes.
prognosis_encoder = LabelEncoder()
prognosis_train = train.pop('prognosis')
prognosis_test = test.pop('prognosis')

y = pd.Series(
    prognosis_encoder.fit_transform(prognosis_train),
    index=prognosis_train.index,
    name='prognosis',
)
y_test_2 = pd.Series(
    prognosis_encoder.transform(prognosis_test),
    index=prognosis_test.index,
    name='prognosis',
)

# Any remaining object/category feature columns are encoded per frame, as before.
train = label_encode_columns(train)
test = label_encode_columns(test)

# log(1 + x) transform of every feature column.
train = log_normalization(train)
test = log_normalization(test)

In [39]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the training file for validation. The seed is fixed (same
# value the SVC below uses) so the split, and every metric computed from it,
# is identical after Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(train, y, test_size=0.3, random_state=42)

# ML Model

In [40]:
# Linear-kernel SVM fitted on the training split (fit returns the estimator itself).
svm = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Hard class predictions and accuracy on the split the model was fitted on...
y_pred_train = svm.predict(X_train)
acc_train = accuracy_score(y_train, y_pred_train)

# ...and on the 30% validation split.
y_pred_test = svm.predict(X_test)
acc_test = accuracy_score(y_test, y_pred_test)

print(f'Accuracy Train (SVM): {acc_train}')
print(f'Accuracy Test (SVM): {acc_test}')

Accuracy Train (SVM): 1.0
Accuracy Test (SVM): 1.0


In [43]:
from sklearn.metrics import roc_auc_score

# Refit the linear SVM with probability estimates enabled so predict_proba can be used.
# Note this rebinds `svm`, replacing the model fitted in the previous cell.
svm = SVC(kernel='linear', random_state=42, probability=True).fit(X_train, y_train)

# One-vs-rest ROC AUC on the training split.
y_prob_train = svm.predict_proba(X_train)
roc_auc_train = roc_auc_score(y_train, y_prob_train, multi_class='ovr')

# Same metric on the validation split.
y_prob_test = svm.predict_proba(X_test)
roc_auc_test = roc_auc_score(y_test, y_prob_test, multi_class='ovr')

# Same metric on the separate Testing.csv frame.
y_prob_test_2 = svm.predict_proba(test)
roc_auc_test_2 = roc_auc_score(y_test_2, y_prob_test_2, multi_class='ovr')

print(f'ROC AUC Train (SVM): {roc_auc_train}')
print(f'ROC AUC Test (SVM): {roc_auc_test}')
print(f'ROC AUC Test 2 (SVM): {roc_auc_test_2}')

ROC AUC Train (SVM): 1.0
ROC AUC Test (SVM): 1.0
ROC AUC Test 2 (SVM): 1.0
