In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import joblib
import pickle
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

#preprocess
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler
import warnings
warnings.filterwarnings("ignore")


# Load model, label encoder, and data

In [2]:
# Load the trained classifier from disk.
# The context manager closes the file handle as soon as unpickling finishes;
# `model` stays bound (to the now-closed file object) for backward compatibility.
# NOTE(review): pickle.load can execute arbitrary code — only load model files
# that come from a trusted source.
with open('thyroid_model.pkl','rb') as model:
    svc = pickle.load(model)


In [3]:
# Restore the object persisted in 'label_encoder.joblib'.
# NOTE(review): presumably the fitted label encoder for the class column —
# confirm; no visible cell uses it apart from a commented-out line.
label_predictor = joblib.load(filename='label_encoder.joblib')


In [4]:
# Read the preprocessed dataset; the first CSV column becomes the row index.
df = pd.read_csv(
    'preprocessed_data.csv',
    index_col=0,
)


In [5]:
# Bare expression: the notebook renders the loaded DataFrame as this cell's
# output so the columns and a sample of rows can be inspected.
df


Unnamed: 0,age,sex,on_thyroxine,query_on_thyroxine,on_antithyroid_medication,sick,pregnant,thyroid_surgery,I131_treatment,query_hypothyroid,...,tumor,hypopituitary,psych,TSH,T3,TT4,T4U,FTI,referral_source,classes
0,41.0,0,0,0,0,0,0,0,0,0,...,0,0,0,1.30,2.5,125.0,1.14,109.0,1,negative
1,23.0,0,0,0,0,0,0,0,0,0,...,0,0,0,4.10,2.0,102.0,0.98,107.0,4,negative
2,46.0,1,0,0,0,0,0,0,0,0,...,0,0,0,0.98,2.0,109.0,0.91,120.0,4,negative
3,70.0,0,1,0,0,0,0,0,0,0,...,0,0,0,0.16,1.9,175.0,0.98,107.0,4,negative
4,70.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.72,1.2,61.0,0.87,70.0,3,negative
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,70.0,1,0,0,0,0,0,0,0,0,...,0,0,0,2.70,2.0,155.0,1.05,148.0,3,negative
2796,73.0,1,0,1,0,0,0,0,0,0,...,0,0,0,1.40,0.7,63.0,0.88,72.0,4,negative
2797,75.0,1,0,0,0,0,0,0,0,0,...,0,0,0,1.40,2.0,147.0,0.80,183.0,4,negative
2798,60.0,0,0,0,0,0,0,0,0,0,...,0,0,0,1.40,2.0,100.0,0.83,121.0,4,negative


# Testing the model on sample inputs

In [6]:
# Build the model input table: every column of `df` except the 'classes' label.
# NOTE: despite its name, `target` holds the input features, not the label.
target = df.drop(columns=['classes'])
target


Unnamed: 0,age,sex,on_thyroxine,query_on_thyroxine,on_antithyroid_medication,sick,pregnant,thyroid_surgery,I131_treatment,query_hypothyroid,...,goitre,tumor,hypopituitary,psych,TSH,T3,TT4,T4U,FTI,referral_source
0,41.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1.30,2.5,125.0,1.14,109.0,1
1,23.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,4.10,2.0,102.0,0.98,107.0,4
2,46.0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0.98,2.0,109.0,0.91,120.0,4
3,70.0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0.16,1.9,175.0,0.98,107.0,4
4,70.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.72,1.2,61.0,0.87,70.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,70.0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,2.70,2.0,155.0,1.05,148.0,3
2796,73.0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,1.40,0.7,63.0,0.88,72.0,4
2797,75.0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1.40,2.0,147.0,0.80,183.0,4
2798,60.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1.40,2.0,100.0,0.83,121.0,4


In [7]:
# Select the row at position 400 as a one-sample 2-D array (1 row, one column
# per feature), the layout predict() is given in the cells below.
testing = target.iloc[[400]].to_numpy()
testing


array([[ 35.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,
          0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,
         28.  ,   2.5 , 104.  ,   0.98, 107.  ,   4.  ]])

In [8]:
# Show the full record (including 'classes') of the sample used for the prediction.
# Positional lookup (.iloc) matches how `testing` is selected from `target`;
# label-based .loc[400] returns a different row (or raises KeyError) whenever
# the index labels are not contiguous from 0.
df.iloc[400]


age                                             35.0
sex                                                0
on_thyroxine                                       0
query_on_thyroxine                                 0
on_antithyroid_medication                          0
sick                                               0
pregnant                                           0
thyroid_surgery                                    0
I131_treatment                                     0
query_hypothyroid                                  0
query_hyperthyroid                                 0
lithium                                            0
goitre                                             0
tumor                                              0
hypopituitary                                      0
psych                                              0
TSH                                             28.0
T3                                               2.5
TT4                                           

In [9]:
# Predict the class id for the single sample in `testing` and print its
# human-readable name.
test=svc.predict(testing)
print(test)

# Class ids mapped to the labels this cell reports.
# NOTE(review): presumably mirrors the label encoder's ordering — confirm
# against the encoder used at training time.
class_names = {
    0: 'Compensated Hypothyroid',
    1: 'Negative',
    2: 'Primary Hypothyroid',
    3: 'Secondary Hypothyroid',
}

# predict() returns one id per sample; look up the scalar rather than
# truth-testing the whole array.
predicted_id = test[0]

# Report an unexpected id explicitly instead of silently labelling it
# 'Secondary Hypothyroid'.
print(class_names.get(predicted_id, f'Unknown class id: {predicted_id}'))


[0]
Compensated Hypothyroid


In [10]:
#pd.Series(label_predictor).unique()
#df.classes.unique()


In [11]:
#df.classes.value_counts()


In [12]:
# Ad-hoc sample with 22 feature values.
# NOTE(review): presumably in the column order of `target` (age, sex,
# 14 binary flags, TSH, T3, TT4, T4U, FTI, referral_source) — confirm.
print(svc.predict([
    [46.0, 1] + [0] * 14 + [0.98, 2, 109, 0.91, 102, 4]
]))


[1]


In [13]:
# Second ad-hoc sample with 22 feature values.
# NOTE(review): presumably in the column order of `target` (age, sex,
# 14 binary flags, TSH, T3, TT4, T4U, FTI, referral_source) — confirm.
data = [
    [65.0, 0] + [0] * 14 + [55, 0.7, 15, 1.08, 14, 3]
]

# The first (and only) predicted class id becomes the cell output.
svc.predict(data)[0]


2