# **DISEASE PREDICTION**

**Import necessary libraries**

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
import joblib

In [2]:
import math
import random
from random import shuffle
import sys
import gmpy2
from time import time
from Crypto.Util.number import getPrime

# Encryption

In [3]:
def gcd(a,b):
    while b > 0:
        a, b = b, a % b
    return a

In [4]:
def lcm(a, b):
    return a * b // gcd(a, b) 

In [5]:
def int_time():
    return int(round(time() * 1000))

In [6]:
class PrivateKey(object):
    def __init__(self, p, q, n):
        self.p = p
        self.q = q
        self.l = (p-1) * (q-1)
        self.m = gmpy2.invert(self.l, n)  #1/fi(n)
    def __repr__(self):
        return '<PrivateKey: %s %s>' % (self.l, self.m)

In [7]:
class PublicKey(object):

    @classmethod
    def from_n(cls, n):
        return cls(n)
    def __init__(self, n):
        self.n = n
        self.n_sq = n * n
        self.g = n + 1
    def __repr__(self):
        return '<PublicKey: %s>' % self.n

In [8]:
def generate_keypair(bits):
    p_equal_q = True
    while p_equal_q:
        p = getPrime(bits // 2)
        q = getPrime(bits // 2)
        if (p!=q):
            p_equal_q = False
    n = p * q
    return PrivateKey(p, q, n), PublicKey(n)

In [9]:
def encrypt(pub, plain):
    one = gmpy2.mpz(1)
    state = gmpy2.random_state(int_time())
    r = gmpy2.mpz_random(state,pub.n)
    while gmpy2.gcd(r,pub.n) != one:
        state = gmpy2.random_state(int_time())
        r = gmpy2.mpz_random(state,pub.n)
    x = gmpy2.powmod(r,pub.n,pub.n_sq)
    cipher = gmpy2.f_mod(gmpy2.mul(gmpy2.powmod(pub.g,plain,pub.n_sq),x),pub.n_sq)
    return cipher

In [10]:
def decrypt(priv, pub, cipher):
    one = gmpy2.mpz(1)
    x = gmpy2.sub(gmpy2.powmod(cipher,priv.l,pub.n_sq),one)
    plain = gmpy2.f_mod(gmpy2.mul(gmpy2.f_div(x,pub.n),priv.m),pub.n)
    if plain >= gmpy2.f_div(pub.n,2):
        plain = plain - pub.n
    return plain

In [11]:
priv_key, pub_key = generate_keypair(100)
print(priv_key)
print(pub_key)

<PrivateKey: 983805829539610074307087390032 307633043610127283274688542101>
<PublicKey: 983805829539612059169265882361>


In [12]:
import json
def keypair_dump_jwk(priv, pub):
    rec_pub = {
        'n': pub.n
    }

    rec_priv = {
        'p': priv.p,
        'q': priv.q,
    }

    priv_jwk = json.dumps(rec_priv)
    pub_jwk = json.dumps(rec_pub)
    return pub_jwk, priv_jwk

In [13]:
pub_jwk, priv_jwk = keypair_dump_jwk(priv_key, pub_key)

In [14]:
with open("phe_key.priv", "w") as F:
    F.write(priv_jwk + "\n")
    print("Written private key to {}".format(F.name))

Written private key to phe_key.priv


In [15]:
with open("phe_key.pub", "w") as F:
    F.write(pub_jwk + "\n")
    print("Written public key to {}".format(F.name))

Written public key to phe_key.pub


In [16]:
def keypair_load_jwk(pub_jwk, priv_jwk):
    
    rec_pub = json.loads(pub_jwk)
    rec_priv = json.loads(priv_jwk)

    n = rec_pub['n']
    p = rec_priv['p']
    q = rec_priv['q']

    return PrivateKey(p, q, n), PublicKey(n)

In [17]:
with open("phe_key.pub", "r") as F:
     pub_jwk = F.read()

with open("phe_key.priv", "r") as F:
     priv_jwk = F.read()

priv_key, pub_key = keypair_load_jwk(pub_jwk, priv_jwk)
print(priv_key)
print(pub_key)

<PrivateKey: 983805829539610074307087390032 307633043610127283274688542101>
<PublicKey: 983805829539612059169265882361>


**Reading datasets**

In [18]:
df1 = pd.read_csv("datasets/dataset1.csv")
df2 = pd.read_csv("datasets/dataset2.csv")
df3 = pd.read_csv("datasets/dataset3.csv")

In [19]:
df1.replace({'disease':{'Fungal infection':0,'Allergy':1,'GERD':2,'Chronic cholestasis':3,'Drug Reaction':4,
'Peptic ulcer diseae':5,'AIDS':6,'Diabetes ':7,'Gastroenteritis':8,'Bronchial Asthma':9,'Hypertension ':10,
'Migraine':11,'Cervical spondylosis':12,
'Paralysis (brain hemorrhage)':13,'Jaundice':14,'Malaria':15,'Chicken pox':16,'Dengue':17,'Typhoid':18,'hepatitis A':19,
'Hepatitis B':20,'Hepatitis C':21,'Hepatitis D':22,'Hepatitis E':23,'Alcoholic hepatitis':24,'Tuberculosis':25,
'Common Cold':26,'Pneumonia':27,'Dimorphic hemmorhoids(piles)':28,'Heart attack':29,'Varicose veins':30,'Hypothyroidism':31,
'Hyperthyroidism':32,'Hypoglycemia':33,'Osteoarthristis':34,'Arthritis':35,
'(vertigo) Paroymsal  Positional Vertigo':36,'Acne':37,'Urinary tract infection':38,'Psoriasis':39,
'Impetigo':40}},inplace=True)

In [20]:
df2.replace({'disease':{'Fungal infection':0,'Allergy':1,'GERD':2,'Chronic cholestasis':3,'Drug Reaction':4,
'Peptic ulcer diseae':5,'AIDS':6,'Diabetes ':7,'Gastroenteritis':8,'Bronchial Asthma':9,'Hypertension ':10,
'Migraine':11,'Cervical spondylosis':12,
'Paralysis (brain hemorrhage)':13,'Jaundice':14,'Malaria':15,'Chicken pox':16,'Dengue':17,'Typhoid':18,'hepatitis A':19,
'Hepatitis B':20,'Hepatitis C':21,'Hepatitis D':22,'Hepatitis E':23,'Alcoholic hepatitis':24,'Tuberculosis':25,
'Common Cold':26,'Pneumonia':27,'Dimorphic hemmorhoids(piles)':28,'Heart attack':29,'Varicose veins':30,'Hypothyroidism':31,
'Hyperthyroidism':32,'Hypoglycemia':33,'Osteoarthristis':34,'Arthritis':35,
'(vertigo) Paroymsal  Positional Vertigo':36,'Acne':37,'Urinary tract infection':38,'Psoriasis':39,
'Impetigo':40}},inplace=True)

In [21]:
df3.replace({'disease':{'Fungal infection':0,'Allergy':1,'GERD':2,'Chronic cholestasis':3,'Drug Reaction':4,
'Peptic ulcer diseae':5,'AIDS':6,'Diabetes ':7,'Gastroenteritis':8,'Bronchial Asthma':9,'Hypertension ':10,
'Migraine':11,'Cervical spondylosis':12,
'Paralysis (brain hemorrhage)':13,'Jaundice':14,'Malaria':15,'Chicken pox':16,'Dengue':17,'Typhoid':18,'hepatitis A':19,
'Hepatitis B':20,'Hepatitis C':21,'Hepatitis D':22,'Hepatitis E':23,'Alcoholic hepatitis':24,'Tuberculosis':25,
'Common Cold':26,'Pneumonia':27,'Dimorphic hemmorhoids(piles)':28,'Heart attack':29,'Varicose veins':30,'Hypothyroidism':31,
'Hyperthyroidism':32,'Hypoglycemia':33,'Osteoarthristis':34,'Arthritis':35,
'(vertigo) Paroymsal  Positional Vertigo':36,'Acne':37,'Urinary tract infection':38,'Psoriasis':39,
'Impetigo':40}},inplace=True)

In [22]:
df1_copy = df1
for col in df1_copy.columns:
    res = []
    for index, row in df1_copy.iterrows():
        if(isinstance(row[col], int)):
            c = row[col]
        else:
            c = row[col].item()
        M1 = encrypt(pub_key, c)
        res.append(M1)
        
    df1_copy[col] = res
        
df1_copy.head()
df1_copy.to_csv('Encrypted\dataset1_encrypted.csv', header=True, index=False)

In [23]:
df2_copy = df2
for col in df2_copy.columns:
    res = []
    for index, row in df2_copy.iterrows():
        if(isinstance(row[col], int)):
            c = row[col]
        else:
            c = row[col].item()
        M1 = encrypt(pub_key, c)
        res.append(M1)
        
    df2_copy[col] = res
        
df2_copy.head()
df2_copy.to_csv('Encrypted\dataset2_encrypted.csv', header=True, index=False)

In [24]:
df3_copy = df3
for col in df3_copy.columns:
    res = []
    for index, row in df3_copy.iterrows():
        if(isinstance(row[col], int)):
            c = row[col]
        else:
            c = row[col].item()
        M1 = encrypt(pub_key, c)
        res.append(M1)
        
    df3_copy[col] = res
        
df3_copy.head()
df3_copy.to_csv('Encrypted\dataset3_encrypted.csv', header=True, index=False)

In [25]:
df1_encrypted = pd.read_csv('Encrypted\dataset1_encrypted.csv')
df1_encrypted.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,high_fever,sunken_eyes,breathlessness,sweating,dehydration,indigestion,headache,yellowish_skin,dark_urine,disease
0,1457048347930246093861293140557570960837459346...,1108259719633017336513550696893626574071049241...,3605393176859853941888076037883442716714228142...,1401321625475462725778554700696310051824874129...,2316735187519339338838001061988287520942959819...,3024389572401118591213091340586910178513161493...,5223325944617320640195360725874567587173358020...,5495548007829743047037637923543780658446999153...,1396723784774494623684449030459609701280859158...,7822874317929379487667986829014472248761435258...,...,1471600008518183001074491796347598684956224349...,5654271955936075545401096737526643178919274625...,4975674579021682978136276465911019539237954769...,6232231537282221073816548040868209576846828364...,4895104438228239902874491585225333901202846131...,4213860855324402331982083701937160116019030015...,1500263718701772379953887097633617559936894968...,6232278219673961115046723459895632664128777613...,1275086490225926004130616811573495564473705134...,4888612703548080925416840301460590952244997964...
1,1768832288290466740987067899251551242113024030...,1275529740401831507537499468554675587795047140...,5026297991502602445422187224989970858097156048...,1401321625475462725778554700696310051824874129...,1179347862326929366598373336525320427156472516...,2345490235397547330479748276788374905463599122...,5223325944617320640195360725874567587173358020...,5495548007829743047037637923543780658446999153...,3736982369815001466348917089051692148160727788...,3596343760165906912977549903607021523400621165...,...,1471600008518183001074491796347598684956224349...,3787712940920341154177887076033175438515457495...,7425001526723342082132682162012481992756856770...,4604549308352560087712141651145364189273156723...,4895104438228239902874491585225333901202846131...,1025829810447734913218562959958308356693470932...,1500263718701772379953887097633617559936894968...,6376025907812176900624749995311121409005602131...,1275086490225926004130616811573495564473705134...,4641244207935751596411208971158456784203352658...
2,5324552576824899011567445858605916957604021676...,7946521649317805244703945515239743554971341211...,5026297991502602445422187224989970858097156048...,2998815885820111006270963836087789741708674032...,1179347862326929366598373336525320427156472516...,7971950607230784151182615332763180245929877870...,4185452654814229325411146936202672467433078212...,4138348387449176986299447898621770115509475421...,3736982369815001466348917089051692148160727788...,6529999133234582677548332327505521136996911174...,...,4177045020019536896111477878420787602715847613...,5764041721106982173159627082087429067726373206...,7425001526723342082132682162012481992756856770...,4604549308352560087712141651145364189273156723...,4895104438228239902874491585225333901202846131...,4365837295178113175362087114457818697006948753...,1429854053100079808638573725157035125439142966...,6376025907812176900624749995311121409005602131...,4072921187505015840453017448849314403659265390...,6400837710883257557946002069386899728546357475...
3,5324552576824899011567445858605916957604021676...,3665800559635475821387174548079481209632218914...,6967886257608041980820168528158506739860027817...,2998815885820111006270963836087789741708674032...,7339848214732527117635194630089622554218091300...,7971950607230784151182615332763180245929877870...,5140816473316381913397651839451854956064058007...,4138348387449176986299447898621770115509475421...,8015657274647955010007228662577273741533667942...,6529999133234582677548332327505521136996911174...,...,6330485555271839447772558510606907431464035309...,5764041721106982173159627082087429067726373206...,6109558136571130860501360632974930893130927978...,9142625219975615973822626525104766653665804649...,4101689694043522277481184553757613273346766909...,4365837295178113175362087114457818697006948753...,1429854053100079808638573725157035125439142966...,4265998465618224513235318440752099161528769904...,6942500991896127482202108822613140923070629111...,3246950813114907153401947794369785930710662215...
4,6438459610959276158028635960492876107768156180...,3665800559635475821387174548079481209632218914...,3320913462226983790787416567716346354321935374...,2998815885820111006270963836087789741708674032...,4634349952640298324554829694973055834898215558...,7971950607230784151182615332763180245929877870...,5140816473316381913397651839451854956064058007...,7983501794095780591318781835032668977921089458...,3102183235088822406349310350700899660639872011...,1177823826307530876674941444305058043410132870...,...,7174246278401356942222089919233645114749996085...,4325184647830818612641088182925361820775160545...,6095114499912419564693169250057786756057609272...,5254397861533769355462611895628672160277321787...,5641836481913807385156329801863330555209717877...,4365837295178113175362087114457818697006948753...,7839009737747805473453598020807749948251702589...,4265998465618224513235318440752099161528769904...,6942500991896127482202108822613140923070629111...,3246950813114907153401947794369785930710662215...


In [26]:
df2_encrypted = pd.read_csv('Encrypted\dataset2_encrypted.csv')
df2_encrypted.head()

Unnamed: 0,nausea,loss_of_appetite,pain_behind_the_eyes,back_pain,constipation,abdominal_pain,diarrhoea,mild_fever,yellow_urine,yellowing_of_eyes,...,fast_heart_rate,pain_during_bowel_movements,pain_in_anal_region,bloody_stool,irritation_in_anus,neck_pain,dizziness,cramps,bruising,disease
0,1524642818444015381325399474376169212813830705...,4750421908817886659837775657349689500632235779...,7941888051534218541109740676749208423622384859...,6463515135598206456712542192025830455420744098...,1372335412232106219092151195511487878161434419...,4240799393973085052821192942681159568382483309...,3622498302555525720534249218016948070884252974...,5272725515778655325426050442137651197465724391...,1888482822713563747104893707090172597648991399...,2344773765708178394211926714962804134646240485...,...,6898336925849870055977087151289696919940934661...,5734772417127097747185525670835626455840904450...,1977341126874522458935112718557051116429671407...,4006849553393009447983665959370165486468395488...,4322053848454565833519016662455292841474091481...,9862549782687066775338348364411378230046141414...,2313892110591822392528877148281624649893426105...,1504756362536930128065705042644074005461492237...,6708019932743262849059800098265681321188808864...,4603020070140187550037041485784965700263213082...
1,5964668177788363087976068803595186341067800820...,4750421908817886659837775657349689500632235779...,7941888051534218541109740676749208423622384859...,6463515135598206456712542192025830455420744098...,1372335412232106219092151195511487878161434419...,4240799393973085052821192942681159568382483309...,5355605514254424439836341706349672069664651669...,5272725515778655325426050442137651197465724391...,2555525136630075852692359750532455499319549967...,2344773765708178394211926714962804134646240485...,...,6898336925849870055977087151289696919940934661...,5734772417127097747185525670835626455840904450...,6236247770727944377126462404358560812911718460...,5689178479366236362973798339261650545422360231...,4322053848454565833519016662455292841474091481...,9862549782687066775338348364411378230046141414...,4479013236540942300225591191720913397463669846...,1504756362536930128065705042644074005461492237...,1765550112926539792797462745874302277209182824...,6559882648469449281795027788716816469446468548...
2,5964668177788363087976068803595186341067800820...,3198481952104314423172455685319635424603966937...,1218065014434137145842589869489447934069126663...,9361454683585374752405759125594300621590702928...,4027924733743069354103663497586499393918952647...,6271095164551478427903145739276469833103278628...,5948087469049257961581462953846424506413131604...,6078561805692881309080627372066005912209641002...,2555525136630075852692359750532455499319549967...,2110691654614229001786346698418505333749092080...,...,6898336925849870055977087151289696919940934661...,3123536170196210967819955661198704356973622751...,3445752128560713106974587328559233988285753606...,5689178479366236362973798339261650545422360231...,2243415220335173746904531648657821263143449366...,7118321973327198984651264596508200859662016888...,4479013236540942300225591191720913397463669846...,1504756362536930128065705042644074005461492237...,1765550112926539792797462745874302277209182824...,6449430269572482498765910905556867158961274268...
3,5964668177788363087976068803595186341067800820...,3198481952104314423172455685319635424603966937...,1218065014434137145842589869489447934069126663...,3688775087231344517192863608900107835463191729...,7856078361963792153652955976577558101091256459...,5906571536167286283241000061406635896457855062...,6961690768393332934591377877059448251447166418...,6078561805692881309080627372066005912209641002...,5579512397460021936409527277297635098165258194...,2110691654614229001786346698418505333749092080...,...,4298533307706343103879630384188902168554948106...,6162744649419066498264273970355801644976686959...,3445752128560713106974587328559233988285753606...,5329171104558037120340377050679294105961342789...,5766326562079176173786090903692713438735084306...,7118321973327198984651264596508200859662016888...,3540503187536031521005164336383451386604532041...,9129386550895729442540320301031815398557734566...,2735321950925585151841769169111470789255605117...,6449430269572482498765910905556867158961274268...
4,2773635392069285067916459977855587891257811045...,5279589730356046512154227144604030514702427950...,3398820642162085568735361266523927039966796689...,4565196386946203111848206062689468509678203013...,1753992650398389890385842499930422002416963181...,5679066301551757445374788578913796312010932702...,6961690768393332934591377877059448251447166418...,3565251401372093268790370269705256790289364587...,5579512397460021936409527277297635098165258194...,2524630559387557057664024625105497198324432990...,...,7567316265717341357383606401392809658770945720...,6162744649419066498264273970355801644976686959...,7942435382353234341302404021730679180170891583...,5962716687296868183793802539628906610472394765...,5222596561227251991729076929240794118656843038...,3049793788509993848441925478886163756226198611...,3540503187536031521005164336383451386604532041...,9129386550895729442540320301031815398557734566...,2505821718392860935594279502633781061175281833...,1237117959944246497531109463604828690113063860...


In [27]:
df3_encrypted = pd.read_csv('Encrypted\dataset3_encrypted.csv')
df3_encrypted.head()

Unnamed: 0,obesity,swollen_legs,swollen_blood_vessels,puffy_face_and_eyes,enlarged_thyroid,brittle_nails,swollen_extremeties,excessive_hunger,extra_marital_contacts,drying_and_tingling_lips,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,disease
0,6776831126111136605381919290190342643797491144...,1074279587046436362158777956121934208073064239...,5038816459937171135611110251022839542371407710...,4062162666378878194871394379816978922070180735...,1934265187164239273033466878115387001881375294...,2384088049370124390601647806650761887577419791...,6493753369999121317069006057748768857822542982...,3687197694887031512938827473739711113594048795...,6088131143010857807004680059182538977421478099...,4380774338820581297908216833234577813773788336...,...,2385956315718858623153775872044054312327182599...,5061161360919951160713098673355247079593947409...,6446335422227552754122245606878018955777891282...,6669845118672711559973356973039329614956789861...,1862949509127474203319332107168660103725213454...,4522853400545078829310662208920179574798098899...,1654287289442864673681932052437941239970819670...,4842615811019700591183507778085761718672438052...,7264457197684062072080196918525744187131427739...,2158124973944494820310824600898776523706711641...
1,2309707502254021461270042934008624307996156151...,8248286607831616572706730922542716446058088576...,5038816459937171135611110251022839542371407710...,2939539359976951370004963956039576032954499511...,1934265187164239273033466878115387001881375294...,2384088049370124390601647806650761887577419791...,4758972490271570707250588664961500867383673100...,1901901096503422860448182061886175933280609882...,3675201593530158052904920463979488240055871127...,4380774338820581297908216833234577813773788336...,...,2510391841986031544750464654148220184701059607...,1516611041535238087186699285757629053753548326...,5692540370470655413131998253189094324479467347...,1053245463420399154358316406272821757039025143...,2445652806131342177307858329700260482639698281...,6945065467792185204429346206412691004818527186...,4702236499371202966667955122501494734420192332...,1000371329993010123388629074507429731357938022...,2518747441126346286068206504883313222614565528...,5960935572368203577730738438192828758762171928...
2,1367512234169900206307539665145477638378813150...,8248286607831616572706730922542716446058088576...,6169112760324674675433807406625062025708488858...,2939539359976951370004963956039576032954499511...,1285776499776916361312455069724635828708840054...,3068331795204408522966196269263119360143595297...,4758972490271570707250588664961500867383673100...,1800149511401253081099085070707235414750406088...,3675201593530158052904920463979488240055871127...,7227118202707326250828508218770611949922369863...,...,3931915600412056508425015864213346870800880807...,3603929017910055457748578193381381154847718441...,2228349939927940784845812286243036448037581465...,1779940472895556791462705905791190123877005429...,2107921557417833310172668011272211552722623341...,7045235559592599503905563917815869248023518197...,4620662413416035387740051820665671292431280273...,2992743153989141050744026899180814935928515456...,2529254281840941730843810665511198662756985595...,6684975152369197157079484179578434353281810708...
3,1123835301969238335221561072142749450536111666...,3708053112346785079585440521473190662354138479...,6169112760324674675433807406625062025708488858...,3558237102816322605575551516314166716073068184...,1285776499776916361312455069724635828708840054...,3870942741371580776668959870470482720244972100...,6415697156988431045494092939668067986635602521...,1800149511401253081099085070707235414750406088...,4268838300590384191542254986464207627504920158...,7227118202707326250828508218770611949922369863...,...,1030107535271734117802455062892417112913481889...,3682204234580733402152129587165342298840964283...,7159807408541718940021682809327931659948830978...,1299591633602201222866849469971368651536115146...,5941075913592787131471380907816199052062399697...,6322739087514043479313982758492275479579786810...,6773264218409685549419103364365989264832865541...,2027123369611845703492733568739599477958871439...,1692767665382471959278096977752230356142840879...,1246883780298984911549441993852897756029676266...
4,1123835301969238335221561072142749450536111666...,3708053112346785079585440521473190662354138479...,5677595554502779859966956041973026017419105165...,4470791497563367227773239008833905199214204581...,5514896106491664125137869912074859409173838744...,3870942741371580776668959870470482720244972100...,2100530695424791539536908249612025808173360805...,1800149511401253081099085070707235414750406088...,4796049333141414445375235609575193234790504916...,4695260368744994246625760798961244767260040552...,...,4294804565749473389960408926502921167381468620...,5255273931313166106417856382216937019832114867...,3963809089615521068224192477142780399284472711...,4523611493032598051420180259672594803903725199...,3092304733833765846494123499652366472203224077...,1702855615504379445496162290312160862455294527...,7238447712200032269528713401341806021494661837...,5260599736689964358197424410982170260556266069...,1951960414749791008151092519899258545116132890...,6945603586842100044911270983649442680384993438...


In [28]:
df1 = df1_encrypted.drop('disease',axis = 1)
df2 = df2_encrypted.drop('disease',axis = 1)

In [29]:
df = pd.concat([df1,df2,df3_encrypted],axis=1,join='inner')
df.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,disease
0,1457048347930246093861293140557570960837459346...,1108259719633017336513550696893626574071049241...,3605393176859853941888076037883442716714228142...,1401321625475462725778554700696310051824874129...,2316735187519339338838001061988287520942959819...,3024389572401118591213091340586910178513161493...,5223325944617320640195360725874567587173358020...,5495548007829743047037637923543780658446999153...,1396723784774494623684449030459609701280859158...,7822874317929379487667986829014472248761435258...,...,2385956315718858623153775872044054312327182599...,5061161360919951160713098673355247079593947409...,6446335422227552754122245606878018955777891282...,6669845118672711559973356973039329614956789861...,1862949509127474203319332107168660103725213454...,4522853400545078829310662208920179574798098899...,1654287289442864673681932052437941239970819670...,4842615811019700591183507778085761718672438052...,7264457197684062072080196918525744187131427739...,2158124973944494820310824600898776523706711641...
1,1768832288290466740987067899251551242113024030...,1275529740401831507537499468554675587795047140...,5026297991502602445422187224989970858097156048...,1401321625475462725778554700696310051824874129...,1179347862326929366598373336525320427156472516...,2345490235397547330479748276788374905463599122...,5223325944617320640195360725874567587173358020...,5495548007829743047037637923543780658446999153...,3736982369815001466348917089051692148160727788...,3596343760165906912977549903607021523400621165...,...,2510391841986031544750464654148220184701059607...,1516611041535238087186699285757629053753548326...,5692540370470655413131998253189094324479467347...,1053245463420399154358316406272821757039025143...,2445652806131342177307858329700260482639698281...,6945065467792185204429346206412691004818527186...,4702236499371202966667955122501494734420192332...,1000371329993010123388629074507429731357938022...,2518747441126346286068206504883313222614565528...,5960935572368203577730738438192828758762171928...
2,5324552576824899011567445858605916957604021676...,7946521649317805244703945515239743554971341211...,5026297991502602445422187224989970858097156048...,2998815885820111006270963836087789741708674032...,1179347862326929366598373336525320427156472516...,7971950607230784151182615332763180245929877870...,4185452654814229325411146936202672467433078212...,4138348387449176986299447898621770115509475421...,3736982369815001466348917089051692148160727788...,6529999133234582677548332327505521136996911174...,...,3931915600412056508425015864213346870800880807...,3603929017910055457748578193381381154847718441...,2228349939927940784845812286243036448037581465...,1779940472895556791462705905791190123877005429...,2107921557417833310172668011272211552722623341...,7045235559592599503905563917815869248023518197...,4620662413416035387740051820665671292431280273...,2992743153989141050744026899180814935928515456...,2529254281840941730843810665511198662756985595...,6684975152369197157079484179578434353281810708...
3,5324552576824899011567445858605916957604021676...,3665800559635475821387174548079481209632218914...,6967886257608041980820168528158506739860027817...,2998815885820111006270963836087789741708674032...,7339848214732527117635194630089622554218091300...,7971950607230784151182615332763180245929877870...,5140816473316381913397651839451854956064058007...,4138348387449176986299447898621770115509475421...,8015657274647955010007228662577273741533667942...,6529999133234582677548332327505521136996911174...,...,1030107535271734117802455062892417112913481889...,3682204234580733402152129587165342298840964283...,7159807408541718940021682809327931659948830978...,1299591633602201222866849469971368651536115146...,5941075913592787131471380907816199052062399697...,6322739087514043479313982758492275479579786810...,6773264218409685549419103364365989264832865541...,2027123369611845703492733568739599477958871439...,1692767665382471959278096977752230356142840879...,1246883780298984911549441993852897756029676266...
4,6438459610959276158028635960492876107768156180...,3665800559635475821387174548079481209632218914...,3320913462226983790787416567716346354321935374...,2998815885820111006270963836087789741708674032...,4634349952640298324554829694973055834898215558...,7971950607230784151182615332763180245929877870...,5140816473316381913397651839451854956064058007...,7983501794095780591318781835032668977921089458...,3102183235088822406349310350700899660639872011...,1177823826307530876674941444305058043410132870...,...,4294804565749473389960408926502921167381468620...,5255273931313166106417856382216937019832114867...,3963809089615521068224192477142780399284472711...,4523611493032598051420180259672594803903725199...,3092304733833765846494123499652366472203224077...,1702855615504379445496162290312160862455294527...,7238447712200032269528713401341806021494661837...,5260599736689964358197424410982170260556266069...,1951960414749791008151092519899258545116132890...,6945603586842100044911270983649442680384993438...


In [30]:
df_copy = df
for col in df_copy.columns:
    res = []
    for index, row in df_copy.iterrows():
        if(isinstance(row[col], str)):
            c = int(row[col])
        else:
            c1 = row[col].tolist()
            c = int(c1[0])
        M1 = decrypt(priv_key, pub_key, c)
        res.append(M1)
        
    df_copy[col] = res
        
df_copy.head()
df_copy.to_csv('Decrypted\dataset_decrypted.csv', header=True, index=False)

In [31]:
df_decrypted = pd.read_csv('Decrypted\dataset_decrypted.csv')
df_decrypted.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,disease
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
print("Symptoms:")
print("")
for i in df_decrypted.columns:
    print(i)

Symptoms:

itching
skin_rash
nodal_skin_eruptions
continuous_sneezing
shivering
chills
joint_pain
stomach_pain
acidity
ulcers_on_tongue
muscle_wasting
vomiting
burning_micturition
spotting_ urination
fatigue
weight_gain
anxiety
cold_hands_and_feets
mood_swings
weight_loss
restlessness
lethargy
patches_in_throat
irregular_sugar_level
cough
high_fever
sunken_eyes
breathlessness
sweating
dehydration
indigestion
headache
yellowish_skin
dark_urine
nausea
loss_of_appetite
pain_behind_the_eyes
back_pain
constipation
abdominal_pain
diarrhoea
mild_fever
yellow_urine
yellowing_of_eyes
acute_liver_failure
fluid_overload
swelling_of_stomach
swelled_lymph_nodes
malaise
blurred_and_distorted_vision
phlegm
throat_irritation
redness_of_eyes
sinus_pressure
runny_nose
congestion
chest_pain
weakness_in_limbs
fast_heart_rate
pain_during_bowel_movements
pain_in_anal_region
bloody_stool
irritation_in_anus
neck_pain
dizziness
cramps
bruising
obesity
swollen_legs
swollen_blood_vessels
puffy_face_and_eyes
enla

In [34]:
print("Total no of samples in the dataset:",df_decrypted.shape)

Total no of samples in the dataset: (4920, 133)


**Filtering diseases**

In [38]:
df = df_decrypted.copy()

In [39]:
df

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,disease
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,36
4916,0,1,0,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,37
4917,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,38
4918,0,1,0,0,0,0,1,0,0,0,...,0,0,1,1,1,1,0,0,0,39


In [40]:
df = df[(df['disease'] == 1) | (df['disease'] == 26) | (df['disease'] == 29)
              | (df['disease'] == 14) | (df['disease'] == 21) | (df['disease'] == 31) 
              | (df['disease'] == 7)]
df

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,disease
10,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
11,0,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
12,0,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
13,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
14,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4893,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
4900,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,21
4905,0,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,26
4908,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,29


In [41]:
print("No of samples after filtering:" ,df.shape[0])
col = df.columns
col = list(col)
col = col[:132]
print(len(col))


No of samples after filtering: 840
132


**Encoding the diseases**

In [42]:
l2 = []
for j in range(0,len(col)):
    l2.append(0)
    
disease_code = {1:'Allergy',7:'Diabetes',14:'Jaundice',21:'Hepatitis C',26:'Common Cold',29:'Heart attack',31:'Hypothyroidism'}


In [43]:
df.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,disease
10,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
11,0,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
12,0,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
13,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
14,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


**Splitting into training and testing**

In [51]:
X = df.drop('disease',axis = 1)
y = df['disease']

In [52]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.05)

In [53]:
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

(798, 132)
(798,)
(42, 132)
(42,)


**Naive Bayes Classifier**

In [54]:
nb = GaussianNB()

nb.fit(X_train,Y_train)

predicted = nb.predict(X_test)

score_nb = round(accuracy_score(predicted,Y_test)*100,2)

print("The accuracy score achieved using Naive Bayes is: "+str(score_nb)+" %")

The accuracy score achieved using Naive Bayes is: 100.0 %


In [16]:
filename = "seven_disease.sav"

In [17]:
joblib.dump(nb,filename)

['seven_disease.sav']

In [18]:
cls = joblib.load(filename)

**Checking the model**

In [19]:
sym1 = "chest_pain"
sym2 = "shivering"
sym3 = "chills"
sym4 = "itching"
sym5 = "skin_rash"

input_sym = [sym1,sym2,sym3,sym4,sym5]

for k in range(0,len(col)):
    for sym in input_sym:
        if(sym == col[k]):
            l2[k]=1

In [20]:
l2 = [l2]
sample = np.array(l2)
print(sample)

[[1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


In [21]:
predict = cls.predict(sample)
predicted_sample = predict[0]

predicted_disease = disease_code[predicted_sample]

print(predicted_disease)


Allergy




In [16]:
import nltk
import sklearn

print('The nltk version is {}.'.format(nltk.__version__))
print('The scikit-learn version is {}.'.format(sklearn.__version__))

The nltk version is 3.2.5.
The scikit-learn version is 0.22.2.post1.


In [23]:
#a = nb.predict_proba(sample)
#print(a)
class_prob = nb.predict_log_proba(sample)
dis = list(class_prob[0])
print(dis)
print("\n\n")
res = []
res = [(dis[i],i) for i in range(0,len(dis))]
res.sort(reverse=True)
print(res)

[-2276251076.2766647, -2276251171.141083, 0.0, -18210008670.548996, -9105004395.029306, -4552502206.353046, -18210008661.426933]



[(0.0, 2), (-2276251076.2766647, 0), (-2276251171.141083, 1), (-4552502206.353046, 5), (-9105004395.029306, 4), (-18210008661.426933, 6), (-18210008670.548996, 3)]




In [24]:
print("Diseases ordered based on the probability:")
for d in res:
    print(disease_code[d[1]])
    
    

Diseases ordered based on the probability:
Allergy
Heart attack
Jaundice
Hepatitis C
Diabetes
Hypothyroidism
Common Cold


In [25]:
predict = nb.predict(sample)
predicted_sample = predict[0]

print("Disease that patient might probabaly got affected: ",disease_code[predicted_sample])


Disease that patient might probabaly got affected:  Allergy


