In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

In [2]:
df=pd.read_csv('memproc.csv')
df.head()

Unnamed: 0,host,proc,mem,state
0,crisnd6378,-1.735788,-0.722979,Normal
1,crisnd5885,-0.56877,-1.934926,Normal
2,crisnd4508,-1.102691,-2.629311,Normal
3,crisnd6376,-2.010346,-1.778285,Normal
4,crisnd1301,-0.683525,-0.396034,Normal


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 247 entries, 0 to 246
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   host    247 non-null    object 
 1   proc    247 non-null    float64
 2   mem     247 non-null    float64
 3   state   247 non-null    object 
dtypes: float64(2), object(2)
memory usage: 7.8+ KB


In [4]:
df.drop('host', axis =1,inplace =True)

In [5]:
# df['state'],_=pd.factorize(df['state'])
# df.info()

In [6]:
_

Unnamed: 0,host,proc,mem,state
0,crisnd6378,-1.735788,-0.722979,Normal
1,crisnd5885,-0.56877,-1.934926,Normal
2,crisnd4508,-1.102691,-2.629311,Normal
3,crisnd6376,-2.010346,-1.778285,Normal
4,crisnd1301,-0.683525,-0.396034,Normal


In [7]:
features=['proc','mem']
X=df[features]
y=df['state']
y

0        Normal
1        Normal
2        Normal
3        Normal
4        Normal
         ...   
242    Infected
243    Infected
244    Infected
245      Normal
246    Infected
Name: state, Length: 247, dtype: object

In [8]:
scaling=StandardScaler()
scaling.fit_transform(X)
X

Unnamed: 0,proc,mem
0,-1.735788,-0.722979
1,-0.568770,-1.934926
2,-1.102691,-2.629311
3,-2.010346,-1.778285
4,-0.683525,-0.396034
...,...,...
242,3.142826,0.329150
243,1.054215,0.711637
244,0.410152,2.400802
245,-0.075083,-0.571929


In [9]:
Xtrain,Xtest,ytrain,ytest=train_test_split(X,y , test_size=0.2,random_state=13,stratify=y)
dtc=DecisionTreeClassifier()
rfc=RandomForestClassifier()
abc=AdaBoostClassifier()
dbc=GradientBoostingClassifier()
svmm=SVC(probability=True)
nb=GaussianNB()
vcl=VotingClassifier(estimators=[('dcntree',dtc),('naive_bayes',nb),('svm',svmm)],voting='hard')

In [10]:
dtc.fit(Xtrain,ytrain)
rfc.fit(Xtrain,ytrain)
abc.fit(Xtrain,ytrain)
dbc.fit(Xtrain,ytrain)
vcl.fit(Xtrain,ytrain)
nb.fit(Xtrain,ytrain)
svmm.fit(Xtrain,ytrain)

SVC(probability=True)

In [11]:
dtc_pred=dtc.predict(Xtest)
rfc_pred=rfc.predict(Xtest)
abc_pred=abc.predict(Xtest)
dbc_pred=dbc.predict(Xtest)
svmm_pred=svmm.predict(Xtest)
nb_pred=nb.predict(Xtest)
vcl_pred=vcl.predict(Xtest)

In [12]:
dtc_pred

array(['Infected', 'Infected', 'Normal', 'Normal', 'Normal', 'Infected',
       'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal',
       'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal',
       'Normal', 'Infected', 'Normal', 'Normal', 'Infected', 'Infected',
       'Normal', 'Infected', 'Infected', 'Infected', 'Normal', 'Normal',
       'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal',
       'Normal', 'Normal', 'Normal', 'Normal', 'Normal', 'Normal',
       'Infected', 'Normal', 'Normal', 'Infected', 'Normal', 'Normal',
       'Normal', 'Infected'], dtype=object)

In [13]:
accuracy=accuracy_score(ytest,vcl_pred)
accuracy

0.88

In [14]:
dtc_report=classification_report(ytest,dtc_pred)
print(dtc_report)
print()
rfc_report=classification_report(ytest,rfc_pred)
print(rfc_report)
print()
abc_report=classification_report(ytest,abc_pred)
print(abc_report)
print()
dbc_report=classification_report(ytest,dbc_pred)
print(dbc_report)
print()
svmm_report=classification_report(ytest,svmm_pred)
print(svmm_report)
print()
nb_report=classification_report(ytest,nb_pred)
print(nb_report)


              precision    recall  f1-score   support

    Infected       0.83      0.91      0.87        11
      Normal       0.97      0.95      0.96        39

    accuracy                           0.94        50
   macro avg       0.90      0.93      0.92        50
weighted avg       0.94      0.94      0.94        50


              precision    recall  f1-score   support

    Infected       0.89      0.73      0.80        11
      Normal       0.93      0.97      0.95        39

    accuracy                           0.92        50
   macro avg       0.91      0.85      0.88        50
weighted avg       0.92      0.92      0.92        50


              precision    recall  f1-score   support

    Infected       0.88      0.64      0.74        11
      Normal       0.90      0.97      0.94        39

    accuracy                           0.90        50
   macro avg       0.89      0.81      0.84        50
weighted avg       0.90      0.90      0.89        50


              pr

In [15]:
class_report=classification_report(ytest,vcl_pred)
print(class_report)

              precision    recall  f1-score   support

    Infected       0.78      0.64      0.70        11
      Normal       0.90      0.95      0.92        39

    accuracy                           0.88        50
   macro avg       0.84      0.79      0.81        50
weighted avg       0.88      0.88      0.88        50



In [17]:
pred=dtc.predict([[3,0.3]])
pred

array(['Infected'], dtype=object)