Mounting Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd 'drive/My Drive/Anon 17/Set 4'

/content/drive/My Drive/Anon 17/Set 4


In [3]:
!ls

'Set 4 L1.arff'  'Set 4 L2.arff'  'Set 4 L3.arff'


Installing Java

In [4]:
import os       #importing os to set environment variable
def install_java():
  !apt-get install -y openjdk-8-jdk-headless -qq > /dev/null      #install openjdk
  os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"     #set environment variable
  !java -version       #check java version
install_java()

openjdk version "11.0.9.1" 2020-11-04
OpenJDK Runtime Environment (build 11.0.9.1+1-Ubuntu-0ubuntu1.18.04)
OpenJDK 64-Bit Server VM (build 11.0.9.1+1-Ubuntu-0ubuntu1.18.04, mixed mode, sharing)


Installing Weka and its Python Wrapper


In [5]:
!apt-get install libproj-dev proj-data proj-bin
!apt-get install libgeos-dev
!pip install cython
!pip install python-weka-wrapper3

Reading package lists... Done
Building dependency tree       
Reading state information... Done
proj-data is already the newest version (4.9.3-2).
proj-data set to manually installed.
The following NEW packages will be installed:
  libproj-dev proj-bin
0 upgraded, 2 newly installed, 0 to remove and 15 not upgraded.
Need to get 232 kB of archives.
After this operation, 1,220 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libproj-dev amd64 4.9.3-2 [199 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 proj-bin amd64 4.9.3-2 [32.3 kB]
Fetched 232 kB in 0s (2,727 kB/s)
Selecting previously unselected package libproj-dev:amd64.
(Reading database ... 145823 files and directories currently installed.)
Preparing to unpack .../libproj-dev_4.9.3-2_amd64.deb ...
Unpacking libproj-dev:amd64 (4.9.3-2) ...
Selecting previously unselected package proj-bin.
Preparing to unpack .../proj-bin_4.9.3-2_amd64.deb ...
Unpacking proj-bin (

Testing if Python Weka works


In [6]:
import weka.core.jvm as jvm
jvm.start(packages=True,max_heap_size="25G")

from weka.core.converters import Loader
from weka.core.classes import Random
from weka.classifiers import Classifier, Evaluation

DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['/usr/local/lib/python3.6/dist-packages/javabridge/jars/rhino-1.7R4.jar', '/usr/local/lib/python3.6/dist-packages/javabridge/jars/runnablequeue.jar', '/usr/local/lib/python3.6/dist-packages/javabridge/jars/cpython.jar', '/usr/local/lib/python3.6/dist-packages/weka/lib/python-weka-wrapper.jar', '/usr/local/lib/python3.6/dist-packages/weka/lib/weka.jar']
DEBUG:weka.core.jvm:MaxHeapSize=25G
DEBUG:weka.core.jvm:Package support enabled


ARFF File loader

In [7]:
loader = Loader(classname="weka.core.converters.ArffLoader")
data_file = 'Set 4 L1.arff'
data = loader.load_file(data_file)
print('Sample size: ', data.num_attributes)
print(data.attribute_names())


Sample size:  41
['pl_1', 'pl_2', 'pl_3', 'pl_4', 'pl_5', 'pl_6', 'pl_7', 'pl_8', 'pl_9', 'pl_10', 'pl_11', 'pl_12', 'pl_13', 'pl_14', 'pl_15', 'pl_16', 'pl_17', 'pl_18', 'pl_19', 'pl_20', 'iat_1', 'iat_2', 'iat_3', 'iat_4', 'iat_5', 'iat_6', 'iat_7', 'iat_8', 'iat_9', 'iat_10', 'iat_11', 'iat_12', 'iat_13', 'iat_14', 'iat_15', 'iat_16', 'iat_17', 'iat_18', 'iat_19', 'iat_20', 'TrafficType']


Packets Extraction(Ordered)

In [8]:
from weka.filters import Filter
def extract_no_of_packets(n):
  l1=[]
  for i in range(1,n+1):
    l1.append(i)
    l1.append(20+i)
  l1.append(41)
#Converting list to str
  l1=(list(map(str,l1)))
  l1= ','.join(l1)

#Copy of original data
  Cdata=data
  reordered = Filter(classname='weka.filters.unsupervised.attribute.Reorder',options=["-R",l1])
  reordered.inputformat(Cdata)
  reordered=reordered.filter(Cdata)
  print(reordered.attribute_names())
  return reordered



In [9]:
from weka.classifiers import Evaluation
from weka.core.classes import Random
classifiers=["weka.classifiers.bayes.NaiveBayes","weka.classifiers.bayes.BayesNet","weka.classifiers.trees.J48","weka.classifiers.trees.RandomForest"]
parameters=[["-D"],["-D","-Q","weka.classifiers.bayes.net.search.local.TAN"],["-U"],[""]]
def MLClassifier(choice,data,n):
  cls = Classifier(classname=classifiers[choice-1],options=parameters[choice-1])
  data.class_index=(n*2)

  evl = Evaluation(data)
  evl.crossvalidate_model(cls, data, 10, Random(1))
  print('For the classifier '+classifiers[choice-1])
  print("Accuracy: ",evl.percent_correct)
  print("F-Measure: ",evl.unweighted_macro_f_measure)
  print("\n")
if __name__=="__main__":
  no_of_packets=int(input("Enter the number of packets:"))
  for i in range(4,5):
    Temp_dataset=extract_no_of_packets(no_of_packets)
    print("NUMBER OF PACKETS:",no_of_packets)  
    MLClassifier(i,Temp_dataset,no_of_packets)

Enter the number of packets:5
['pl_1', 'iat_1', 'pl_2', 'iat_2', 'pl_3', 'iat_3', 'pl_4', 'iat_4', 'pl_5', 'iat_5', 'TrafficType']
NUMBER OF PACKETS: 5
For the classifier weka.classifiers.trees.RandomForest
Accuracy:  99.79261202495746
F-Measure:  0.9977698375203458


