**0. Libraries**

In [0]:
import numpy as np
import pandas as pd
import sklearn.utils

**1. Utility Functions**

In [0]:
def getMyDF():
  """Return an empty DataFrame that carries the record schema.

  The frame has no rows. Its columns are, in order, Internet, Minute, Hour,
  Weekday, Location, Audiocable and LABEL; the three flag columns are typed
  as bool and the remaining ones as int8.
  """
  # Insertion order of this mapping defines the column order of the frame.
  schema = {
      'Internet': 'bool',
      'Minute': 'int8',
      'Hour': 'int8',
      'Weekday': 'int8',
      'Location': 'bool',
      'Audiocable': 'bool',
      'LABEL': 'int8',
  }

  empty = pd.DataFrame(columns=list(schema))

  # Cast every column in a single call instead of one column at a time.
  return empty.astype(schema)

#*********************
def prop_bool(size,p):
  """Return a randomly ordered boolean array with a fixed share of True values.

  Args:
    size: number of elements in the returned array.
    p: proportion of True values; every position i with i < size*p is set
      True before the array is shuffled.

  Returns:
    A 1-D numpy bool array of length `size`, shuffled in place with
    np.random.shuffle (so it depends on numpy's global random state).
  """
  flags = np.zeros(size,dtype='bool')

  # Mark the leading positions in one vectorised comparison.
  flags[np.arange(size) < size*p] = True

  np.random.shuffle(flags)

  return flags
  
#***********************************************
def generatorX(Ip,Ml,Mh,Hl,Hh,Wl,Wh,Lp,Ap,size,k):
  """Generate `size` synthetic records for one application.

  Args:
    Ip, Lp, Ap: proportion of True values for the Internet, Location and
      Audiocable columns (passed to prop_bool).
    Ml, Mh: inclusive lower/upper bound for the Minute column.
    Hl, Hh: inclusive lower/upper bound for the Hour column.
    Wl, Wh: inclusive lower/upper bound for the Weekday column.
    size: number of records to generate.
    k: exponent of the label; every record gets LABEL = 2**k.

  Returns:
    A DataFrame with the columns produced by getMyDF() and `size` rows.
  """
  def uniform_int8(low,high):
    # randint excludes `high`, hence the +1 to make the upper bound inclusive.
    return np.random.randint(low=low,high=high+1,size=size,dtype='int8')

  df = getMyDF()

  # The values are evaluated top to bottom, so the random draws happen in
  # column order.
  generated = {
      'Internet': prop_bool(size,Ip),
      'Minute': uniform_int8(Ml,Mh),
      'Hour': uniform_int8(Hl,Hh),
      'Weekday': uniform_int8(Wl,Wh),
      'Location': prop_bool(size,Lp),
      'Audiocable': prop_bool(size,Ap),
      'LABEL': np.full(size,2**k,dtype='int8'),
  }
  for column,values in generated.items():
    df[column] = values

  return df

**2. Dataset Generation**

Dataset for each application

In [0]:
# Number of records generated for each application.
NUM_OF_RECORDS = 500

# Exponent used for each application's label (LABEL = 2**appId[name]).
appId = {'FireFox': 4, 'Matlab': 3, 'PDFviewer': 2, 'PUBG': 1, 'VLC': 0}

df_FireFox = generatorX(
    Ip=1, Ml=1, Mh=59, Hl=8, Hh=23, Wl=1, Wh=5, Lp=0.4, Ap=0.2,
    size=NUM_OF_RECORDS, k=appId['FireFox'],
)
df_Matlab = generatorX(
    Ip=0.5, Ml=45, Mh=59, Hl=2, Hh=3, Wl=3, Wh=4, Lp=0.2, Ap=0.5,
    size=NUM_OF_RECORDS, k=appId['Matlab'],
)
df_PDFviewer = generatorX(
    Ip=0.5, Ml=1, Mh=59, Hl=8, Hh=23, Wl=1, Wh=5, Lp=0.2, Ap=0.5,
    size=NUM_OF_RECORDS, k=appId['PDFviewer'],
)
df_PUBG = generatorX(
    Ip=1, Ml=1, Mh=59, Hl=18, Hh=23, Wl=1, Wh=7, Lp=0.7, Ap=0.7,
    size=NUM_OF_RECORDS, k=appId['PUBG'],
)
df_VLC = generatorX(
    Ip=0.5, Ml=1, Mh=59, Hl=8, Hh=23, Wl=6, Wh=7, Lp=0.5, Ap=0.9,
    size=NUM_OF_RECORDS, k=appId['VLC'],
)

Check dataset

In [0]:
# Show the first five generated FireFox records.
print('FireFox')
df_FireFox.head(n=5)

FireFox


Unnamed: 0,Internet,Minute,Hour,Weekday,Location,Audiocable,LABEL
0,True,5,19,3,True,False,16
1,True,50,17,4,False,False,16
2,True,32,9,2,False,False,16
3,True,44,11,3,False,True,16
4,True,27,13,3,True,False,16


In [0]:
# Show the first five generated Matlab records.
print('Matlab')
df_Matlab.head(n=5)

Matlab


Unnamed: 0,Internet,Minute,Hour,Weekday,Location,Audiocable,LABEL
0,False,57,2,4,True,False,8
1,True,49,3,3,False,True,8
2,True,51,2,3,False,True,8
3,True,56,3,4,True,True,8
4,True,49,2,3,False,False,8


In [0]:
# Show the first five generated PDFviewer records.
print('PDFViewer')
df_PDFviewer.head(n=5)

PDFViewer


Unnamed: 0,Internet,Minute,Hour,Weekday,Location,Audiocable,LABEL
0,True,18,14,2,False,True,4
1,True,8,21,1,False,True,4
2,False,24,21,3,True,False,4
3,False,42,15,3,False,True,4
4,False,55,12,2,False,False,4


In [0]:
# Show the first five generated PUBG records.
print('PUBG')
df_PUBG.head(n=5)

PUBG


Unnamed: 0,Internet,Minute,Hour,Weekday,Location,Audiocable,LABEL
0,True,46,20,2,True,False,2
1,True,26,21,6,True,True,2
2,True,36,23,3,True,True,2
3,True,50,18,3,True,True,2
4,True,26,18,5,True,False,2


In [0]:
# Show the first five generated VLC records.
print('VLC')
df_VLC.head(n=5)

VLC


Unnamed: 0,Internet,Minute,Hour,Weekday,Location,Audiocable,LABEL
0,True,38,16,6,True,True,1
1,True,21,11,7,False,True,1
2,False,36,21,7,False,True,1
3,False,2,15,6,False,True,1
4,False,45,11,6,False,True,1


Label Preparation

In [0]:

def labelAssign(df_FireFox,df_Matlab,df_PDFviewer,df_PUBG,df_VLC,size,verbose=True):
  """Merge the per-application datasets into one multi-label dataset.

  The frames are processed in argument order (FireFox, Matlab, PDFviewer,
  PUBG, VLC). For every surviving row of a frame, each later frame is
  searched for rows whose first six columns (the features) are identical.
  All such rows are removed from the later frame and the later
  application's bit, 2**appId[name], is set in the LABEL of the current
  row. Only the first row carrying a given feature combination absorbs the
  matches; later duplicates inside the same frame keep their own label.

  Differences from the previous implementation:
    * The bit is OR-ed in once per application. Previously it was added
      once per matching row, so duplicated matches produced labels that no
      longer were a valid bitmask (e.g. 16 + 4 + 4 = 24).
    * The result is assembled with pd.concat, because DataFrame.append was
      removed in pandas 2.0.
    * The input frames are left untouched, so the cell can be re-run.
    * Matching uses hash lookups instead of nested iterrows loops.

  Args:
    df_FireFox, df_Matlab, df_PDFviewer, df_PUBG, df_VLC: per-application
      frames whose first six columns are the features and which contain a
      'LABEL' column.
    size: unused; kept so existing calls keep working.
    verbose: when True (default) print one 'Found!! <i><j>' line per
      absorbed row, where <i> and <j> are the appId values of the absorbing
      and the absorbed application.

  Returns:
    A new DataFrame with the surviving rows of all frames, in argument
    order, with a fresh 0..n-1 index.
  """
  names = ['FireFox','Matlab','PDFviewer','PUBG','VLC']
  frames = [df_FireFox,df_Matlab,df_PDFviewer,df_PUBG,df_VLC]
  ids = [appId[name] for name in names]

  # Feature tuple of every row, working copy of the labels, and a flag that
  # tells whether the row is still part of the dataset.
  keys = [list(frame.iloc[:,0:6].itertuples(index=False,name=None)) for frame in frames]
  labels = [frame['LABEL'].to_numpy().copy() for frame in frames]
  alive = [np.ones(len(frame),dtype=bool) for frame in frames]

  # For every frame: feature tuple -> positions of the rows carrying it.
  # An entry is removed as soon as an earlier frame absorbs those rows.
  pending = []
  for frame_keys in keys:
    lookup = {}
    for pos,key in enumerate(frame_keys):
      lookup.setdefault(key,[]).append(pos)
    pending.append(lookup)

  for i in range(len(frames)):
    for pos,key in enumerate(keys[i]):
      if not alive[i][pos]:
        continue  # row was already absorbed by an earlier application
      for j in range(i+1,len(frames)):
        absorbed = pending[j].pop(key,())
        if not absorbed:
          continue
        for other in absorbed:
          alive[j][other] = False
          if verbose:
            print('Found!! %d%d' % (ids[i],ids[j]))
        # OR keeps the label a valid bitmask however many rows matched.
        labels[i][pos] |= 2**ids[j]

  parts = []
  for frame,frame_labels,keep in zip(frames,labels,alive):
    part = frame[keep].copy()
    part['LABEL'] = frame_labels[keep]
    parts.append(part)

  # Skip frames that lost all their rows; fall back to the first (empty)
  # part so the result still has the expected columns.
  non_empty = [part for part in parts if len(part)] or parts[:1]

  return pd.concat(non_empty,ignore_index=True)

  
# Combine the five per-application frames into the final labelled dataset.
df_final = labelAssign(
    df_FireFox, df_Matlab, df_PDFviewer, df_PUBG, df_VLC,
    size=NUM_OF_RECORDS,
)


Found!! 42
Found!! 42
Found!! 42
Found!! 41
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 41
Found!! 41
Found!! 41
Found!! 41
Found!! 42
Found!! 42
Found!! 42
Found!! 41
Found!! 41
Found!! 41
Found!! 42
Found!! 41
Found!! 41
Found!! 41
Found!! 42
Found!! 41
Found!! 41
Found!! 42
Found!! 41
Found!! 42
Found!! 41
Found!! 41
Found!! 42
Found!! 42
Found!! 41
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 41
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 41
Found!! 42
Found!! 41
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 42
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 21
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10
Found!! 10

Total number of records in the dataset

In [0]:
# Row count of the merged dataset.
df_final.shape[0]

4899

**4. Shuffle and Save**

In [0]:
# Shuffle the rows, then renumber the index from 0 so it no longer reflects
# the pre-shuffle order.
df_final = sklearn.utils.shuffle(df_final).reset_index(drop=True)
df_final

Unnamed: 0,Internet,Minute,Hour,Weekday,Location,Audiocable,LABEL
0,False,51,14,3,False,False,4
1,False,49,3,4,False,True,8
2,True,22,23,5,True,False,2
3,True,56,3,4,True,True,8
4,False,5,21,1,False,False,4
...,...,...,...,...,...,...,...
4894,True,52,21,7,True,True,2
4895,True,20,20,5,True,True,2
4896,False,56,2,4,False,True,8
4897,False,53,20,6,False,True,1


In [0]:
# Write the dataset to disk; the row index is written as the first column.
df_final.to_csv(path_or_buf='OS_Data.csv', index=True)