In [1]:
import numpy as np
import random as r

In [2]:
def generateParents(size,population):
    """Create the initial pool of random chromosomes.

    Parameters
    ----------
    size : int
        Number of bits per chromosome; each value is drawn uniformly from
        the inclusive range [0, 2**size - 1].
    population : int
        Number of chromosomes to generate.

    Returns
    -------
    numpy.ndarray
        Always a 1-D array of length ``population`` (empty when
        ``population`` <= 0), so callers can rely on ``.shape[0]`` even for
        a single chromosome.
    """
    upper = 2**size - 1
    # Build the values in a list first: one array construction instead of
    # re-allocating through np.append on every iteration.
    genes = [r.randint(0, upper) for _ in range(population)]
    if not genes:
        # np.array([]) would default to float; keep an integer dtype.
        return np.array([], dtype=int)
    return np.array(genes)

In [3]:
def mutate(rec, size):
    """Flip one randomly chosen bit of ``rec``.

    The bit position is drawn uniformly from 0 .. size - 1 (inclusive), and
    that bit is toggled with XOR. The mutated chromosome is returned.
    """
    flip_mask = 1 << r.randint(0, size - 1)   # single-bit mask at the random position
    return rec ^ flip_mask

In [4]:
def crossover(mom, dad, size):
    """Single-point crossover of two chromosomes followed by mutation.

    A cut position is drawn from 1 .. size - 1. The first child takes the
    bits below the cut from ``mom`` and the bits from the cut up to ``size``
    from ``dad``; the second child is the mirror image. Both children are
    passed through ``mutate`` before being returned as a 2-tuple.
    """
    cut = r.randint(1, size - 1)
    low_mask = (1 << cut) - 1                        # bits [0, cut)
    high_mask = ((1 << (size - cut)) - 1) << cut     # bits [cut, size)

    first_child = (mom & low_mask) | (dad & high_mask)
    second_child = (dad & low_mask) | (mom & high_mask)
    return mutate(first_child, size), mutate(second_child, size)

In [5]:
def newGeneration(generation, size):
    """Breed the next set of candidate chromosomes from a ranked generation.

    Parameters
    ----------
    generation : numpy.ndarray
        2-D array whose column 0 holds the chromosomes, best first.
    size : int
        Number of bits per chromosome, forwarded to ``crossover``.

    Returns
    -------
    numpy.ndarray
        1-D array containing the best two chromosomes unchanged, followed by
        the two offspring of every ordered pair among the best (up to) four.
    """
    # Cast to plain ints so the bitwise operations in crossover work even if
    # the generation array is stored as float or object dtype.
    elite = [int(gene) for gene in generation[:4, 0]]
    offspring = elite[:2]                    # the top two survive unchanged
    # Iterate over however many parents are actually available instead of a
    # hard-coded 4, so a generation with fewer rows does not raise IndexError.
    for i, mom in enumerate(elite):
        for j, dad in enumerate(elite):
            if i != j:
                offspring.extend(crossover(mom, dad, size))   # two children per mating
    if not offspring:
        return np.array([], dtype=int)
    return np.array(offspring)

In [6]:
def select_kernel(ker):
    """Translate a numeric kernel code into a kernel name.

    0 -> 'linear', 1 -> 'poly', 2 -> 'sigmoid'; any other value -> 'rbf'.
    """
    known_kernels = ((0, 'linear'), (1, 'poly'), (2, 'sigmoid'))
    for code, kernel_name in known_kernels:
        if ker == code:
            return kernel_name
    return 'rbf'

In [21]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
'''from sklearn import decomposition
from sklearn import svm
from sklearn.model_selection import GridSearchCV'''
import warnings
warnings.filterwarnings("ignore")

In [22]:
def assign_fitness(chromosomes, size, population,x_train,y_train,x_test,y_test):
    """Evaluate every chromosome with an SVC and keep the best ``population``.

    Each chromosome is decoded with a fixed bit layout (it matches the
    12-bit chromosomes used by ``apply_svm``; ``size`` itself is not used):

    * bits 10 and above -> kernel code, mapped through ``select_kernel``
    * bits 5..9         -> C, scaled linearly into [0.1, 100]
    * bits 0..4         -> gamma, scaled linearly into [0.0001, 10]

    An ``SVC`` (``max_iter=1000``) is fitted on the training data and scored
    on the test data; the fitness is the *negative* accuracy so that an
    ascending sort puts the most accurate chromosome first.

    Parameters
    ----------
    chromosomes : array-like of int
        Encoded hyper-parameter sets.
    size : int
        Chromosome bit width (kept for interface compatibility).
    population : int
        Maximum number of rows to return.
    x_train, y_train, x_test, y_test
        Data passed to ``SVC.fit`` / ``SVC.predict`` / ``accuracy_score``.

    Returns
    -------
    numpy.ndarray
        Object-dtype array of shape (rows, 2): column 0 is the chromosome as
        a Python int, column 1 the fitness as a float, sorted best first.
        Object dtype keeps the chromosomes as exact integers (they are used
        in bitwise operations) without truncating the fractional fitness.
    """
    field_bits = 5
    field_mask = (1 << field_bits) - 1      # 0b11111
    levels = field_mask                     # number of steps in each 5-bit field

    genes = [int(gene) for gene in np.ravel(chromosomes)]
    scores = []
    for rec in genes:
        kernel = select_kernel(rec >> (2 * field_bits))
        c = 0.1 + ((100 - 0.1) / levels) * ((rec >> field_bits) & field_mask)
        gamma = 0.0001 + ((10 - 0.0001) / levels) * (rec & field_mask)

        clf = SVC(kernel=kernel,C=c,gamma=gamma,max_iter=1000)
        clf.fit(x_train,y_train)
        y_pred = clf.predict(x_test)
        # Keep the score as a float: storing it in an int array would
        # truncate every accuracy below 1.0 to 0.
        scores.append(-1 * float(accuracy_score(y_test, y_pred)))

    # Rank ALL evaluated chromosomes first, then keep the best `population`;
    # slicing before sorting would discard candidates regardless of fitness.
    order = np.argsort(np.asarray(scores, dtype=float), kind="stable")[:population]

    generation = np.empty((len(order), 2), dtype=object)
    for row, idx in enumerate(order):
        generation[row, 0] = genes[idx]
        generation[row, 1] = scores[idx]
    return generation

In [9]:
def apply_svm(x_train,y_train,x_test,y_test):
    """Run the genetic search for SVC hyper-parameters on one data split.

    Starts from 5 random 12-bit chromosomes, then breeds and re-scores the
    population for 100 generations. After every scoring step the negated
    fitness column (i.e. the accuracies) of the current generation is
    appended to a running history.

    Returns
    -------
    tuple
        ``(best_accuracy, history)`` where ``best_accuracy`` is the negated
        fitness of the first row of the final generation and ``history`` is
        the list of per-chromosome accuracy entries collected along the way.
    """
    chromosome_bits = 12
    population = 5
    generations = 100
    split = (x_train, y_train, x_test, y_test)

    seed_pool = generateParents(chromosome_bits, population)
    ng = assign_fitness(seed_pool, chromosome_bits, population, *split)
    accuracy = list(-(ng[:, 1:]))            # one entry per row of the generation

    for _ in range(generations):
        candidates = newGeneration(ng, chromosome_bits)
        ng = assign_fitness(candidates, chromosome_bits, population, *split)
        accuracy += list(-(ng[:, 1:]))

    # Row 0 is the best chromosome; its fitness is stored negated.
    currentBestaccuracy = -ng[0, 1]
    print("accu",currentBestaccuracy)
    return currentBestaccuracy, accuracy
        

In [10]:
import pandas as pd
import numpy as np

In [11]:
# Raw string literal: the Windows path contains backslash sequences such as
# "\S", "\P" and "\D", which are invalid escapes in a normal string and
# trigger a warning in recent Python versions. The resulting path is unchanged.
# NOTE(review): absolute, machine-specific location; adjust to wherever
# Dry_Bean_Dataset.csv lives on your machine.
df=pd.read_csv(r"E:\Study\Sem 6\Predictive Analytics using Statistics\Dry_Bean_Dataset.csv")

In [12]:
# Preview the first rows of the loaded dataset.
df.head()

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724,SEKER
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.272751,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843,SEKER
2,29380,624.11,212.82613,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066,SEKER
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199,SEKER
4,30140,620.134,201.847882,190.279279,1.060798,0.33368,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166,SEKER


In [13]:
'''from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
df.iloc[:,0:-1]=ss.fit_transform(df.iloc[:,0:-1])'''

'from sklearn.preprocessing import StandardScaler\nss = StandardScaler()\ndf.iloc[:,0:-1]=ss.fit_transform(df.iloc[:,0:-1])'

In [14]:
'''from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)'''

'from sklearn.preprocessing import MinMaxScaler\nscaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)\nX_train = scaling.transform(X_train)\nX_test = scaling.transform(X_test)'

In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4
count,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0,13611.0
mean,53048.284549,855.283459,320.141867,202.270714,1.583242,0.750895,53768.200206,253.06422,0.749733,0.987143,0.873282,0.799864,0.006564,0.001716,0.64359,0.995063
std,29324.095717,214.289696,85.694186,44.970091,0.246678,0.092002,29774.915817,59.17712,0.049086,0.00466,0.05952,0.061713,0.001128,0.000596,0.098996,0.004366
min,20420.0,524.736,183.601165,122.512653,1.024868,0.218951,20684.0,161.243764,0.555315,0.919246,0.489618,0.640577,0.002778,0.000564,0.410339,0.947687
25%,36328.0,703.5235,253.303633,175.84817,1.432307,0.715928,36714.5,215.068003,0.718634,0.98567,0.832096,0.762469,0.0059,0.001154,0.581359,0.993703
50%,44652.0,794.941,296.883367,192.431733,1.551124,0.764441,45178.0,238.438026,0.759859,0.988283,0.883157,0.801277,0.006645,0.001694,0.642044,0.996386
75%,61332.0,977.213,376.495012,217.031741,1.707109,0.810466,62294.0,279.446467,0.786851,0.990013,0.916869,0.83427,0.007271,0.00217,0.696006,0.997883
max,254616.0,1985.37,738.860154,460.198497,2.430306,0.911423,263261.0,569.374358,0.866195,0.994677,0.990685,0.987303,0.010451,0.003665,0.974767,0.999733


In [16]:
# Column names, non-null counts and dtypes of the dataset.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13611 entries, 0 to 13610
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Area             13611 non-null  int64  
 1   Perimeter        13611 non-null  float64
 2   MajorAxisLength  13611 non-null  float64
 3   MinorAxisLength  13611 non-null  float64
 4   AspectRation     13611 non-null  float64
 5   Eccentricity     13611 non-null  float64
 6   ConvexArea       13611 non-null  int64  
 7   EquivDiameter    13611 non-null  float64
 8   Extent           13611 non-null  float64
 9   Solidity         13611 non-null  float64
 10  roundness        13611 non-null  float64
 11  Compactness      13611 non-null  float64
 12  ShapeFactor1     13611 non-null  float64
 13  ShapeFactor2     13611 non-null  float64
 14  ShapeFactor3     13611 non-null  float64
 15  ShapeFactor4     13611 non-null  float64
 16  Class            13611 non-null  object 
dtypes: float64(1

In [17]:
# List the distinct labels present in the Class column.
(df.Class).unique()

array(['SEKER', 'BARBUNYA', 'BOMBAY', 'CALI', 'HOROZ', 'SIRA', 'DERMASON'],
      dtype=object)

## 10 RANDOM SAMPLES

In [18]:
# Draw 10 samples of 5000 rows each. Each draw is seeded with its own index
# so that re-running the notebook yields the same samples; df.sample already
# returns a DataFrame, so no extra wrapping is needed.
l = [df.sample(n=5000, random_state=seed) for seed in range(10)]

In [19]:
# Display the sampled frame at index 1 as a spot check.
l[1]

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
5618,41774,797.024,308.948457,172.913673,1.786721,0.828706,42522,230.625907,0.652719,0.982409,0.826367,0.746487,0.007396,0.001417,0.557242,0.995637,HOROZ
5470,97727,1184.262,452.170756,277.520232,1.629325,0.789500,99056,352.746199,0.761683,0.986583,0.875647,0.780117,0.004627,0.001057,0.608583,0.991580,CALI
5999,49158,884.740,368.010841,170.960013,2.152614,0.885546,50011,250.179755,0.623200,0.982944,0.789174,0.679816,0.007486,0.000986,0.462150,0.994831,HOROZ
4239,68609,996.685,380.281068,231.096692,1.645550,0.794167,69346,295.559963,0.680645,0.989372,0.867911,0.777215,0.005543,0.001248,0.604062,0.994015,CALI
6834,57352,967.188,389.459490,188.328989,2.067974,0.875308,58247,270.227375,0.684620,0.984634,0.770436,0.693852,0.006791,0.000971,0.481431,0.995587,HOROZ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8892,45200,810.461,296.899313,196.497617,1.510956,0.749652,46370,239.896702,0.710591,0.974768,0.864737,0.808007,0.006569,0.001727,0.652875,0.986466,SIRA
7161,61331,981.574,405.647760,193.456046,2.096847,0.878954,61973,279.444189,0.676122,0.989641,0.799915,0.688884,0.006614,0.000919,0.474561,0.995082,HOROZ
1253,40705,738.355,249.136239,208.625320,1.194180,0.546599,41147,227.655915,0.764413,0.989258,0.938269,0.913781,0.006121,0.002632,0.834995,0.997135,SEKER
2307,61786,966.634,327.716134,241.103058,1.359237,0.677300,62957,280.478838,0.734219,0.981400,0.830952,0.855859,0.005304,0.001755,0.732495,0.995634,BARBUNYA


In [23]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Run the genetic SVC search on each of the 10 samples and remember the
# accuracy history of the sample that reached the highest best-accuracy.
best=0
acc=[]          # stays empty if no sample ever beats `best`
for i in range(10):
    x=l[i].drop(['Class'],axis=1)       # feature columns
    y=l[i].Class                        # target labels
    x_train, x_test, y_train, y_test = train_test_split(x,y, random_state=42, test_size=0.3)
    best_accuracy,accuracy=apply_svm(x_train,y_train,x_test,y_test)
    # Report the sample index together with its accuracy (the message used to
    # print the accuracy where the sample number was announced).
    print("Sample no. :",i,"best accuracy :",best_accuracy,"\n")
    if(best_accuracy>best):
        best=best_accuracy
        acc=accuracy

# Flatten the history to a 1-D float series and size the x axis from the data
# itself; a fixed 0..999 axis does not match the number of recorded entries
# and makes plt.plot raise a shape-mismatch error.
history = np.asarray(acc, dtype=float).ravel()
steps = np.arange(history.size)
plt.plot(steps, history)  # Plot the chart
plt.xlabel("evaluated chromosome (in generation order)")
plt.ylabel("accuracy")
plt.show()  # display