The plan is to implement PSO in order to find the different optima from our parameter space. As PSO needs an evaluation function to work, we will use a linear regression model to predict the speed based on the parameters being tried. 

The linear regression is done from the collected data from the robot.

### Imports

In [1]:
# Imports
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, mean_squared_error
from sklearn.ensemble import RandomForestRegressor

import copy
import numpy.random as rnd
import time
import matplotlib.pyplot as plt

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import random
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import animation
from utils import subspace

### Regression Models

In [2]:
# Get the data
df1 = pd.read_csv("firstbatch_500Samples.csv")
df2 = pd.read_csv("secondbatch_500Samples.csv")
df=pd.concat([df1,df2])

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Snelheid,Omvang1,Positie1,Omvang2,Positie2,Relatie,Speed
0,0,0.0,0.0,-15.0,15.0,-75.0,15.0,1.012087
1,1,0.0,0.0,15.0,30.0,30.0,-90.0,0.844846
2,2,0.0,0.0,15.0,60.0,-30.0,-90.0,2.491373
3,3,0.0,0.0,45.0,60.0,75.0,60.0,1.24966
4,4,0.0,0.0,75.0,30.0,-75.0,-90.0,1.112912


In [4]:
columns = ['Snelheid', 'Omvang1', 'Positie1', 'Relatie', 'Omvang2', "Positie2", 'Speed']
df = df[columns]

In [5]:
df

Unnamed: 0,Snelheid,Omvang1,Positie1,Relatie,Omvang2,Positie2,Speed
0,0.0,0.0,-15.0,15.0,15.0,-75.0,1.012087
1,0.0,0.0,15.0,-90.0,30.0,30.0,0.844846
2,0.0,0.0,15.0,-90.0,60.0,-30.0,2.491373
3,0.0,0.0,45.0,60.0,60.0,75.0,1.249660
4,0.0,0.0,75.0,-90.0,30.0,-75.0,1.112912
...,...,...,...,...,...,...,...
493,9.0,75.0,-45.0,30.0,0.0,-90.0,5.130143
494,9.0,75.0,-30.0,30.0,0.0,-90.0,4.264560
495,9.0,75.0,-30.0,30.0,15.0,60.0,5.720083
496,9.0,75.0,30.0,-90.0,15.0,-15.0,5.768844


In [6]:
cols = df.drop(columns = ['Speed'], axis = 1).columns

In [7]:
# Prepare the data for the model

X = df.drop(columns = ['Speed'], axis = 1).values
y = df.Speed.values

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 18)

# Create the model
model = LinearRegression()

# Train it
model.fit(X_train, y_train)

# Make prediction
y_pred = model.predict(X_test)
#  Evaluate the model
print('R^2 score: ', model.score(X_test, y_test))
print('MSE : ', mean_squared_error(y_test, y_pred))

R^2 score:  0.6657032213055256
MSE :  0.7867489933911326


In [8]:
# Regression Tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score

regressor = make_pipeline(StandardScaler(), DecisionTreeRegressor(random_state=0))
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

print('R^2 score: ',regressor.score(X_test, y_test))
print('MSE : ', mean_squared_error(y_test, y_pred))

R^2 score:  0.360308282766712
MSE :  1.5054791032667916


In [9]:
# SVR
from sklearn.svm import SVR
import numpy as np

svr = SVR(C=1.0, epsilon=0.2, kernel = 'linear')
svr.fit(X_train, y_train)

y_pred = svr.predict(X_test)

print('R^2 score: ',svr.score(X_test, y_test))
print('MSE : ', mean_squared_error(y_test, y_pred))

R^2 score:  0.6677336902432429
MSE :  0.7819703969622361


In [10]:
# Random forest Regressor

regr = RandomForestRegressor(max_depth=6, random_state=42)
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)

print('R^2 score: ',regr.score(X_test, y_test))
print('MSE : ', mean_squared_error(y_test, y_pred))

R^2 score:  0.6887920108544798
MSE :  0.732410803214131


In [11]:
pd.DataFrame({'Variable':cols, 'Importance':regr.feature_importances_}).sort_values('Importance', ascending=False)

Unnamed: 0,Variable,Importance
0,Snelheid,0.486237
1,Omvang1,0.22117
4,Omvang2,0.176451
5,Positie2,0.04546
2,Positie1,0.044802
3,Relatie,0.02588


Export the function to be optimized

In [12]:
print(model.intercept_, model.coef_)
intercept = model.intercept_
coefs = model.coef_

-0.1627178232975437 [ 0.35198676  0.02415956 -0.0032345  -0.00092786  0.02235192  0.00246723]


### PSO

In [13]:
def fitness_function(model, parameters):

  # intercept = model.intercept_
  # coefs = model.coef_

  # sum = 0  
  # for i in range(6):
  #   sum += (coefs[i] * parameters[i])
  # f = intercept + sum
  return model.predict(np.array(parameters).reshape(1, 6))

In [None]:
# def update_velocity(particle, velocity, pbest, gbest, w_min=0.5, max=1.0, c=0.1):

#   # Initialise new velocity array
#   num_particle = len(particle)
#   new_velocity = np.array([0.0 for i in range(num_particle)])

#   # Randomly generate r1, r2 and inertia weight from normal distribution
#   r1 = random.uniform(0,max)
#   r2 = random.uniform(0,max)
#   w = random.uniform(w_min,max)
#   c1 = c
#   c2 = c
#   # Calculate new velocity
#   for i in range(num_particle):
#     new_velocity[i] = w*velocity[i] + c1*r1*(pbest[i]-particle[i])+c2*r2*(gbest[i]-particle[i])

#   return new_velocity

In [None]:
# def update_position(particle, velocity):

#   bounds = [[-90, 90], [-90, 90], [0, 1], [0, 90], [0, 90], [-90, 90]]

#   new_particle = np.zeros(6)

#   # print('b', particle)
#   # print('v', velocity)
#   # Move particles by adding velocity
#   for i in range(6):
#     if (particle[i] + velocity[i]) >= bounds[i][0] and (particle[i] + velocity[i]) <= bounds[i][1]:
#       new_particle[i] = particle[i] + velocity[i]
    
#   # print('a', new_particle)
#   return new_particle

In [None]:
# def pso(population_size, dimension, parameters, generation, fitness_criterion, model):
#   # Initialisation
#   # Population

#   particles = []

#   for i in range(population_size):
#     # Create a particle and add it to the population
#     particle = []
#     for j in range(6):
#       # Populate the particle with 6 random number from the 6 parameters
#       lower_bound = list(parameters.values())[j][0]
#       upper_bound = list(parameters.values())[j][1]

#       # Some parameters have to be integers
#       if j == 2:
#         particle.append(random.randint(lower_bound, upper_bound)/10)
#       else:
#         particle.append(random.randint(lower_bound, upper_bound))

#     particles.append(particle)

#   #particles = [[random.uniform(position_min, position_max) for j in range(dimension)] for i in range(population)]
#   # Particle's best position
#   pbest_position = particles
#   # Fitness
#   pbest_fitness = []

#   for particle in particles:
#     fitness = fitness_function(model, particle)
#     pbest_fitness.append(fitness)

#   # Index of the best particle
#   gbest_index = np.argmin(pbest_fitness)
#   # Global best particle position
#   gbest_position = pbest_position[gbest_index]
#   # print(gbest_position)
#   # Velocity (starting from 0 speed)
#   velocity = [[0.0 for j in range(dimension)] for i in range(population_size)]
  
#   # Loop for the number of generation
#   for t in range(generation):
#     # Stop if the average fitness value reached a predefined success criterion
#     if np.average(pbest_fitness) <= fitness_criterion:
#       break
#     else:
#       for n in range(population_size):
#         # Update the velocity of each particle
#         velocity[n] = update_velocity(particles[n], velocity[n], pbest_position[n], gbest_position)
#         # Move the particles to new position
#         particles[n] = update_position(particles[n], velocity[n])
        
#     # Calculate the fitness value
#     pbest_fitness = [fitness_function(model, particle) for particle in particles]
#     # Find the index of the best particle
#     gbest_index = np.argmin(pbest_fitness)
#     # Update the position of the best particle
#     gbest_position = pbest_position[gbest_index]
#     # print(gbest_position)

#   # Print the results
#   print('Global Best Position: ', gbest_position)
#   print('Best Fitness Value: ', min(pbest_fitness))
#   print('Average Particle Best Fitness Value: ', np.average(pbest_fitness))
#   print('Number of Generation: ', t)


In [None]:
# population_size = 1000
# dimension = 6
# generation = 1000
# fitness_criterion = 10e-4

In [None]:
# parameters = {
#     'freq' : [0, 10],
#     'Angles_0' : [-90, 90],
#     'Angles_1' : [-90, 90],
#     'phase' : [-90, 90],
#     'amp_1' : [0, 90],
#     'amp_2' : [0, 90]
# }

In [None]:
# pso(population_size, dimension, parameters, generation, fitness_criterion, model)

# New trial

In [14]:
def check_constraints(temp, bounds):

  counter = 0

  for i in range(6):
    if temp[i] >= bounds[i][0] and temp[i] <= bounds[i][1] :
      counter += 1

  return counter == 6

In [15]:
import random

def pso(space):

    # Define the problem-specific parameters
    num_particles = 30
    max_iterations = 300
    search_space = space #[[0, 10], [0, 90], [-90, 90], [-90, 90], [-90, 90], [0, 90]] # Define the search space for each parameter
    inertia = 0.5
    cognitive_constant = 1.5
    social_constant = 1.5


    # Initialize the swarm
    swarm = []
    best_positions = []
    global_best_position = None
    global_best_fitness = float('inf')

    for _ in range(num_particles):
        # Initialize particle position and velocity randomly within the search space
        position = [random.uniform(low, high) for low, high in search_space]
        velocity = [random.uniform(-1, 1) for _ in range(6)]
        
        # Evaluate fitness
        fitness = fitness_function(regr, position)
        
        # Initialize personal best position and fitness
        personal_best_position = position
        personal_best_fitness = fitness
        
        # Update global best position and fitness
        if fitness < global_best_fitness:
            global_best_position = position
            global_best_fitness = fitness
        
        # Add particle to the swarm
        swarm.append((position, velocity, personal_best_position, personal_best_fitness))
        best_positions.append(personal_best_position)

    # PSO main loop
    iteration = 0
    while iteration < max_iterations:
        for i in range(num_particles):
            # Update particle velocity and position if constraints are met
            position, velocity, personal_best_position, _ = swarm[i]
            
            # Update velocity
            velocity = (inertia * np.array(velocity) + cognitive_constant * random.uniform(0, 1) * (np.array(personal_best_position) - np.array(position)) + social_constant * random.uniform(0, 1) * (np.array(personal_best_position) - np.array(position)))
            
            # Update position
            position_temp = np.array(position) + np.array(velocity)
            
            if check_constraints(position_temp, search_space):
                position = position_temp

            # Evaluate fitness
            fitness = fitness_function(regr, position)
            
            # Update personal best position and fitness
            if fitness < swarm[i][3]:
                swarm[i] = (position, velocity, position, fitness)
                best_positions[i] = position
            
            # Update global best position and fitness
            if fitness > global_best_fitness:
                global_best_position = position
                global_best_fitness = fitness
        
        iteration += 1

    # Retrieve the optimized solution
    optimized_solution = global_best_position

    # Print or process the optimized solution
    print("Optimized solution:", optimized_solution)

    return optimized_solution

optimums = []
clusters_num=3
cluster_dict=subspace.subspace_by_clustering(k=clusters_num)
cluster_bounds=[]


for i in range(clusters_num):
    cs=cluster_dict['cluster_bounds'][i]
    cluster_bounds.append(cs)
for i in range(clusters_num):
    space=np.array(cluster_bounds[i])
    optimums.append(pso(space))
# Add optimums to the list by calling the method on each clusters




number of clusters:  3
Optimized solution: [  7.7346803   71.95928334  46.60043745  62.37678359  68.06999941
 -89.59027392]
Optimized solution: [  8.76031135  40.55662898  -6.61462885  56.31099429 -26.07084592
  30.11346753]
Optimized solution: [  8.73476353  73.08784576 -70.01043588  18.12961852   1.58200785
 -17.2695881 ]


In [16]:
for j in range(3,7):
    optimums = []
    clusters_num=j
    cluster_dict=subspace.subspace_by_clustering(k=clusters_num)
    cluster_bounds=[]
    for i in range(clusters_num):
        cs=cluster_dict['cluster_bounds'][i]
        cluster_bounds.append(cs)
    for i in range(clusters_num):
        space=np.array(cluster_bounds[i])
        optimums.append(pso(space))
    for i in range(len(optimums)):
        optimums[i] = np.array(optimums[i]).reshape(1, 6)
    print('cluster'+str(j))
    for i in range(len(optimums)):
        print(regr.predict(optimums[i]))

number of clusters: 



 3
Optimized solution: [  8.60194458  73.61280169 -20.55686206  37.48346663  51.0517599
 -34.39875699]
Optimized solution: [  5.48562994  70.28128261 -13.11630996  56.32059822  62.83672225
 -62.12416428]
Optimized solution: [  7.21209458  72.48368119 -74.03702474  23.27654544 -87.8637867
  61.57844752]
cluster3
[5.53439485]
[4.47122287]
[4.66857727]
number of clusters:  4




Optimized solution: [  7.75955253  70.47774929 -44.3433368   45.80326847 -17.57107888
 -77.73961591]
Optimized solution: [  6.62976576  37.56407123  72.4718773   25.00747342  69.92602355
 -16.64669449]
Optimized solution: [  6.75700885  50.22364713 -81.94753579  30.64048392  63.41618823
  62.12317902]
Optimized solution: [  4.9924099   72.79101051 -59.74766349  23.14636877  24.8005534
   4.2977309 ]
cluster4
[4.24331645]
[5.36027789]
[5.16321972]
[4.22815437]




number of clusters:  5
Optimized solution: [ 8.64150191 65.54332476 42.08069086 47.95866577 43.05291127 57.11097896]
Optimized solution: [ 6.69512335 61.30220703 54.30012771  0.86226022  2.78725062 58.79103409]
Optimized solution: [8.656963573446202, 72.75769676092953, -85.0106398114807, 72.79867816645357, 57.81824403338953, -38.91886608894043]
Optimized solution: [  8.56464478  67.09260455 -41.30663043  48.56059606  68.35908552
  35.54209416]
Optimized solution: [  7.56958944  60.29045828  63.94450131  72.31511544 -87.6070886
 -72.44192487]
cluster5
[5.59985094]
[4.12330597]
[5.8170944]
[6.86545055]
[3.81135368]




number of clusters:  6
Optimized solution: [  6.58845228  64.81187782  61.2206245   60.92191753  48.85215376
 -73.30923657]
Optimized solution: [  6.97744829  65.27895753  46.86559776  21.50341876 -85.48808368
  73.53412159]
Optimized solution: [  8.05828227  53.94643392 -63.74307491  34.06220791 -33.22002072
   5.6920361 ]
Optimized solution: [ 8.12416411 70.92798939 67.54948745  2.76811036 66.44913201 35.27403   ]
Optimized solution: [  6.09751309  54.14006003 -42.87549335   8.27595938  62.50962169
   3.79560037]
Optimized solution: [  7.65345695  72.57612558 -57.53078084  23.16508743   8.48707299
  63.63757895]
cluster6
[4.64313134]
[4.13926341]
[4.36189626]
[5.36634153]
[4.68045037]
[4.91139652]


In [17]:
# cluster_dict['kmeans'].predict(firstrow)

In [18]:
for i in range(len(optimums)):
  optimums[i] = np.array(optimums[i]).reshape(1, 6)

In [19]:
for i in range(len(optimums)):
   print(regr.predict(optimums[i]))

[4.64313134]
[4.13926341]
[4.36189626]
[5.36634153]
[4.68045037]
[4.91139652]


In [20]:
for i in range(len(optimums)):
    print(optimums[i])

[[  6.58845228  64.81187782  61.2206245   60.92191753  48.85215376
  -73.30923657]]
[[  6.97744829  65.27895753  46.86559776  21.50341876 -85.48808368
   73.53412159]]
[[  8.05828227  53.94643392 -63.74307491  34.06220791 -33.22002072
    5.6920361 ]]
[[ 8.12416411 70.92798939 67.54948745  2.76811036 66.44913201 35.27403   ]]
[[  6.09751309  54.14006003 -42.87549335   8.27595938  62.50962169
    3.79560037]]
[[  7.65345695  72.57612558 -57.53078084  23.16508743   8.48707299
   63.63757895]]


In [21]:
# cluster_dict['kmeans']


In [22]:
# regr.predict(np.array(optimized_solution).reshape(1, 6))

In [23]:
min_dist=200
max_speed=0
max_index=0
for optima in range(len(optimums)):
    for index,row in df.iterrows():
        #  euclidean distance between aaron and each row
        dist=np.linalg.norm(optima-row[0:6])
        if dist<min_dist:
            min_dist=dist
            max_speed=row[-1]
            max_index=index
    print('The closest sample is the {}th sample'.format(max_index))
    print('The distance between the closest sample and the optimization result is {}'.format(min_dist))
    print('The speed of the closest sample is {}'.format(max_speed))

The closest sample is the 113th sample
The distance between the closest sample and the optimization result is 21.307275752662516
The speed of the closest sample is 0.5517828203617182
The closest sample is the 113th sample
The distance between the closest sample and the optimization result is 19.8997487421324
The speed of the closest sample is 0.5517828203617182
The closest sample is the 113th sample
The distance between the closest sample and the optimization result is 18.708286933869708
The speed of the closest sample is 0.5517828203617182
The closest sample is the 158th sample
The distance between the closest sample and the optimization result is 17.74823934929885
The speed of the closest sample is 0.485082041228827
The closest sample is the 158th sample
The distance between the closest sample and the optimization result is 17.05872210923198
The speed of the closest sample is 0.485082041228827
The closest sample is the 158th sample
The distance between the closest sample and the opti

In [29]:
cluster_dict['kmeans']

In [31]:
df_no_speed = df[['Snelheid', 'Omvang1', 'Positie1', 'Relatie', 'Omvang2', "Positie2"]]
df_no_speed.Cluster = ''
num_cluster = 6
cluster_dict = subspace.subspace_by_clustering(k = num_cluster)

# Assuming 'predictions' is a list or array containing the predicted cluster values
for index, row in df_no_speed.iterrows():
    temp = row.values
    temp = np.array(temp).reshape(1,6)
    print(temp)
    df_no_speed.loc[index, 'Cluster'] = cluster_dict['kmeans'].predict(temp)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_speed.loc[index, 'Cluster'] = cluster_dict['kmeans'].predict(temp)


number of clusters:  6
[  0.   0. -15.  15.  15. -75.]
[[  0.   0. -15.  15.  15. -75.]]
[  0.   0.  15. -90.  30.  30.]
[[  0.   0.  15. -90.  30.  30.]]


ValueError: Must have equal len keys and value when setting with an iterable

In [27]:
df['Cluster'] = df_no_speed['Cluster']
clusters = df['Cluster'].unique()
models = []

for i in clusters:

    sub_df = df[df['Cluster'] == i]
    X = sub_df.drop(columns = ['Speed'], axis = 1).values
    y = sub_df.Speed.values

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 18)

    regr = RandomForestRegressor(max_depth=6, random_state=42, criterion='mae')
    regr.fit(X_train, y_train)
    y_pred = regr.predict(X_test)

    print('R^2 score: ',regr.score(X_test, y_test))
    print('MSE : ', mean_squared_error(y_test, y_pred))

    models.append(regr)

InvalidParameterError: The 'criterion' parameter of RandomForestRegressor must be a str among {'absolute_error', 'poisson', 'squared_error', 'friedman_mse'}. Got 'mae' instead.