In [1]:
from IPython.core.display import display, HTML # for some notebook formatting.

import mlrose_hiive
import numpy as np
import logging
import networkx as nx
from ast import literal_eval

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score
from mlrose_hiive import QueensGenerator, MaxKColorGenerator
from mlrose_hiive import SARunner, GARunner, NNGSRunner

# switch off the chatter
logging.basicConfig(level=logging.WARNING)

In [2]:
from mlrose_hiive import RHCRunner

In [3]:
from mlrose_hiive import CustomFitness

In [4]:
# # Define alternative N-Queens fitness function for maximization problem
# def queens_max(state):

#    # Initialize counter
#     fitness_cnt = 0

#           # For all pairs of queens
#     for i in range(len(state) - 1):
#         for j in range(i + 1, len(state)):

#                 # Check for horizontal, diagonal-up and diagonal-down attacks
#             if (state[j] != state[i])  and (state[j] != state[i] + (j - i)) and (state[j] != state[i] - (j - i)):

#                    # If no attacks, then increment counter
#                            fitness_cnt += 1

#     return fitness_cnt

# # Initialize custom fitness function object
# fitness_cust = CustomFitness(queens_max)
problem = QueensGenerator().generate(seed=123456, size=20)


In [5]:
import time

In [6]:
# Generate a new 8-Queen problem using a fixed seed.

# create a runner class and solve the problem
sa = SARunner(problem=problem,FEvals=True,
              experiment_name='queens20_sa',maximize=False,
              output_directory="Results", # note: specify an output directory to have results saved to disk
              seed=123456,
              iteration_list=2 ** np.arange(11),
              max_attempts=1000,
              temperature_list=[100,1000,100000,10000000,1000000000],
              decay_list=[mlrose_hiive.GeomDecay])


start = time.time()
df_run_stats, df_run_curves = sa.run()
# sleeping for 1 sec to get 10 sec runtime
time.sleep(1)

# program body ends

# end time
end = time.time()

# total time taken
print(f"Runtime of the program is {end - start}")





Runtime of the program is 9.469554901123047


In [6]:
# df_run_stats.columns

In [7]:
# HTML(df_run_stats[['Iteration', 'Fitness',  'Time', 'State']][0:12].to_html())


In [8]:
state_sample = df_run_stats[['schedule_current_value', 'schedule_init_temp', 'schedule_min_temp']][:1]
HTML(state_sample.to_html())


Unnamed: 0,schedule_current_value,schedule_init_temp,schedule_min_temp
0,0.099999,0.1,0.001


In [7]:
best_fitness_ga = df_run_curves['Fitness'].min()
best_runs_ga = df_run_curves[df_run_curves['Fitness'] == best_fitness_ga]

HTML(best_runs_ga.to_html())

Unnamed: 0,Iteration,Time,Fitness,Temperature,max_iters,FEvals,maximize
654,654,0.709742,1.0,100,1024,1496,False
655,655,0.710337,1.0,100,1024,1496,False
656,656,0.711032,1.0,100,1024,1496,False
657,657,0.712068,1.0,100,1024,1496,False
658,658,0.712749,1.0,100,1024,1496,False
659,659,0.713308,1.0,100,1024,1496,False
660,660,0.714131,1.0,100,1024,1496,False
661,661,0.715019,1.0,100,1024,1496,False
662,662,0.715902,1.0,100,1024,1496,False
663,663,0.716788,1.0,100,1024,1496,False


In [10]:
# Generate a new Max K problem using a fixed seed.
# problem = MaxKColorGenerator().generate(seed=123456, number_of_nodes=10, max_connections_per_node=3, max_colors=3)

In [11]:
# nx.draw(problem.source_graph,
#         pos=nx.spring_layout(problem.source_graph, seed = 3))

In [12]:
# GARunner(fe)

In [13]:
import time

In [7]:
ga = GARunner(problem=problem,
#               FEvals=True,
              experiment_name='queens20_ga',maximize=False,
#               output_directory=None, # note: specify an output directory to have results saved to disk
              output_directory="Results",
              seed=123456,
              iteration_list=2 ** np.arange(11),
              population_sizes=[10, 20, 50,100,150,200],
             mutation_rates=np.arange(0,0.4,0.015))

# the two data frames will contain the results
# df_run_stats_ga, df_run_curves_ga = ga.run()
# starting time
start = time.time()
df_run_stats_ga, df_run_curves_ga = ga.run()
# sleeping for 1 sec to get 10 sec runtime
time.sleep(1)

# program body ends

# end time
end = time.time()

# total time taken
print(f"Runtime of the program is {end - start}")

Runtime of the program is 6156.852116107941


In [15]:
df_run_stats_ga.columns

Index(['Iteration', 'Fitness', 'Time', 'State', 'Population Size',
       'Mutation Rate', 'max_iters', 'maximize', 'FEvals'],
      dtype='object')

In [16]:
HTML(df_run_stats_ga[['Iteration', 'Fitness', 'FEvals', 'Time', 'State']][0:1].to_html())


Unnamed: 0,Iteration,Fitness,FEvals,Time,State
0,0,16.0,10,0.010155,"[1, 10, 2, 1, 8, 11, 11, 11, 0, 7, 4, 8, 10, 10, 12]"


In [17]:
state_sample_ga = df_run_stats_ga[['Population Size', 'Mutation Rate']][:1]
HTML(state_sample_ga.to_html())

Unnamed: 0,Population Size,Mutation Rate
0,10,0.0


In [30]:
best_fitness_ga = df_run_curves_ga['Fitness'].min()
best_runs_ga = df_run_curves_ga[df_run_curves_ga['Fitness'] == best_fitness_ga]

HTML(best_runs_ga.to_html())

Unnamed: 0,Iteration,Time,Fitness,Population Size,Mutation Rate,max_iters,maximize,FEvals
6920,781,1.59479,0.0,10,0.12,1024,False,8611
12102,697,1.132138,0.0,10,0.21,1024,False,7686
14397,502,2.826688,0.0,10,0.255,1024,False,5542
20605,456,2.577255,0.0,10,0.39,1024,False,5035
26062,984,2.708031,0.0,20,0.09,1024,False,20693
26925,862,2.047512,0.0,20,0.105,1024,False,18130
33246,430,2.438734,0.0,20,0.255,1024,False,9057
35828,636,0.780363,0.0,20,0.315,1024,False,13386
39072,232,1.359318,0.0,20,0.39,1024,False,4899
39600,14,0.128178,0.0,50,0.015,1024,False,771


This gives us nine candidates for the best run. We are going to pick the one with that reached the best fitness value in the fewest number of evaluations.

(We could also have chosen to use Iterations as our criteria.)



In [19]:

minimum_evaluations = best_runs_ga['FEvals'].min()

best_curve_run = best_runs_ga[best_runs_ga['FEvals'] == minimum_evaluations]

In [36]:
HTML(best_curve_run.to_html())


Unnamed: 0,Iteration,Time,Fitness,Population Size,Mutation Rate,max_iters,maximize,FEvals
39600,14,0.128178,0.0,50,0.015,1024,False,771


In [21]:
best_mr = best_curve_run['Mutation Rate'].iloc()[0]
best_pop_size = best_curve_run['Population Size'].iloc()[0]
print(f'Best Mutation Rate: {best_mr}, best Population Size: {best_pop_size}')


Best Mutation Rate: 0.015, best Population Size: 50


In [22]:
run_stats_best_run = df_run_stats_ga[(df_run_stats_ga['Mutation Rate'] == best_mr) & (df_run_stats_ga['Population Size'] == best_pop_size)]
HTML(run_stats_best_run[['Iteration', 'Fitness', 'FEvals', 'Time']].to_html())

Unnamed: 0,Iteration,Fitness,FEvals,Time
660,0,16.0,50,0.022899
661,1,8.0,102,0.361387
662,2,7.0,154,0.693813
663,4,6.0,257,1.088286
664,8,3.0,463,1.497698
665,16,0.0,771,1.998969
666,32,0.0,771,1.998969
667,64,0.0,771,1.998969
668,128,0.0,771,1.998969
669,256,0.0,771,1.998969


In [23]:
best_state = run_stats_best_run[['State']][:1]
HTML(best_state.to_html())

Unnamed: 0,State
660,"[1, 10, 2, 1, 8, 11, 11, 11, 0, 7, 4, 8, 10, 10, 12]"


In [24]:
# print([n for n in problem.source_graph.nodes])

In [25]:
# color_indexes = literal_eval(run_stats_best_run['State'].tail(1).values[0])
# ordered_state = [color_indexes[n] for n in problem.source_graph.nodes]
# print(ordered_state)


In [26]:
# colors = ['lightcoral', 'lightgreen', 'yellow']
# node_color_map = [colors[s] for s in ordered_state]

# nx.draw(problem.source_graph,
#         pos=nx.spring_layout(problem.source_graph, seed = 3),
#         with_labels=True,
#         node_color=node_color_map)

In [27]:
# best_state, best_fitness, fitness_curve, fitness_evals = mlrose_hiive.simulated_annealing(problem,max_attempts=10, max_iters=1000, init_state=[0]
#                                                                                           , random_state=1, curve=True, fevals=True)

In [28]:
# sa = SARunner(problem=problem,
#               experiment_name='queen15_sa',maximize=False,
# #               output_directory=None, # note: specify an output directory to have results saved to disk
#               output_directory="Results",
#               seed=123456,
#               iteration_list=2 ** np.arange(11),
#               max_attempts=1000,
#               temperature_list=[0.1, 0.5, 0.75, 1.0, 2.0, 5.0,250],
#               decay_list=[mlrose_hiive.GeomDecay])

# # the two data frames will contain the results

# # starting time
# start = time.time()
# df_run_stats_sa, df_run_curves_sa = sa.run()
# # sleeping for 1 sec to get 10 sec runtime
# time.sleep(1)

# # program body ends

# # end time
# end = time.time()

# # total time taken
# print(f"Runtime of the program is {end - start}")

In [29]:
df_run_stats_sa.columns

NameError: name 'df_run_stats_sa' is not defined

In [None]:
# sa.best_state

In [None]:
HTML(df_run_stats_sa[['Iteration', 'Fitness', 'FEvals', 'Time', 'State']][0:1].to_html())




In [None]:
state_sample_sa = df_run_stats_sa[['schedule_current_value', 'schedule_init_temp', 'schedule_min_temp']][:1]
HTML(state_sample_sa.to_html())


In [None]:
# state_sample_sa = df_run_stats_sa[['Population Size', 'Mutation Rate']][:1]
# HTML(state_sample_sa.to_html())

best_fitness_sa = df_run_curves_sa['Fitness'].max()
best_runs_sa = df_run_curves_sa[df_run_curves_sa['Fitness'] == best_fitness_sa]

HTML(best_runs_sa.to_html())


minimum_evaluations = best_runs_sa['FEvals'].min()

best_curve_run = best_runs_sa[best_runs_sa['FEvals'] == minimum_evaluations]

HTML(best_curve_run.to_html())


In [None]:

best_init_temperature = best_curve_run['Temperature'].iloc()[0].init_temp

print(f'Best initial temperature: {best_init_temperature}')

In [None]:

run_stats_best_run = df_run_stats_sa[df_run_stats_sa['schedule_init_temp'] == best_init_temperature]
HTML(run_stats_best_run[['Iteration', 'Fitness', 'FEvals', 'Time', 'State']].to_html())

In [None]:
best_state = run_stats_best_run[['schedule_current_value', 'schedule_init_temp', 'schedule_min_temp']][:1]
HTML(best_state.to_html())

In [None]:

# print([n for n in problem.source_graph.nodes])

# color_indexes = literal_eval(run_stats_best_run['State'].tail(1).values[0])
# ordered_state = [color_indexes[n] for n in problem.source_graph.nodes]
# print(ordered_state)


# colors = ['lightcoral', 'lightgreen', 'yellow']
# node_color_map = [colors[s] for s in ordered_state]

# nx.draw(problem.source_graph,
#         pos=nx.spring_layout(problem.source_graph, seed = 3),
#         with_labels=True,
#         node_color=node_color_map)

# RHC

In [8]:
rhc = RHCRunner(problem=problem,
                       experiment_name="queen20_RHC",maximize=False,
#                        output_directory="/Users/matthieudivet/Desktop/GaTech/Classes/ML/Assignments/Randomized_optimization/knapsack_problem",
                output_directory="Results",       
                seed=123456,
                       iteration_list=2 ** np.arange(11),
                       max_attempts=1000,
                       restart_list=[0,10,20,40,60,100,150])

# starting time
start = time.time()
rhc_run_stats, rhc_run_curves = rhc.run()
# sleeping for 1 sec to get 10 sec runtime
time.sleep(1)

# program body ends

# end time
end = time.time()

# total time taken
print(f"Runtime of the program is {end - start}")




Runtime of the program is 182.84762120246887


In [None]:
df_run_stats_ga.columns

In [None]:
rhc_run_stats.columns

In [None]:


HTML(rhc_run_stats[['Iteration', 'Fitness', 'FEvals', 'Time', 'State']][0:1].to_html())




state_sample_sa = rhc_run_stats[['Restarts','current_restart']][:1]
HTML(state_sample_sa.to_html())

In [None]:
best_fitness_sa = rhc_run_curves['Fitness'].min()
best_runs_sa = rhc_run_curves[rhc_run_curves['Fitness'] == best_fitness_sa]

HTML(best_runs_sa.to_html())

In [None]:



# # state_sample_sa = rhc_run_stats[['Population Size', 'Mutation Rate']][:1]
# # HTML(state_sample_sa.to_html())




# minimum_evaluations = best_runs_sa['FEvals'].min()

# best_curve_run = best_runs_sa[best_runs_sa['FEvals'] == minimum_evaluations]

# HTML(best_curve_run.to_html())



# best_init_temperature = best_curve_run['Temperature'].iloc()[0].init_temp

# print(f'Best initial temperature: {best_init_temperature}')


# run_stats_best_run = rhc_run_stats[rhc_run_stats['schedule_init_temp'] == best_init_temperature]
# HTML(run_stats_best_run[['Iteration', 'Fitness', 'FEvals', 'Time', 'State']].to_html())

# best_state = run_stats_best_run[['schedule_current_value', 'schedule_init_temp', 'schedule_min_temp']][:1]
# HTML(best_state.to_html())


# print([n for n in problem.source_graph.nodes])

# color_indexes = literal_eval(run_stats_best_run['State'].tail(1).values[0])
# ordered_state = [color_indexes[n] for n in problem.source_graph.nodes]
# print(ordered_state)


# colors = ['lightcoral', 'lightgreen', 'yellow']
# node_color_map = [colors[s] for s in ordered_state]

# nx.draw(problem.source_graph,
#         pos=nx.spring_layout(problem.source_graph, seed = 3),
#         with_labels=True,
#         node_color=node_color_map)

# MIMIC

In [9]:
from mlrose_hiive import MIMICRunner

In [10]:
mimic = MIMICRunner(problem=problem,
                           experiment_name="queen20_MIMIC",
                           output_directory="Results",
                           seed=123456,
                           iteration_list=2 ** np.arange(11),
                           population_sizes=[10,50,70,100,200,250],
                           max_attempts=1000,
                           keep_percent_list=[0.2,0.3,0.4,0.5],
                           use_fast_mimic=True)


In [11]:
# starting time
start = time.time()
mimic_run_stats, mimic_run_curves = mimic.run()
# sleeping for 1 sec to get 10 sec runtime
time.sleep(1)

# program body ends

# end time
end = time.time()

# total time taken
print(f"Runtime of the program is {end - start}")




Runtime of the program is 3268.8826146125793


In [None]:


# HTML(mimic_run_stats[['Iteration', 'Fitness', 'FEvals', 'Time', 'State']][0:1].to_html())




# state_sample_sa = mimic_run_stats[['Restarts','current_restart']][:1]
# HTML(state_sample_sa.to_html())



In [None]:
df_run_stats_ga.columns

In [None]:
import matplotlib.pyplot as plt 

In [None]:
df_run_stats_ga['Iteration'].value_counts()

In [None]:
df_run_stats_ga.head()

In [None]:
df_run_stats_ga['Population Size']

In [None]:
# kgjf

In [None]:
import copy

In [None]:
df_run_stats_ga2=copy.deepcopy(df_run_stats_ga)

In [None]:
x='PS'+df_run_stats_ga2['Population Size'].astype('str')+' MR '+df_run_stats_ga2['Mutation Rate'].astype('str')
x

In [None]:
df_run_stats_ga2['combination']


In [None]:
df_run_stats_ga2['combination']=x
# df_run_stats_ga2['Population Size'].apply('str')+' '+df_run_stats_ga2['Mutation Rate'].apply('str')

In [None]:
# df_run_stats_ga2.set_index('Iteration', inplace=True)
df_run_stats_ga2.groupby('combination')['Fitness'].plot(legend=True)

In [None]:
df_run_stats_ga2.set_index('Iteration', inplace=True)
df_run_stats_ga2.groupby('combination')['FEvals'].plot(legend=True)

In [None]:
# df_run_stats_ga.plot(x='Iteration',y='FEvals')

In [None]:
# x=df_run_stats_ga['Iteration']
# y=df_run_stats_ga['FEvals']
# plt.plot(x, y, label = "line 1") 
# # plt.plot(y, x, label = "line 2") 
# # plt.plot(x, np.sin(x), label = "curve 1") 
# # plt.plot(x, np.cos(x), label = "curve 2") 
# plt.legend() 
# plt.show()

In [35]:
best_fitness_sa = mimic_run_curves['Fitness'].min()
best_runs_sa = mimic_run_curves[mimic_run_curves['Fitness'] == best_fitness_sa]

HTML(best_runs_sa.to_html())

Unnamed: 0,Iteration,Time,Fitness,use_fast_mimic,Population Size,Keep Percent,max_iters,FEvals
22126,9,0.528178,3.0,True,250,0.4,1024,4271
22127,10,0.579944,3.0,True,250,0.4,1024,4271
22128,11,0.630594,3.0,True,250,0.4,1024,4271
22129,12,0.682086,3.0,True,250,0.4,1024,4271
22130,13,0.732866,3.0,True,250,0.4,1024,4271
22131,14,0.784643,3.0,True,250,0.4,1024,4271
22132,15,0.837282,3.0,True,250,0.4,1024,4271
22133,16,0.888003,3.0,True,250,0.4,1024,4271
22134,17,0.957054,3.0,True,250,0.4,1024,8287
22135,18,1.014971,3.0,True,250,0.4,1024,8287
