# Zakharov Function Optimization

## 1. CUDA Parallel Implementation

In [14]:
%%writefile zakharov_cuda.cu
#include <algorithm>
#include <cfloat>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>

#include <curand_kernel.h>

using namespace std;

// Problem configuration shared by the host code and every kernel.
// The bounds are parenthesized so the negative literal cannot combine with
// neighbouring operators when a macro is expanded inside an expression.
#define POP     256        // number of individuals; kernels guard on id >= POP
#define DIM     5          // coordinates per individual
#define MAX_IT  8000       // iterations of the main optimisation loop
#define LB      (-100.0)   // lower bound applied to every coordinate
#define UB      (100.0)    // upper bound applied to every coordinate

//============================ DEVICE RANDOM ============================//

// Draws one double from the given cuRAND state and maps it linearly onto the
// span between a and b. Per the cuRAND documentation the raw sample lies in
// (0, 1], so the result lies in (a, b] for a < b.
__device__ double randF(curandState* state, double a, double b){
    const double u = curand_uniform_double(state);
    const double mapped = a + u * (b - a);
    return mapped;
}

// Returns a uniformly distributed integer in the closed range [a, b].
//
// One float is always drawn from `state`, so the generator advances by the
// same amount regardless of the range. Per the cuRAND documentation
// curand_uniform yields a value in (0, 1]; it is flipped to [0, 1) so that
// scaling by the number of buckets and truncating gives an index in
// [0, span-1] with equal weight per bucket. This also works for negative a,
// where truncating `a + r*width` toward zero would skew the result.
__device__ int randInt(curandState* state, int a, int b){
    const float u = 1.0f - curand_uniform(state);   // now in [0, 1)
    const int span = b - a + 1;                      // number of admissible values
    if(span <= 1) return a;                          // empty or single-value range

    int offset = (int)(u * (float)span);
    // Float rounding of u*span can reach span itself for wide ranges; clamp.
    if(offset >= span) offset = span - 1;
    return a + offset;
}

//============================= ZAKHAROV FITNESS ========================//

// Zakharov objective: sum(x_i^2) + s^2 + s^4, where s = sum(0.5 * (i+1) * x_i)
// with i counted from zero.
//
//   x   - pointer to `dim` coordinates (only read, hence const)
//   dim - number of coordinates to evaluate
//
// Marked __host__ __device__ so the same code can be evaluated on the CPU as a
// reference for the kernels. The powers are expanded into multiplications
// instead of calling the general-purpose pow().
__host__ __device__ double fitness(const double* x, int dim){
    double s1 = 0.0, s2 = 0.0;
    for(int i=0;i<dim;i++){
        s1 += x[i]*x[i];
        s2 += 0.5*(i+1)*x[i];
    }
    const double s2sq = s2*s2;
    return s1 + s2sq + s2sq*s2sq;
}

//======================= POPULATION INITIALIZATION =====================//

// Kernel: one thread per individual. Thread `id` initialises its own cuRAND
// state (same seed, subsequence = id, offset 0) and fills its DIM coordinates
// in `pop` with randF(LB, UB) samples. Threads with id >= POP do nothing.
__global__ void init_population(double* pop, curandState* rng, unsigned long seed){
    const int id = blockIdx.x*blockDim.x + threadIdx.x;
    if(id < POP){
        curandState* mine = &rng[id];
        curand_init(seed, id, 0, mine);

        double* row = pop + id*DIM;      // this individual's slice of the flat array
        for(int d=0; d<DIM; ++d){
            row[d] = randF(mine, LB, UB);
        }
    }
}

//============================= FITNESS EVALUATION ======================//

// Kernel: one thread per individual. Copies the individual's DIM coordinates
// out of the flat `pop` array into a local buffer, evaluates the objective on
// it and stores the result in fit[id]. Threads with id >= POP do nothing.
__global__ void evalFit(double* pop, double* fit){
    const int id = blockIdx.x*blockDim.x + threadIdx.x;
    if(id < POP){
        double coords[DIM];
        const double* row = pop + id*DIM;
        for(int d=0; d<DIM; ++d){
            coords[d] = row[d];
        }
        fit[id] = fitness(coords, DIM);
    }
}

//======================== LOA UPDATE KERNEL ============================//

// Kernel: one LOA step for individual `id` (one thread per individual).
//
//   pop / fit   - current population and fitness values (only read here)
//   npop / nfit - output buffers receiving this individual's next state
//   rng         - per-individual cuRAND states, loaded on entry and stored on exit
//   t           - iteration counter; divides the random step of the local move
//
// Steps: pick a random individual with strictly lower fitness (if any exists),
// then with a coin flip either move relative to that individual or take a
// random step scaled by (UB-LB)/t. The candidate is clamped to [LB, UB] and
// kept only if its fitness is strictly lower than the current one.
__global__ void LOA(double* pop, double* fit, double* npop, double* nfit, curandState* rng, int t){
    const int id = blockIdx.x*blockDim.x + threadIdx.x;
    if(id >= POP) return;

    curandState gen = rng[id];
    const double myFit = fit[id];

    // Count the individuals that are strictly better than this one.
    int nBetter = 0;
    for(int j=0; j<POP; ++j){
        if(fit[j] < myFit) ++nBetter;
    }

    // Choose one of them by rank: the pick-th better individual in index order.
    int leader = -1;
    if(nBetter > 0){
        const int pick = randInt(&gen, 0, nBetter-1);
        int seen = 0;
        for(int j=0; j<POP; ++j){
            if(!(fit[j] < myFit)) continue;
            if(seen == pick){ leader = j; break; }
            ++seen;
        }
    }

    double self[DIM], trial[DIM];
    for(int d=0; d<DIM; ++d) self[d] = pop[id*DIM+d];

    // The coin is drawn even when no better individual exists.
    const double coin = curand_uniform_double(&gen);
    const bool towardLeader = (coin < 0.5) && (leader != -1);

    for(int d=0; d<DIM; ++d){
        const double r = curand_uniform_double(&gen);
        double moved;
        if(towardLeader){
            const int I = randInt(&gen, 1, 2);
            const double target = pop[leader*DIM+d];
            moved = self[d] + r*(target - I*self[d]);
        }else{
            moved = self[d] + (1-2*r)*(UB-LB)/t;
        }
        trial[d] = max(LB, min(UB, moved));
    }

    const double trialFit = fitness(trial, DIM);

    // Greedy selection: write either the candidate or the unchanged individual.
    const bool improved = trialFit < myFit;
    const double* kept = improved ? trial : self;
    for(int d=0; d<DIM; ++d) npop[id*DIM+d] = kept[d];
    nfit[id] = improved ? trialFit : myFit;

    rng[id] = gen;
}

//=======================================================================//
//                              MAIN                                     //
//=======================================================================//

// Aborts the program with file/line context when a CUDA runtime call fails.
// Without this, a failed allocation or launch would go unnoticed and the
// program would log whatever happens to be in the host buffers.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            cerr << "CUDA error at " << __FILE__ << ":" << __LINE__         \
                 << ": " << cudaGetErrorString(err_) << "\n";               \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// Host driver: initialises the population on the device, runs MAX_IT LOA
// iterations with two population/fitness buffers that are swapped after every
// step, and appends one CSV row per iteration containing the best fitness seen
// so far and the coordinates of the current best individual.
// Returns 0 on success, 1 if the CSV file cannot be opened; any CUDA failure
// terminates the process through CUDA_CHECK.
int main(){

    auto t1 = chrono::high_resolution_clock::now();

    // ================= CSV LOGGER ================= //
    ofstream csv("zakharov_log.csv");
    if(!csv){
        cerr<<"Cannot open zakharov_log.csv for writing\n";
        return 1;
    }
    csv<<"iter,best";
    for(int d=1;d<=DIM;d++) csv<<",x"<<d;
    csv<<"\n";

    double *d_pop=nullptr,*d_fit=nullptr,*d_newpop=nullptr,*d_newfit=nullptr;
    curandState *d_rng=nullptr;

    size_t PS=POP*DIM*sizeof(double), FS=POP*sizeof(double);

    CUDA_CHECK(cudaMalloc(&d_pop,PS));
    CUDA_CHECK(cudaMalloc(&d_fit,FS));
    CUDA_CHECK(cudaMalloc(&d_newpop,PS));
    CUDA_CHECK(cudaMalloc(&d_newfit,FS));
    CUDA_CHECK(cudaMalloc(&d_rng,POP*sizeof(curandState)));

    int threads=128, blocks=(POP+threads-1)/threads;

    init_population<<<blocks,threads>>>(d_pop,d_rng,(unsigned long)time(NULL));
    CUDA_CHECK(cudaGetLastError());          // launch-configuration errors
    evalFit<<<blocks,threads>>>(d_pop,d_fit);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());     // errors raised while the kernels ran

    double hfit[POP], hpop[POP*DIM];
    double best=DBL_MAX;

    for(int it=1; it<=MAX_IT; it++){

        LOA<<<blocks,threads>>>(d_pop,d_fit,d_newpop,d_newfit,d_rng,it);
        CUDA_CHECK(cudaGetLastError());
        CUDA_CHECK(cudaDeviceSynchronize());

        // The kernel wrote the next generation into the "new" buffers;
        // swapping the pointers makes it the current generation.
        swap(d_pop,d_newpop);
        swap(d_fit,d_newfit);

        CUDA_CHECK(cudaMemcpy(hfit,d_fit,FS,cudaMemcpyDeviceToHost));
        CUDA_CHECK(cudaMemcpy(hpop,d_pop,PS,cudaMemcpyDeviceToHost));

        int bi=0;
        for(int i=1;i<POP;i++) if(hfit[i]<hfit[bi]) bi=i;
        best = min(best,hfit[bi]);

        csv<<it<<","<<best;
        for(int d=0;d<DIM;d++) csv<<","<<hpop[bi*DIM+d];
        csv<<"\n";
    }

    csv.close();
    auto t2 = chrono::high_resolution_clock::now();
    cout<<"Execution Time: "<<chrono::duration<double>(t2-t1).count()<<" sec\n";
    cout<<"CSV saved as zakharov_log.csv\n";

    // Release every device allocation made above.
    CUDA_CHECK(cudaFree(d_pop));
    CUDA_CHECK(cudaFree(d_fit));
    CUDA_CHECK(cudaFree(d_newpop));
    CUDA_CHECK(cudaFree(d_newfit));
    CUDA_CHECK(cudaFree(d_rng));

    return 0;
}


Overwriting zakharov_cuda.cu


In [15]:
!nvcc -arch=sm_75 zakharov_cuda.cu -o zakharov_cuda
!./zakharov_cuda

Execution Time: 0.715493 sec
CSV saved as zakharov_log.csv


## 2. Serial (Sequential) Implementation

In [5]:

%%writefile zakharov_serial.cpp
#include <bits/stdc++.h>
using namespace std;

/* ---------------------------------------
      Random Generator
---------------------------------------*/
// Single file-level Mersenne Twister shared by randF/randInt below; seeded
// with the value of time(NULL) at program start-up.
mt19937 rng(time(NULL));

// Draws one double from a uniform_real_distribution over (a, b) using the
// file-level generator `rng`.
double randF(double a, double b) {
    return uniform_real_distribution<double>(a, b)(rng);
}
// Draws one int from a uniform_int_distribution over the closed range [a, b]
// using the file-level generator `rng`.
int randInt(int a, int b) {
    return uniform_int_distribution<int>(a, b)(rng);
}

/* ---------------------------------------
            Fitness Function
---------------------------------------*/
// Zakharov objective for a point x:
//   sum(x_i^2) + s^2 + s^4,  s = sum(0.5 * i * x_i) with i counted from 1.
// Returns 0 for an empty vector.
double zakharov(const vector<double> &x) {
    double squares = 0.0;
    double weighted = 0.0;
    int position = 0;               // 1-based index of the current coordinate
    for (double xi : x) {
        ++position;
        squares += xi * xi;
        weighted += 0.5 * position * xi;
    }
    return squares + pow(weighted, 2) + pow(weighted, 4);
}

/* ---------------------------------------
            LOA PARAMETERS
---------------------------------------*/
// Run configuration read by escape/hide/init_population/main.
int POP = 256;       // number of individuals in the population
int DIM = 5;         // coordinates per individual
int MAX_IT = 8000;   // iterations of the main loop
double LB = -100;    // lower bound applied to every coordinate
double UB = 100;     // upper bound applied to every coordinate

/* ---------------------------------------
     Escape (Global Search)
---------------------------------------*/
// Escape move (global search): builds a new position from x by moving each of
// the first DIM coordinates relative to the reference position SSA.
//
//   x   - current position
//   SSA - position of the individual being moved towards
//
// Per coordinate: new = x + r * (SSA - I * x), with r uniform over (0, 1) and
// I drawn from {1, 2}; the result is clamped to [LB, UB].
// Exactly one real and one integer draw are consumed per coordinate, matching
// the OpenMP variant of this function.
vector<double> escape(const vector<double> &x,
                      const vector<double> &SSA)
{
    vector<double> newX = x;
    for(int j=0;j<DIM;j++){
        double r = randF(0, 1);
        int I = randInt(1, 2);
        newX[j] = x[j] + r * (SSA[j] - I*x[j]);

        newX[j] = min(max(newX[j],LB),UB);
    }
    return newX;
}

/* ---------------------------------------
     Hide (Local Search)
---------------------------------------*/
// Hide move (local search): perturbs each of the first DIM coordinates of Xi
// by (1 - 2r) * (UB - LB) / t with r uniform over (0, 1), so the step range
// shrinks as the iteration counter t grows. Results are clamped to [LB, UB].
vector<double> hide(const vector<double> &Xi,int t){
    vector<double> moved(Xi);

    for(int j=0; j<DIM; ++j){
        const double r = randF(0, 1);
        const double stepped = Xi[j] + (1 - 2*r)*(UB-LB)/t;
        moved[j] = min(max(stepped,LB),UB);
    }
    return moved;
}

/* ---------------------------------------
         Initialize population
---------------------------------------*/
// Builds the starting population: POP individuals with DIM coordinates each,
// every coordinate drawn with randF(LB, UB) in row-major order.
vector<vector<double>> init_population(){
    vector<vector<double>> pop;
    pop.reserve(POP);
    for(int i=0; i<POP; ++i){
        vector<double> individual(DIM);
        for(double &gene : individual){
            gene = randF(LB, UB);
        }
        pop.push_back(std::move(individual));
    }
    return pop;
}

/* =======================================
             MAIN LOA SERIAL
=======================================*/
// Serial LOA driver: creates and evaluates the population, runs MAX_IT
// iterations in which every individual tries one escape or hide move (updates
// are applied in place, so later individuals see earlier updates of the same
// iteration), prints progress every 1000 iterations and finally reports the
// best solution found and the elapsed wall-clock time.
int main(){
    const auto started = chrono::high_resolution_clock::now();

    vector<vector<double>> pop = init_population();

    // Initial fitness of every individual.
    vector<double> fitness(POP);
    for(int i=0; i<POP; ++i){
        fitness[i] = zakharov(pop[i]);
    }

    // Best individual of the initial population.
    double bestFit = 1e18;
    vector<double> bestSol(DIM);
    for(int i=0; i<POP; ++i){
        if(fitness[i] < bestFit){
            bestFit = fitness[i];
            bestSol = pop[i];
        }
    }

    for(int it=1; it<=MAX_IT; ++it){

        for(int i=0; i<POP; ++i){

            // Indices of all individuals strictly better than individual i.
            vector<int> better;
            for(int j=0; j<POP; ++j){
                if(fitness[j] < fitness[i]) better.push_back(j);
            }

            const int betterIdx = better.empty()
                ? -1
                : better[randInt(0,(int)better.size()-1)];

            // The coin is drawn even when there is nobody to move towards.
            const double coin = randF(0,1);
            const vector<double> candidate = (coin < 0.5 && betterIdx != -1)
                ? escape(pop[i],pop[betterIdx])
                : hide(pop[i],it);

            const double f = zakharov(candidate);

            if(f < bestFit){
                bestFit = f;
                bestSol = candidate;
            }
            if(f < fitness[i]){
                fitness[i] = f;
                pop[i] = candidate;
            }
        }

        if(it % 1000 == 0){
            cout<<"Iter "<<it<<" | Best = "<<bestFit<<"\n";
        }
    }

    cout<<"\nFinal Best Solution:\n";
    for(int i=0; i<DIM; ++i){
        cout<<"x"<<i+1<<" = "<<bestSol[i]<<endl;
    }
    cout<<"\nBest " << "Zakharov" << " Value = "<<bestFit<<endl;

    const auto finished = chrono::high_resolution_clock::now();
    cout<<"\nExecution Time = "
        <<chrono::duration<double>(finished-started).count()
        <<" sec\n";

    return 0;
}


Writing zakharov_serial.cpp


In [6]:
!g++ zakharov_serial.cpp -o zakharov_serial
!./zakharov_serial

Iter 1000 | Best = 5.21409e-216
Iter 2000 | Best = 0
Iter 3000 | Best = 0
Iter 4000 | Best = 0
Iter 5000 | Best = 0
Iter 6000 | Best = 0
Iter 7000 | Best = 0
Iter 8000 | Best = 0

Final Best Solution:
x1 = 4.8995e-163
x2 = 4.83247e-163
x3 = -3.45983e-163
x4 = 7.3549e-163
x5 = -5.78636e-164

Best Zakharov Value = 0

Execution Time = 7.69702 sec


## 3. OpenMP Parallel Implementation

In [7]:

%%writefile zakharov_omp.cpp
#include <bits/stdc++.h>
#include <omp.h>
using namespace std;

/* ---------------------------------------
      Thread-safe Random Generator
---------------------------------------*/
// Draws one double from a uniform_real_distribution over (a, b) using the
// caller-supplied generator, so each thread can use its own engine.
double randF(mt19937 &rng, double a, double b) {
    return uniform_real_distribution<double>(a, b)(rng);
}
// Draws one int from a uniform_int_distribution over the closed range [a, b]
// using the caller-supplied generator.
int randInt(mt19937 &rng, int a, int b) {
    return uniform_int_distribution<int>(a, b)(rng);
}

/* ---------------------------------------
            Fitness Function
---------------------------------------*/
// Zakharov objective for a point x:
//   sum(x_i^2) + s^2 + s^4,  s = sum(0.5 * (i+1) * x_i) with i counted from 0.
// Returns 0 for an empty vector.
double zakharov(const vector<double> &x) {
    const size_t n = x.size();
    double quadratic = 0.0, linear = 0.0;
    for (size_t k = 0; k < n; ++k) {
        const double v = x[k];
        quadratic += v * v;
        linear += 0.5 * (k + 1) * v;
    }
    const double second = pow(linear, 2);
    const double fourth = pow(linear, 4);
    return quadratic + second + fourth;
}

/* ---------------------------------------
            LOA PARAMETERS
---------------------------------------*/
// Run configuration read by escape/hide/init_population/main.
int POP = 256;       // number of individuals in the population
int DIM = 5;         // coordinates per individual
int MAX_IT = 8000;   // iterations of the main loop
double LB = -100;    // lower bound applied to every coordinate
double UB = 100;     // upper bound applied to every coordinate

/* ---------------------------------------
     Escape (Global Search)  — parallel safe
---------------------------------------*/
// Escape move (global search): for each of the first DIM coordinates computes
// x + r * (SSA - I * x) with r uniform over (0, 1) and I drawn from {1, 2},
// then clamps the result to [LB, UB]. All randomness comes from `rng`, which
// the caller passes in.
vector<double> escape(const vector<double> &x,
                      const vector<double> &SSA,
                      mt19937 &rng)
{
    vector<double> moved(x);
    for(int j=0; j<DIM; ++j){
        const double r = randF(rng,0,1);
        const int I = randInt(rng,1,2);
        const double raw = x[j] + r * (SSA[j] - I*x[j]);
        moved[j] = min(max(raw,LB),UB);
    }
    return moved;
}

/* ---------------------------------------
     Hide (Local Search) — parallel safe
---------------------------------------*/
// Hide move (local search): perturbs each of the first DIM coordinates of Xi
// by (1 - 2r) * (UB - LB) / t with r uniform over (0, 1) drawn from `rng`, so
// the step range shrinks as t grows. Results are clamped to [LB, UB].
vector<double> hide(const vector<double> &Xi,int t,mt19937 &rng){
    vector<double> moved(Xi);

    for(int j=0; j<DIM; ++j){
        const double r = randF(rng,0,1);
        const double stepped = Xi[j] + (1 - 2*r)*(UB-LB)/t;
        moved[j] = min(max(stepped,LB),UB);
    }
    return moved;
}

/* ---------------------------------------
         Initialize population (PARALLEL)
---------------------------------------*/
// Builds the starting population of POP individuals with DIM coordinates each.
// Rows are distributed over the OpenMP team with a static schedule; each thread
// draws from rngs[its thread number], so `rngs` must hold at least one engine
// per thread of the team.
vector<vector<double>> init_population(vector<mt19937> &rngs){
    vector<vector<double>> pop(POP, vector<double>(DIM));

    #pragma omp parallel
    {
        mt19937 &gen = rngs[omp_get_thread_num()];

        #pragma omp for schedule(static)
        for(int i=0; i<POP; ++i){
            vector<double> &row = pop[i];
            for(double &gene : row){
                gene = randF(gen,LB,UB);
            }
        }
    }
    return pop;
}

/* =======================================
             MAIN LOA PARALLEL
=======================================*/
// OpenMP LOA driver: creates and evaluates the population, then runs MAX_IT
// iterations in which the individuals are processed in parallel, prints
// progress every 1000 iterations and finally reports the best solution found
// and the elapsed wall-clock time.
//
// Each iteration reads only the current generation (pop / fitness) and writes
// only the next generation (nextPop / nextFit); the buffers are swapped after
// the parallel region. This removes the data race of updating pop[i] and
// fitness[i] in place while other threads are scanning fitness[] and copying
// pop[betterIdx].
int main(){
    auto t1 = chrono::high_resolution_clock::now();

    // One generator per potential thread, each with its own seed.
    int threads = omp_get_max_threads();
    vector<mt19937> rngs(threads);

    random_device rd;
    for(int i=0;i<threads;i++)
        rngs[i].seed(rd()+i*111);

    vector<vector<double>> pop = init_population(rngs);
    vector<double> fitness(POP);

    // Initial fitness
    for(int i=0;i<POP;i++) fitness[i]=zakharov(pop[i]);

    double bestFit = 1e18;
    vector<double> bestSol(DIM);

    // find initial best
    for(int i=0;i<POP;i++){
        if(fitness[i]<bestFit){
            bestFit=fitness[i];
            bestSol=pop[i];
        }
    }

    // Write targets for the generation being computed.
    vector<vector<double>> nextPop(pop);
    vector<double> nextFit(fitness);

    /* --------------------------------------
                LOA ITERATIONS
       Full population parallel every step
    ---------------------------------------*/
    for(int it=1; it<=MAX_IT; it++){

        #pragma omp parallel
        {
            int tid = omp_get_thread_num();
            mt19937 &localRng = rngs[tid];

            double localBest = 1e18;
            vector<double> localBestSol(DIM);

            #pragma omp for schedule(static)
            for(int i=0;i<POP;i++){

                // pop and fitness are read-only inside this loop.
                vector<int> better;
                for(int j=0;j<POP;j++)
                    if(fitness[j]<fitness[i]) better.push_back(j);

                int betterIdx=-1;
                if(!better.empty())
                    betterIdx = better[randInt(localRng,0,(int)better.size()-1)];

                vector<double> candidate;
                if(randF(localRng,0,1)<0.5 && betterIdx!=-1)
                    candidate = escape(pop[i],pop[betterIdx],localRng);
                else
                    candidate = hide(pop[i],it,localRng);

                double f = zakharov(candidate);

                // Greedy selection into the next-generation slot owned by i.
                if(f<fitness[i]){
                    nextPop[i]=candidate;
                    nextFit[i]=f;
                }else{
                    nextPop[i]=pop[i];
                    nextFit[i]=fitness[i];
                }
                if(f<localBest){
                    localBest=f;
                    localBestSol=candidate;
                }
            }

            // Update global best safely
            #pragma omp critical
            {
                if(localBest<bestFit){
                    bestFit=localBest;
                    bestSol=localBestSol;
                }
            }
        }

        // All threads have left the parallel region; publish the new generation.
        pop.swap(nextPop);
        fitness.swap(nextFit);

        if(it % 1000 == 0)
            cout<<"Iter "<<it<<" | Best = "<<bestFit<<"\n";
    }

    cout<<"\nFinal Best Solution:\n";
    for(int i=0;i<DIM;i++) cout<<"x"<<i+1<<" = "<<bestSol[i]<<endl;
    cout<<"\nBest " << "Zakharov" << " Value = "<<bestFit<<endl;

    auto t2 = chrono::high_resolution_clock::now();
    cout<<"\nExecution Time = "
        <<chrono::duration<double>(t2-t1).count()
        <<" sec\n";

    return 0;
}


Writing zakharov_omp.cpp


In [8]:
!g++ -fopenmp zakharov_omp.cpp -o zakharov_omp
!./zakharov_omp

Iter 1000 | Best = 2.71713e-216
Iter 2000 | Best = 0
Iter 3000 | Best = 0
Iter 4000 | Best = 0
Iter 5000 | Best = 0
Iter 6000 | Best = 0
Iter 7000 | Best = 0
Iter 8000 | Best = 0

Final Best Solution:
x1 = -1.48235e-162
x2 = 4.98495e-164
x3 = 8.48052e-164
x4 = 2.6702e-163
x5 = 6.6285e-164

Best Zakharov Value = 0

Execution Time = 6.76557 sec
