In [1]:
import tensorflow as tf
from keras import models
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
from keras.datasets import cifar100
from keras.applications import MobileNet
from keras.utils import np_utils
from keras.layers import Dense,GlobalAveragePooling2D
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.applications import vgg16
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D,BatchNormalization
import math
from sklearn.preprocessing import LabelEncoder
from statistics import mean
import keras
import pandas as pd
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import MinMaxScaler

In [2]:
def encode(train_label,test_label):
  train_label= to_categorical(train_label, 100)
  test_label=to_categorical(test_label, 100)
  return train_label,test_label

In [3]:
VGGmodel = VGG16()
(train_features,train_labels),(test_features,test_labels) = cifar100.load_data()
Features = np.concatenate((train_features,test_features))
Labels = np.concatenate((train_labels,test_labels))

shuffled_train_features,shuffled_test_features,shuffled_train_labels,shuffled_test_labels=train_test_split(Features,Labels, test_size=10000,random_state=42)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


In [4]:
train_label,test_label = encode(shuffled_train_labels,shuffled_test_labels)
test_label.shape



(10000, 100)

In [5]:
#creating Model with bdropout of 0.4 nodes after every dense layer

base_model = VGG16(weights='imagenet',include_top=False,input_shape=(32, 32, 3))
output_model=Model(base_model.input,base_model.get_layer('block5_pool').output)

preprocess_images_training =preprocess_input(shuffled_train_features)
training_featureset=output_model.predict(preprocess_images_training)
training_featureset=training_featureset.reshape(50000,512)

preprocess_images_testing =preprocess_input(shuffled_test_features)
testing_featureset=base_model.predict(preprocess_images_testing)
testing_featureset=testing_featureset.reshape(10000,512)

scale=MaxAbsScaler()
training_featureset=scale.fit_transform(training_featureset)
testing_featureset=scale.fit_transform(testing_featureset)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [6]:
np.savetxt('Train_Features.txt',training_featureset,fmt='%.2f');
np.savetxt('Train_labels.txt',train_label,fmt='%.2f');
np.savetxt('Testing_features.txt',testing_featureset,fmt='%.2f')
np.savetxt('Testing_labels.txt',test_label,fmt='%.2f')

In [7]:
!pip install git+git://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

Collecting git+git://github.com/andreinechaev/nvcc4jupyter.git
  Cloning git://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-abvy7c9u
  Running command git clone -q git://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-abvy7c9u
Building wheels for collected packages: NVCCPlugin
  Building wheel for NVCCPlugin (setup.py) ... [?25l[?25hdone
  Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-cp36-none-any.whl size=4308 sha256=718ca1fa04765bd11dc70a03c138c094e221f1aeb1b6b6f2f5e208200cf96a9d
  Stored in directory: /tmp/pip-ephem-wheel-cache-d_eupblg/wheels/10/c2/05/ca241da37bff77d60d31a9174f988109c61ba989e4d4650516
Successfully built NVCCPlugin
Installing collected packages: NVCCPlugin
Successfully installed NVCCPlugin-0.0.2
created output directory at /content/src
Out bin /content/result.out


In [8]:
%%cuda --name matrix_multiplication.cu

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include "/content/sample_data/book.h"
#include<iostream>
#include <cublas_v2.h>
#include <time.h>
#include "cuda_runtime.h"
#include "curand.h"


float* csv_to_matrix(int row_dim,int col_dim)
{
  float* mat=(float *)malloc(50000*512 *sizeof(float*));
  FILE *open_file;

	open_file = fopen("Train_Features.txt", "r");

	for(int i = 0; i < row_dim; i++){
		for (int j = 0; j < col_dim; j++)
		if (fscanf(open_file, "%f,", &mat[i * col_dim + j]) ==EOF) break;
    //checking if the file is over or not
		if (feof(open_file)) break;
	} 
	fclose(open_file);
  return mat;
}

float random_values( float minimum, float maximum )
{
    float range = rand() / (float) RAND_MAX; 
    float value= minimum + range * ( maximum - minimum );
    return value;   
}


float* productcal(cublasHandle_t &handle,float* matrix1,float*matrix2,int row_matrix1,int col_matrix1,int row_matrix2, int col_matrix2){
    float alpha=1.0;
    float beta=0.0;   
    float*result= (float*)malloc(row_matrix1*col_matrix2*sizeof(float));
    float* dev_matrix1,*dev_matrix2,*dev_result;

    HANDLE_ERROR( cudaMalloc( (void**)&dev_matrix1, row_matrix1 *col_matrix1  * sizeof(float) ) );
    HANDLE_ERROR( cudaMalloc( (void**)&dev_matrix2, row_matrix2 * col_matrix2 *sizeof(float) ) );
    HANDLE_ERROR( cudaMalloc( (void**)&dev_result, row_matrix1 * col_matrix2 *sizeof(float) ) );

    cudaMemcpy(dev_matrix1,matrix1,row_matrix1*col_matrix1*sizeof(float),cudaMemcpyHostToDevice);
    cudaMemcpy(dev_matrix2,matrix2,row_matrix2*col_matrix2*sizeof(float),cudaMemcpyHostToDevice);

    cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, col_matrix2, row_matrix1,col_matrix1 , &alpha, dev_matrix2, col_matrix2, dev_matrix1, col_matrix1, &beta, dev_result, col_matrix2);

    cudaMemcpy(result, dev_result, row_matrix1 * col_matrix2 *sizeof(float), cudaMemcpyDeviceToHost);
    return result;

    cudaFree(dev_matrix1);
    cudaFree(dev_matrix2);
    cudaFree(dev_result);
    free(result);
}

float* add_bias(cublasHandle_t &handle,float* matrix1,float*matrix2,int row_matrix1,int col_matrix1,int row_matrix2, int col_matrix2){
    float alpha=1.0;
    float beta=1.0;   
    float*result= (float*)malloc(row_matrix1*col_matrix2*sizeof(float));
    float* dev_matrix1,*dev_matrix2,*dev_result;

    HANDLE_ERROR( cudaMalloc( (void**)&dev_matrix1, row_matrix1 *col_matrix1  * sizeof(float) ) );
    HANDLE_ERROR( cudaMalloc( (void**)&dev_matrix2, row_matrix2 * col_matrix2 *sizeof(float) ) );
    HANDLE_ERROR( cudaMalloc( (void**)&dev_result, row_matrix1 * col_matrix2 *sizeof(float) ) );

    cudaMemcpy(dev_matrix1,matrix1,row_matrix1*col_matrix1*sizeof(float),cudaMemcpyHostToDevice);
    cudaMemcpy(dev_matrix2,matrix2,row_matrix2*col_matrix2*sizeof(float),cudaMemcpyHostToDevice);

    cublasSgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, row_matrix1, col_matrix1, &alpha, dev_matrix1, row_matrix1, &beta, dev_matrix2, row_matrix1, dev_result, row_matrix1);

    cudaMemcpy(result, dev_result, row_matrix1 * col_matrix2 *sizeof(float), cudaMemcpyDeviceToHost);
    return result;

    cudaFree(dev_matrix1);
    cudaFree(dev_matrix2);
    cudaFree(dev_result);
    free (result);
}


float* transpose(cublasHandle_t &handle,float* matrix1,int row_matrix1,int col_matrix1){
    float alpha=1.0;
    float beta=0.0;   
    float*result= (float*)malloc(col_matrix1*row_matrix1*sizeof(float));
    float* dev_matrix1,*dev_result;

    HANDLE_ERROR( cudaMalloc( (void**)&dev_matrix1, row_matrix1 *col_matrix1  * sizeof(float) ) );
    HANDLE_ERROR( cudaMalloc( (void**)&dev_result, col_matrix1 *row_matrix1  * sizeof(float) ) );
    

    cudaMemcpy(dev_matrix1,matrix1,row_matrix1*col_matrix1*sizeof(float),cudaMemcpyHostToDevice);

    cublasSgeam(handle, CUBLAS_OP_T, CUBLAS_OP_N, row_matrix1, col_matrix1, &alpha, dev_matrix1, col_matrix1, &beta, dev_matrix1, row_matrix1, dev_result, row_matrix1);
    cudaMemcpy(result, dev_result, col_matrix1 * row_matrix1 *sizeof(float), cudaMemcpyDeviceToHost);
    return result;

    cudaFree(dev_matrix1);
    cudaFree(dev_result);
    free (result);
  }

float * inverse(cublasHandle_t &Handle,float* m,int row_matrix1,int col_matrix1){
    
    float** array_dev_matrix1,** array_dev_resut;
    float* dev_matrix1,* dev_result;
    int* dev_matrix1_pivots,* dev_matrix1_info;

    cudaMalloc(&array_dev_resut, row_matrix1 * col_matrix1 *sizeof(float*));
    cudaMalloc(&array_dev_matrix1, row_matrix1 * col_matrix1 *sizeof(float*));
    cudaMalloc(&dev_matrix1, row_matrix1 * col_matrix1 *sizeof(float*));
    cudaMemcpy(dev_matrix1, m, row_matrix1 * col_matrix1 * sizeof(float*) , cudaMemcpyHostToDevice);
    
    cudaMalloc(&dev_result, row_matrix1 * col_matrix1 *sizeof(float*));    
    cudaMemcpy(array_dev_matrix1, &dev_matrix1, sizeof(float*), cudaMemcpyHostToDevice);
    cudaMalloc(&dev_matrix1_pivots, row_matrix1 * sizeof(int));
    cudaMalloc(&dev_matrix1_info, sizeof(int));
    
    cudaMemcpy(array_dev_resut, &dev_result, sizeof(float*), cudaMemcpyHostToDevice);
    
    cublasSgetrfBatched(Handle, row_matrix1, array_dev_matrix1, row_matrix1, dev_matrix1_pivots, dev_matrix1_info, 1);
    cudaDeviceSynchronize();

    cublasSgetriBatched(Handle, row_matrix1, (const float **)array_dev_matrix1, row_matrix1, dev_matrix1_pivots, array_dev_resut, row_matrix1, dev_matrix1_info, 1);
    cudaDeviceSynchronize();

    float* result = (float*)malloc(row_matrix1 * col_matrix1 *sizeof(float*));
    cudaMemcpy(result, dev_result, row_matrix1 * col_matrix1 *sizeof(float*), cudaMemcpyDeviceToHost);
    
    cudaFree(dev_matrix1_pivots);
    cudaFree(dev_matrix1_info);
    cudaFree(dev_matrix1);
    cudaFree(dev_result);
    cudaFree(array_dev_matrix1);
    cudaFree(array_dev_resut);
    
    return result;
    free(result);
}

float sigmoiddd(float x){
     //float y = x / (1 + abs(x));
     float y=max(0.0,x);
     return y;
    
}

int main(){
  
  int hidden_neurons=2500;
  int weight_x=512;
  int weight_y=hidden_neurons;
  int bias_x=50000;
  int bias_y=hidden_neurons;
  float rand_array[bias_y]; 
  
  float* mat=(float *)malloc(50000*512 *sizeof(float*));
  float* test_features_mat=(float *)malloc(10000*512 *sizeof(float*));

  mat= csv_to_matrix(50000,512);  

  float *weights = (float *)malloc(weight_x *weight_y  * sizeof(float));
  float *bias = (float *)malloc(bias_x * bias_y * sizeof(float));
  float * H=(float *)malloc(bias_x * bias_y * sizeof(float));
  float * H_transpose=(float *)malloc(hidden_neurons * 50000 * sizeof(float));
  float * H_transpose_H=(float *)malloc(hidden_neurons * hidden_neurons * sizeof(float));
  float *H_inverse = (float *)malloc(hidden_neurons * hidden_neurons * sizeof(float));
  float *pseudo_inverse=(float*)malloc(hidden_neurons * 50000 * sizeof(float));
  float *Beta=(float*)malloc(hidden_neurons * 100 * sizeof(float));
  float* train_label=(float*)malloc(50000*100 *sizeof(float*));
  float* output=(float*)malloc(50000*100 *sizeof(float*));


   for(int i=0;i<weight_x;i++){
        for(int j=0;j<weight_y;j++){
          *(weights + i*weight_y + j)=random_values(-1.0,1.0);  
          //printf("%f \t",*(weights + i*weight_y + j));
         }
      }
   
   for (int i=0;i<bias_y;i++){
       rand_array[i]=random_values(-1.0,1.0);
   }
   
   for(int i=0;i<bias_x;i++){
        for(int j=0;j<bias_y;j++){
          *(bias + i*bias_y + j)=rand_array[j];  
         }
      }

  cublasHandle_t handle;
  cublasCreate(&handle);

  float *weights_features=productcal(handle,mat,weights,50000,512,weight_x,weight_y);
  float *bias_add=add_bias(handle,weights_features,bias,50000,hidden_neurons,bias_x,bias_y);
  
  for(int i=0;i<bias_x;i++){
        for(int j=0;j<bias_y;j++){
          *(H + i*bias_y + j)=sigmoiddd(*(bias_add+i*bias_y  + j));  
         }
         }

   free(bias_add);
   free(weights_features);      
   
   // Process of calculating Pseudo Inverse Of H.
   
   //First step : Computing Transpose of H and its matrix_multiplication with its normal form. 
  
   H_transpose= transpose(handle,H,50000,hidden_neurons);
   H_transpose_H= productcal(handle,H_transpose,H,hidden_neurons,50000,50000,hidden_neurons);
   H_inverse=inverse(handle,H_transpose_H,hidden_neurons,hidden_neurons);
   free(H_transpose_H);
   pseudo_inverse= productcal(handle,H_inverse,H_transpose,hidden_neurons,hidden_neurons,hidden_neurons,50000);
   
  free(H_transpose);  
  
  FILE *open_file2;

	open_file2 = fopen("Train_labels.txt", "r");

	for(int i = 0; i < 50000; i++){
		for (int j = 0; j < 100; j++)
		if (fscanf(open_file2, "%f,", &train_label[i * 100 + j]) ==EOF) break;
    //checking if the file is over or not
		if (feof(open_file2)) break;
	} 
	fclose(open_file2);

  Beta=productcal(handle,pseudo_inverse,train_label,hidden_neurons,50000,50000,100);
  free(pseudo_inverse);
  output= productcal(handle,H,Beta,50000,hidden_neurons,hidden_neurons,100);

FILE *open_file3;

open_file3 = fopen("output_train.csv", "w+"); 

 
for(int i=0;i<50000;i++)
{
 
    for(int j=0;j<100;j++)
 {      
        fprintf(open_file3,",%f ",* (output+ i*100+ j));

        }
            fprintf(open_file3,"\n");
}
fclose(open_file3);

free(output);
FILE *open_file4;

	open_file4 = fopen("Testing_features.txt", "r");

	for(int i = 0; i < 10000; i++){
		for (int j = 0; j < 512; j++)
		if (fscanf(open_file4, "%f,", &test_features_mat[i * 512 + j]) ==EOF) break;
    //checking if the file is over or not
		if (feof(open_file4)) break;
	} 
   
	fclose(open_file4);
  free(H);
   weights_features=productcal(handle,test_features_mat,weights,10000,512,weight_x,weight_y);
   bias_add=add_bias(handle,weights_features,bias,10000,hidden_neurons,10000,bias_y);
   float *H_test= (float *)malloc(10000 * bias_y * sizeof(float));
   for(int i=0;i<10000;i++){
    for(int j=0;j<bias_y;j++){
      *(H_test + i*bias_y + j)=sigmoiddd(*(bias_add+i*bias_y  + j));  
         }

         }
  float*test_output=productcal(handle,H_test,Beta,10000,hidden_neurons,hidden_neurons,100);

  FILE *open_file5;

  open_file5 = fopen("output_test.csv", "w+"); 

 
  for(int i=0;i<10000;i++)
  {
 
    for(int j=0;j<100;j++)
  {      
        fprintf(open_file5,",%f ",* (test_output+ i*100+ j));

        }
            fprintf(open_file5,"\n");
  }
  fclose(open_file5);

  free(test_output);
 
}    


'File written in /content/src/matrix_multiplication.cu'

In [9]:
!nvcc -o /content/src/matrix_multiplication /content/src/matrix_multiplication.cu -lcublas -lcurand

In [10]:
!/content/src/matrix_multiplication

In [11]:

#np.loadtxt(open("output_train.csv", "rb"), delimiter=",",skiprows=0)  
predicted=pd.read_csv("output_train.csv")

#predicted_labels=predicted[:,1:]
#predicted.set_axis(['C1','c2','c3','c4','c5','c6','c7','c8','c9','c10','c11'], axis='columns', inplace=False)
predicted.drop(predicted.columns[0], axis=1, inplace=True)
row1=predicted.columns
predicted.loc[-1] = row1  # adding a row
predicted.index = predicted.index + 1  # shifting index
predicted.sort_index(inplace=True)

predicted=predicted.to_numpy()


In [12]:
def accuracy(Error,target):
    count=0
 
    for i in range(len(Error)):
      index=np.argmax(target[i])    # getting the index of the target label
      index_predicted=np.argmax(Error[i]) #getting the index of the predicted label 
      if index_predicted==index:
        count=count+1
    predicted_accuracy=(count/len(Error))*100   #calculating the accuracy based on the total correct predictions vs total values
    #print(predicted_accuracy)
    return predicted_accuracy

In [13]:
predicted_accuracy=accuracy(predicted,train_label) #Calculating the accuracy of the predicted labels
print(predicted_accuracy)

62.57


In [14]:



predicted_test=pd.read_csv("output_test.csv")

#predicted_labels=predicted[:,1:]
#predicted.set_axis(['C1','c2','c3','c4','c5','c6','c7','c8','c9','c10','c11'], axis='columns', inplace=False)
predicted_test.drop(predicted_test.columns[0], axis=1, inplace=True)
row1_test=predicted_test.columns
predicted_test.loc[-1] = row1_test  # adding a row
predicted_test.index = predicted_test.index + 1  # shifting index
predicted_test.sort_index(inplace=True)
predicted_test=predicted_test.to_numpy()


In [15]:
predicted_test_accuracy=accuracy(predicted_test,test_label) #Calculating the accuracy of the predicted labels
print(predicted_test_accuracy)

41.17
