In [95]:
import random
import numpy as np
import pandas as pd

In [96]:
#Initializing the setup
def initialize(file):
  
  #load dataset
  dataset = pd.read_csv(file, sep=",", header=None)
  
  #raw data of 3D vectors
  #vectors represent the three green house gases [methane, CO2, NO2]
  raw_data = np.zeros((dataset.shape[0],3))
  
  for i in range(1, dataset.shape[0]):
      raw_data[i][0] = dataset[2][i]
      raw_data[i][1] = dataset[3][i]
      raw_data[i][2] = dataset[4][i]

  raw_data = np.delete(raw_data, 0, 0)
  return raw_data

In [97]:
#Normalization of raw data
def preprocess(data):

  normalized_data = data  #copying so that original data is not lost (maybe unnecessary, did it rn cuz not sure)

  sums = normalized_data.sum(axis=1)
  normalized_data = normalized_data / sums[:, np.newaxis]
  
  return normalized_data

In [98]:
dimensions = np.array([20,20])
iterations = 1000
learning_rate = 0.01

raw_data = initialize('greenhouse_gases_data.csv')

m = raw_data.shape[0]
n = raw_data.shape[1]

weight_matrix =  np.random.random((dimensions[0], dimensions[1], m)) #randomized weight vector for SOM
radius = max(dimensions[0], dimensions[1])/2  #neighborhood radius
decay = iterations/np.log(radius)
n_i = 0    #nearest input

normalized = preprocess(raw_data)
#print(normalized)

In [99]:
#find the nearest input for v
def nearest_input(v, matrix, m):
    min_dist = 1000000000
    
    x = matrix.shape[0]
    y = matrix.shape[1]
    
    for i in range(x):
        for j in range(y):
            w = matrix[i, j, :].reshape(m, 1)
            dist = np.sum((w-v)**2)
            if dist < min_dist:
                #min_dist = dist
                #print('check')
                n_i = np.array([i, j])

    return n_i
#print('nearest input val:', nearest_input(v, weight_matrix, m))

In [100]:
def influence(init_radius, dist):
    
    inf = np.exp(-dist/ 2*(radius**2))
    
    return inf

In [101]:
def decaying(init_radius, init_lr, step, i, d):
    l_r = init_lr *np.exp(-step/ i)
    r = init_radius*np.exp(-step/ d)
    return l_r, r

In [102]:
#learning
for step in range(iterations):
    #Random input vector selection for training
    v = normalized[:, np.random.randint(0, n)].reshape(np.array([m, 1]))
    #print("v:", v)
    
    n_i = nearest_input(v, weight_matrix, m)    #nearest input
    n_i_vector = weight_matrix[n_i[0], n_i[1], :].reshape(m,1)    #nearest input vector
    #print("n,i:", n_i)
    
    
    learning_rate, radius = decaying(radius, learning_rate, step, iterations, decay)
    print("radius: ", radius)
    print("lr: ", learning_rate)


radius:  10.0
lr:  0.01
radius:  9.977000638225533
lr:  0.009990004998333751
radius:  9.931160484209338
lr:  0.009970044955033732
radius:  9.862794856312105
lr:  0.009940179640539355
radius:  9.772372209558107
lr:  0.009900498337491683
radius:  9.660508789898135
lr:  0.009851119396030629
radius:  9.52796164023652
lr:  0.009792189645694598
radius:  9.375620069258803
lr:  0.00972388366801247
radius:  9.204495717531714
lr:  0.009646402934831233
radius:  9.01571137605957
lr:  0.009559974818331003
radius:  8.810488730080143
lr:  0.009464851479534842
radius:  8.59013521505396
lr:  0.009361308642916193
radius:  8.356030182312484
lr:  0.009249644265435397
radius:  8.109610578538412
lr:  0.009130177108992662
radius:  7.852356346100722
lr:  0.00900324522586266
radius:  7.585775750291841
lr:  0.008869204367171578
radius:  7.311390834834177
lr:  0.008728426324887196
radius:  7.030723198838338
lr:  0.008581297218113942
radius:  6.745280276979223
lr:  0.008428215734716204
radius:  6.456542290346558
