In [7]:
import random
import numpy as np
import pandas as pd

In [8]:
#Initializing the setup
def initialize(file):
    """Load the three gas columns of a CSV file as a float array.

    The file is read without header inference, so its first row arrives as
    ordinary data; that row is dropped here and only the rows below it are
    converted to numbers.

    Parameters
    ----------
    file : str, path object or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(rows_in_file - 1, 3)`` holding CSV columns
        2, 3 and 4 (zero-based), one row per record.

    Raises
    ------
    ValueError
        If a selected cell below the first row cannot be converted to float.
    """
    #load dataset
    dataset = pd.read_csv(file, sep=",", header=None)

    #raw data of 3D vectors
    #vectors represent the three green house gases [methane, CO2, NO2]
    #NOTE(review): column meaning is taken from the note above -- confirm against the file.
    # One positional slice replaces the per-cell copy loop: skip row 0 and keep
    # columns 2..4, then convert the (string) cells to float in a single pass.
    gas_columns = dataset.iloc[1:, [2, 3, 4]]
    return gas_columns.astype(float).to_numpy()

In [43]:
#Normalization of raw data
def preprocess(data):
    """Scale every row of ``data`` so that its entries sum to 1.

    A new array is returned and ``data`` itself is left untouched, because the
    division allocates its result (no explicit copy is needed).  A row whose
    sum is 0 is divided by zero, for which NumPy produces ``nan``/``inf``.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; sums are taken along axis 1.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``data``, each row divided by its sum.
    """
    # keepdims gives a column vector, so the division broadcasts across each row
    row_totals = data.sum(axis=1, keepdims=True)
    return data / row_totals

In [79]:
# Fixed seed so the random weight initialisation and the random sampling in the
# training loop give the same result on every "Restart & Run All".
SEED = 42
np.random.seed(SEED)

dimensions = np.array([20,20])   #SOM grid size (rows, columns)
iterations = 1000
learning_rate = 0.01             #initial learning rate

raw_data = initialize('greenhouse_gases_data.csv')   #one row per record, 3 gas columns

# preprocess normalises along axis 1 (per record).  The result is transposed so
# that every COLUMN is one training vector: the training loop draws its input
# with normalized[:, k] and reshapes it to (m, 1).  Previously m was the number
# of records, which made each weight vector as long as the whole dataset and
# let the loop sample only from the 3 gas columns.
normalized = preprocess(raw_data).T

m = normalized.shape[0]   #length of one input / weight vector (number of gas columns)
n = normalized.shape[1]   #number of training vectors (records)

weight_matrix = np.random.random((dimensions[0], dimensions[1], m)) #randomized weight vectors for SOM
radius = max(dimensions[0], dimensions[1])/2  #neighborhood radius
decay = iterations/np.log(radius)   #time constant used for the radius decay
n_i = 0    #nearest input

In [80]:
#find the grid position of the weight vector nearest to the input v
def nearest_input(v, matrix, m):
    """Return the grid position of the weight vector closest to ``v``.

    Closeness is the squared Euclidean distance between ``v`` and each
    ``matrix[i, j, :]``.  When several nodes are equally close, the first one
    in row-major order is returned.

    Parameters
    ----------
    v : array-like
        Input vector with ``m`` elements (any shape that reshapes to ``(m,)``,
        for example ``(m, 1)``).
    matrix : numpy.ndarray
        Weight array of shape ``(rows, cols, m)``.
    m : int
        Length of the input vector and of every weight vector.

    Returns
    -------
    numpy.ndarray
        ``np.array([i, j])``: row and column of the closest node.

    Raises
    ------
    ValueError
        If the grid contains no nodes, or if ``v`` or the weight vectors do
        not have ``m`` elements.
    """
    rows, cols = matrix.shape[0], matrix.shape[1]
    if rows == 0 or cols == 0:
        raise ValueError("matrix must contain at least one node")

    # Flatten the grid to (rows*cols, m) so all distances come from one
    # vectorised expression; both reshapes also enforce the length m.
    weights = matrix.reshape(rows * cols, m)
    target = np.asarray(v).reshape(m)
    sq_dist = np.sum((weights - target) ** 2, axis=1)

    # argmin returns the first minimum, matching a row-major scan with "<".
    # No integer sentinel is needed (np.int no longer exists in NumPy).
    i, j = divmod(int(np.argmin(sq_dist)), cols)
    return np.array([i, j])
#print('nearest input val:', nearest_input(v, weight_matrix, m))

In [81]:
def influence(init_radius, dist):
    """Gaussian neighbourhood weight ``exp(-dist / (2 * init_radius**2))``.

    Parameters
    ----------
    init_radius : float
        Neighbourhood radius used as the width of the Gaussian.
    dist : float or numpy.ndarray
        Distance term placed in the numerator as-is.
        NOTE(review): for a standard Gaussian kernel this should be the
        *squared* grid distance to the winning node -- confirm at the call site.

    Returns
    -------
    float or numpy.ndarray
        1.0 at ``dist == 0``, falling towards 0 as ``dist`` grows.
    """
    # The parentheses matter: "-dist / 2 * r**2" would multiply by r**2 instead
    # of dividing by it.  The radius comes from the argument, not from a global.
    return np.exp(-dist / (2 * init_radius ** 2))

In [82]:
def decaying(init_radius, init_lr, step, i, d):
    """Exponentially decay the learning rate and the radius for one step.

    Parameters
    ----------
    init_radius : float
        Radius value the decay is applied to.
    init_lr : float
        Learning-rate value the decay is applied to.
    step : int
        Current training step.
    i : float
        Divisor of ``step`` in the learning-rate exponent.
    d : float
        Divisor of ``step`` in the radius exponent.

    Returns
    -------
    tuple
        ``(learning_rate, radius)``.  Note the order: it is the reverse of the
        ``(init_radius, init_lr)`` parameter order.
    """
    radius_now = init_radius * np.exp(-step / d)
    lr_now = init_lr * np.exp(-step / i)
    return lr_now, radius_now

In [85]:
#learning
# decaying() expects the INITIAL radius / learning rate on every call, so they
# are captured once here.  Feeding the already-decayed values back in would
# compound the decay from step to step.
# NOTE: radius and learning_rate are overwritten below, so re-run the
# configuration cell before re-running this one.
initial_radius = radius
initial_learning_rate = learning_rate

for step in range(20):
    #Random input vector selection for training
    v = normalized[:, np.random.randint(0, n)].reshape(m, 1)

    n_i = nearest_input(v, weight_matrix, m)    #grid position of the closest weight vector
    n_i_vector = weight_matrix[n_i[0], n_i[1], :].reshape(m, 1)    #that node's weight vector

    # decaying returns (learning_rate, radius) -- in that order.
    new_learning_rate, new_radius = decaying(initial_radius, initial_learning_rate, step, iterations, decay)
    radius = new_radius
    learning_rate = new_learning_rate
