In [1]:
from utility import *

In [2]:
import numpy as np

In [3]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [4]:
def r2(y_true, y_pred):
    """Return the R^2 score of `y_pred` against `y_true` (delegates to sklearn's `r2_score`)."""
    score = r2_score(y_true, y_pred)
    return score

def mse(y_true, y_pred):
    """Return the mean squared error of `y_pred` against `y_true` (delegates to sklearn's `mean_squared_error`)."""
    squared_error = mean_squared_error(y_true, y_pred)
    return squared_error

def mae(y_true, y_pred):
    """Return the mean absolute error of `y_pred` against `y_true` (delegates to sklearn's `mean_absolute_error`)."""
    absolute_error = mean_absolute_error(y_true, y_pred)
    return absolute_error

In [5]:
from sklearn.utils.validation import check_array 

def mean_absolute_percentage_error(y_true, y_pred):
    '''
    Mean absolute percentage error (MAPE), returned in percent.

    Written here because the scikit-learn in use with this notebook offers no MAPE
    metric: the denominator can be 0, in which case the value is undefined.

    Parameters
    ----------
    y_true : array-like of numbers
        Ground-truth values; used as the denominator.
    y_pred : array-like of numbers
        Predicted values, matched to `y_true` by position.

    Returns
    -------
    float
        mean(|(y_true - y_pred) / y_true|) * 100.

    Notes
    -----
    Both inputs are converted to float numpy arrays first, so that
      * plain Python lists are accepted,
      * integer inputs are not floor-divided under Python 2,
      * pandas objects are compared by position, not by index label.
    Entries where `y_true` is 0 are not filtered out: numpy's division yields
    inf (or nan for 0/0) there, and that value propagates into the mean.
    '''
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [6]:
def mape(y_true, y_pred):
    """Short alias for `mean_absolute_percentage_error`; arguments are passed through unchanged."""
    error_percent = mean_absolute_percentage_error(y_true, y_pred)
    return error_percent

### Reading the data 

In [143]:
data_slice = pd.read_csv('dataSlice.csv')

In [144]:
data_slice.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,330,331,332,333,334,335,336,337,338,339
0,0,0.5,1.5,0.5,0.0,300.0,1876.72,1069.29,895.958,786.233,...,752.526,749.449,746.517,743.726,741.068,738.531,736.101,733.762,731.501,729.177
1,1,0.5,1.0,0.5,0.0,300.0,1959.09,1068.08,896.858,787.094,...,738.167,735.334,732.608,729.997,727.499,725.109,722.819,720.617,718.494,716.318
2,2,0.0,0.5,0.5,0.0,300.0,1643.9,1019.1,864.489,762.822,...,721.491,718.93,716.449,714.058,711.762,709.56,707.447,705.416,703.46,701.459
3,3,0.5,0.5,0.5,0.0,300.0,1845.84,1067.81,897.931,788.166,...,725.587,722.97,720.436,717.993,715.647,713.397,711.238,709.163,707.163,705.117
4,4,0.0,0.0,0.5,0.0,300.0,1554.23,1015.72,863.362,762.239,...,716.77,714.288,711.877,709.548,707.308,705.157,703.092,701.108,699.196,697.242


In [3]:
toolpath, state, endTime = parse_toolpath('data/','toolpath.crs',0.1)

In [63]:
def createDatabase(dataframe, timesteps=None):
    """
    Build a table with one row per (timestep, voxel) pair for temperature regression.

    For every requested timestep and every voxel key in the mapping returned by
    `loadDict('indices')`, a row is assembled from:
      * the timestep and the voxel coordinates (the mapping key, unpacked as x, y, z),
      * 'timeOfCreation': element [3] of the `dataframe` row labelled by the mapping value,
      * the laser position `toolpath[timestep-1]` and its distance to the voxel,
      * 'T0(t-1)': `getTemperature(voxel, timestep-1)`,
      * 'T1(t/t-1)' .. 'T26(t/t-1)': `findAdjustedNeighborTemperatures(voxel, timestep)`.

    Relies on names provided outside this function: `toolpath`, `loadDict`,
    `Coordinate`, `getTemperature`, `findAdjustedNeighborTemperatures`, `time`, `pd`.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame indexed by the row labels stored in the 'indices' mapping.
    timesteps : iterable of int, optional
        1-based timestep numbers to generate rows for. Defaults to range(1, 5),
        the range this function previously had hard-coded.
        # NOTE(review): the intent appears to be "every timestep present in
        # `dataframe`"; confirm the right upper bound before widening the default.

    Returns
    -------
    pandas.DataFrame
        Rows ordered by timestep, then by iteration order of the 'indices' mapping.

    Side effects
    ------------
    Prints the elapsed wall-clock time.
    """
    start = time.time()

    neighborColumns = ['T'+str(neighbor+1)+'(t/t-1)' for neighbor in range(26)]

    # NOTE(review): 'T_self' is listed here but no row ever assigns it, so the
    # column comes out entirely NaN. Populate it or drop it once the intended
    # source of the target temperature is confirmed.
    columns = ['timestep','x(voxel)','y(voxel)','z(voxel)','timeOfCreation','x(laser)','y(laser)','z(laser)','distance(laser)',
               'T0(t-1)'] + neighborColumns +['T_self']

    if timesteps is None:
        timesteps = range(1,5)

    # The same 'indices' file was previously reloaded on every timestep; load it once.
    indices = loadDict('indices')

    dictList = []
    for timestep in timesteps:
        # The laser position depends only on the timestep, not on the voxel.
        x_laser,y_laser,z_laser = tuple(toolpath[timestep-1])
        laserPosObject = Coordinate(x_laser,y_laser,z_laser)

        for voxel, dfIndex in indices.items():
            x,y,z = voxel
            voxelPosObject = Coordinate(x,y,z)
            distance_laser = voxelPosObject.distance(laserPosObject)

            row = dataframe.loc[dfIndex].tolist()
            time_creation = row[3]

            T_voxel_previous = getTemperature(voxelPosObject,timestep-1)

            dictionary = {'timestep':timestep, 'x(voxel)':x, 'y(voxel)':y, 'z(voxel)':z,
                          'timeOfCreation':time_creation, 'x(laser)':x_laser,'y(laser)':y_laser,'z(laser)':z_laser,
                          'distance(laser)':distance_laser, 'T0(t-1)':T_voxel_previous}

            # Pair each neighbor temperature with its column name; any extra
            # entries would have been dropped by `columns=` below anyway.
            neighborTemps = findAdjustedNeighborTemperatures(voxelPosObject,timestep)
            dictionary.update(zip(neighborColumns, neighborTemps))
            dictList.append(dictionary)

    database = pd.DataFrame(dictList,columns=columns)
    stop = time.time()
    # Single-argument form prints the same text under Python 2 and Python 3.
    print('Database creation took %s seconds' % (stop-start))
    return database

### <font color="magenta"> Testing the utility functions</font>

In [2]:
findAdjustedNeighborTemperatures(Coordinate(0.5,1,0.5),200)

[626.254,
 639.962,
 618.243,
 641.141,
 300,
 634.361,
 614.561,
 640.055,
 627.957,
 654.206,
 300,
 300,
 624.5840000000002,
 647.7819999999998,
 300,
 300,
 630.9680000000002,
 642.3430000000002,
 300,
 300,
 300,
 300,
 621.264,
 644.276,
 632.389,
 656.03]

In [5]:
printNeighbors(Coordinate(0.0,1.0,0.0))

The neighbors of point Coordinate(0.0,1.0,0.0)  are:

Immediate Neighbors: 

Coordinate(-0.5,1.0,0.0)
Coordinate(0.5,1.0,0.0)
Coordinate(0.0,0.5,0.0)
Coordinate(0.0,1.5,0.0)
Coordinate(0.0,1.0,-0.5)
Coordinate(0.0,1.0,0.5)

2-D diagonal Neighbors: 

Coordinate(-0.5,0.5,0.0)
Coordinate(-0.5,1.5,0.0)
Coordinate(0.5,0.5,0.0)
Coordinate(0.5,1.5,0.0)
Coordinate(0.0,0.5,-0.5)
Coordinate(0.0,1.5,-0.5)
Coordinate(0.0,0.5,0.5)
Coordinate(0.0,1.5,0.5)
Coordinate(-0.5,1.0,-0.5)
Coordinate(0.5,1.0,-0.5)
Coordinate(-0.5,1.0,0.5)
Coordinate(0.5,1.0,0.5)

3-D diagonal Neighbors: 

Coordinate(-0.5,0.5,-0.5)
Coordinate(-0.5,1.5,-0.5)
Coordinate(0.5,0.5,-0.5)
Coordinate(0.5,1.5,-0.5)
Coordinate(-0.5,0.5,0.5)
Coordinate(-0.5,1.5,0.5)
Coordinate(0.5,0.5,0.5)
Coordinate(0.5,1.5,0.5)


In [42]:
database.head()

Unnamed: 0,timestep,x(voxel),y(voxel),z(voxel),timeOfCreation,x(laser),y(laser),z(laser),distance(laser),T0(t-1),...,T17(t/t-1),T18(t/t-1),T19(t/t-1),T20(t/t-1),T21(t/t-1),T22(t/t-1),T23(t/t-1),T24(t/t-1),T25(t/t-1),T26(t/t-1)
0,331,5.5,4.5,0.5,0.5,14.068543,14.25,1.5,13.018541,976.909,...,971.259,989.789,300.0,300.0,300.0,300.0,946.171,998.136,964.228,1017.03
1,331,11.0,19.5,1.0,1.0,14.068543,14.25,1.5,6.101512,679.058,...,300.0,300.0,700.751,676.327,697.599,673.645,300.0,300.0,300.0,300.0
2,332,5.5,4.5,0.5,0.5,11.468543,14.25,1.5,11.475452,971.404,...,965.789,983.968,300.0,300.0,300.0,300.0,940.988,992.324,958.733,1010.85
3,332,11.0,19.5,1.0,1.0,11.468543,14.25,1.5,5.294529,684.909,...,300.0,300.0,707.313,681.878,704.324,679.097,300.0,300.0,300.0,300.0


In [43]:
indices = loadNumpy('indices_slice')

In [64]:
db = createDatabase(df)
db.head()

Database creation took 480.207699776 seconds


Unnamed: 0,timestep,x(voxel),y(voxel),z(voxel),timeOfCreation,x(laser),y(laser),z(laser),distance(laser),T0(t-1),...,T17(t/t-1),T18(t/t-1),T19(t/t-1),T20(t/t-1),T21(t/t-1),T22(t/t-1),T23(t/t-1),T24(t/t-1),T25(t/t-1),T26(t/t-1)
0,1,14.5,1.0,0.5,0.5,0.0,0.75,0.5,14.502155,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
1,1,1.5,4.0,1.5,1.5,0.0,0.75,0.5,3.716517,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
2,1,2.0,2.5,1.5,1.5,0.0,0.75,0.5,2.839454,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
3,1,6.5,4.0,1.5,1.5,0.0,0.75,0.5,7.3357,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
4,1,13.0,13.5,1.0,1.0,0.0,0.75,0.5,18.215721,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0


In [60]:
len(db)

4803

In [62]:
db.tail()

Unnamed: 0,timestep,x(voxel),y(voxel),z(voxel),timeOfCreation,x(laser),y(laser),z(laser),distance(laser),T0(t-1),...,T17(t/t-1),T18(t/t-1),T19(t/t-1),T20(t/t-1),T21(t/t-1),T22(t/t-1),T23(t/t-1),T24(t/t-1),T25(t/t-1),T26(t/t-1)
4798,1,0.5,4.5,0.5,0.5,0.0,0.75,0.5,3.783186,300,...,300,300,300.0,300.0,300.0,300.0,300,300,300,300
4799,1,15.5,6.0,1.0,1.0,0.0,0.75,0.5,16.372614,300,...,300,300,300.0,300.0,300.0,300.0,300,300,300,300
4800,1,5.5,4.5,0.5,0.5,0.0,0.75,0.5,6.656763,300,...,300,300,300.0,300.0,300.0,300.0,300,300,300,300
4801,1,11.0,19.5,1.0,1.0,0.0,0.75,0.5,21.744252,300,...,300,300,300.0,300.0,300.0,300.0,300,300,300,300
4802,1,0.5,0.0,1.0,0.5,0.0,0.75,0.5,1.030776,300,...,300,300,300.0,300.0,300.0,300.0,300,300,300,300


In [298]:
import os
def _chunkFileNames(directory='data'):
    """List the `data_<start>_<stop>.npy` chunk file names in `directory`, ordered by start timestep.

    Start and stop are parsed from each matching entry and kept together as a
    pair, so a file's stop number can never be attached to another file's start.
    The names are rebuilt from the parsed integers.
    """
    spans = []
    for item in os.listdir(directory):
        if "data_" in item and ".npy" in item:
            timeStep_start = int(item.split('data_')[1].split('_')[0])
            timeStep_stop = int(item.split('_')[2].split('.npy')[0])
            spans.append((timeStep_start, timeStep_stop))
    spans.sort()
    return ['data_'+str(begin)+'_'+str(end)+'.npy' for begin, end in spans]


def dataFrames(num,columns=columns):
    """Load one chunk file from 'data' as a DataFrame.

    Parameters
    ----------
    num : int
        Position of the chunk in the start-timestep ordering (normal list
        indexing, so negative values count from the end).
    columns : list of str
        Column labels for the loaded array.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    IndexError
        If `num` is out of range for the chunk files found.
    """
    files = _chunkFileNames()
    return pd.DataFrame(loadNumpy('data/'+files[num]),columns=columns)


def combineDataFrames(columns=columns):
    """Load every chunk file from 'data' and stack them row-wise into one DataFrame.

    Chunks are appended in ascending start-timestep order.

    Parameters
    ----------
    columns : list of str
        Column labels for the stacked array.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no chunk file is found in 'data'.
    """
    files = _chunkFileNames()
    if not files:
        raise ValueError("no data_<start>_<stop>.npy files found in 'data'")
    # One concatenate call; appending chunk by chunk re-copies the growing array each time.
    array = np.concatenate([loadNumpy('data/'+name) for name in files],axis=0)
    return pd.DataFrame(array,columns=columns)
   

In [202]:
df = combineDataFrames()

In [203]:
df.head()

Unnamed: 0,timestep,x_voxel,y_voxel,z_voxel,timeOfCreation,x_laser,y_laser,z_laser,x_distance,y_distance,...,T18_t_t-1,T19_t_t-1,T20_t_t-1,T21_t_t-1,T22_t_t-1,T23_t_t-1,T24_t_t-1,T25_t_t-1,T26_t_t-1,T_self
0,1.0,14.5,1.0,0.5,0.5,0.0,0.75,0.5,14.5,0.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
1,1.0,1.5,4.0,1.5,1.5,0.0,0.75,0.5,1.5,3.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
2,1.0,2.0,2.5,1.5,1.5,0.0,0.75,0.5,2.0,1.75,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
3,1.0,6.5,4.0,1.5,1.5,0.0,0.75,0.5,6.5,3.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
4,1.0,13.0,13.5,1.0,1.0,0.0,0.75,0.5,13.0,12.75,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0


In [None]:
# Column labels for the per-voxel feature arrays: 26 neighbor-temperature
# columns, framed by the voxel/laser descriptors and the target column.
neighborColumns = ['T' + str(number) + '_t_t-1' for number in range(1, 27)]

leadingColumns = [
    'timestep',
    'x_voxel', 'y_voxel', 'z_voxel',
    'timeOfCreation',
    'x_laser', 'y_laser', 'z_laser',
    'x_distance', 'y_distance', 'z_distance',
    'euclidean_distance_laser',
    'T0_t-1',
]
columns = leadingColumns + neighborColumns + ['T_self']

#### This covers the first 334 timesteps

In [210]:
df_mod = df[['timestep','x_distance','y_distance','z_distance','T0_t-1']+neighborColumns+['T_self']]

In [283]:
df.to_csv('additive_complete.csv',index=False)

In [280]:
df_mod.to_csv('additive_modified.csv',index=False)

#### Analyze additive_complete

In [8]:
df = pd.read_csv('data/additive_complete.csv')

In [9]:
len(df)

1604202

In [10]:
df.tail()

Unnamed: 0,timestep,x_voxel,y_voxel,z_voxel,timeOfCreation,x_laser,y_laser,z_laser,x_distance,y_distance,...,T18_t_t-1,T19_t_t-1,T20_t_t-1,T21_t_t-1,T22_t_t-1,T23_t_t-1,T24_t_t-1,T25_t_t-1,T26_t_t-1,T_self
1604197,334.0,0.5,4.5,0.5,0.5,6.268543,14.25,1.5,5.768543,9.75,...,857.352,300.0,300.0,300.0,300.0,824.853,865.511,837.031,879.02,832.61
1604198,334.0,15.5,6.0,1.0,1.0,6.268543,14.25,1.5,9.231457,8.25,...,1003.2,990.635,1041.39,967.123,1015.84,1002.59,1055.08,978.835,1029.03,1008.11
1604199,334.0,5.5,4.5,0.5,0.5,6.268543,14.25,1.5,0.768543,9.75,...,972.698,300.0,300.0,300.0,300.0,930.996,980.765,948.186,998.721,955.416
1604200,334.0,11.0,19.5,1.0,1.0,6.268543,14.25,1.5,4.731457,5.25,...,300.0,725.64,694.689,725.494,693.17,300.0,300.0,300.0,300.0,706.502
1604201,334.0,0.5,0.0,1.0,0.5,6.268543,14.25,1.5,5.768543,14.25,...,733.828,300.0,705.416,300.0,718.936,300.0,728.334,300.0,736.967,720.916


In [214]:
featureColumns = ['timestep','x_distance','y_distance','z_distance','T0_t-1']+neighborColumns

In [211]:
df_mod.head()

Unnamed: 0,timestep,x_distance,y_distance,z_distance,T0_t-1,T1_t_t-1,T2_t_t-1,T3_t_t-1,T4_t_t-1,T5_t_t-1,...,T18_t_t-1,T19_t_t-1,T20_t_t-1,T21_t_t-1,T22_t_t-1,T23_t_t-1,T24_t_t-1,T25_t_t-1,T26_t_t-1,T_self
0,1.0,14.5,0.25,0.0,300.0,300.0,300.0,300.0,300.0,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
1,1.0,1.5,3.25,1.0,300.0,300.0,300.0,300.0,300.0,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
2,1.0,2.0,1.75,1.0,300.0,300.0,300.0,300.0,300.0,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
3,1.0,6.5,3.25,1.0,300.0,300.0,300.0,300.0,300.0,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
4,1.0,13.0,12.75,0.5,300.0,300.0,300.0,300.0,300.0,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0


In [None]:
# The column label contains '-', so it can only be reached with bracket indexing.
df['T1_t_t-1'].head()

In [227]:
df_mod.tail()

Unnamed: 0,timestep,x_distance,y_distance,z_distance,T0_t-1,T1_t_t-1,T2_t_t-1,T3_t_t-1,T4_t_t-1,T5_t_t-1,...,T18_t_t-1,T19_t_t-1,T20_t_t-1,T21_t_t-1,T22_t_t-1,T23_t_t-1,T24_t_t-1,T25_t_t-1,T26_t_t-1,T_self
1604197,334.0,5.768543,9.75,1.0,836.181,831.077,849.403,813.821,856.458,300.0,...,857.352,300.0,300.0,300.0,300.0,824.853,865.511,837.031,879.02,832.61
1604198,334.0,9.231457,8.25,0.5,1015.31,1027.26,995.345,983.442,1041.56,1003.54,...,1003.2,990.635,1041.39,967.123,1015.84,1002.59,1055.08,978.835,1029.03,1008.11
1604199,334.0,0.768543,9.75,1.0,960.701,946.256,969.015,931.773,986.195,300.0,...,972.698,300.0,300.0,300.0,300.0,930.996,980.765,948.186,998.721,955.416
1604200,334.0,4.731457,5.25,0.5,697.6,706.869,696.262,727.966,690.818,702.247,...,300.0,725.64,694.689,725.494,693.17,300.0,300.0,300.0,300.0,706.502
1604201,334.0,5.768543,14.25,0.5,723.184,720.296,729.854,300.0,726.645,704.856,...,733.828,300.0,705.416,300.0,718.936,300.0,728.334,300.0,736.967,720.916


In [230]:
len(df),1604202/3

(1604202, 534734)

In [231]:
# Hold out the last third of the rows as the test set
# (integer division: 534734 for the 1604202-row frame).
testSplit = len(df) // 3

In [274]:
X = df_mod.loc[:,featureColumns ]

In [216]:
X.shape

(1604202, 31)

In [217]:
y = df_mod['T_self']

In [218]:
from sklearn.linear_model import LinearRegression,Ridge

In [219]:
from sklearn.model_selection import train_test_split

In [251]:
from sklearn.utils import shuffle

### Random Split 

In [241]:
# Fixed seed so the random 67/33 split is the same on every re-run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [242]:
linear = LinearRegression()
linear.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [243]:
predicted = linear.predict(X_test)

In [244]:
r2_score(y_test,predicted)

0.9388742734835486

### Sequential Split with Shuffling

In [275]:
# Sequential split: the last `testSplit` rows are held out, everything before them trains.
# Each part is shuffled on its own, with a fixed seed so the row order is reproducible.
X_train,y_train = shuffle(X[:-testSplit], y[:-testSplit], random_state=0)
X_test,y_test = shuffle(X[-testSplit:], y[-testSplit:], random_state=0)

In [276]:
linear = LinearRegression()
linear.fit(X_train,y_train)
predicted = linear.predict(X_test)

In [277]:
r2_score(y_test,predicted) ,mape(y_test,predicted)

(0.9312097600112107, 3.3146702657037452)

<i><font color="magenta"> This means that, given the information from prior timesteps, we can predict the temperature profile of future timesteps </font></i>

In [248]:
len(X_train), len(X_test)

(1069468, 534734)

In [255]:
X_train.tail()

Unnamed: 0,timestep,x_distance,y_distance,z_distance,T0_t-1,T1_t_t-1,T2_t_t-1,T3_t_t-1,T4_t_t-1,T5_t_t-1,...,T17_t_t-1,T18_t_t-1,T19_t_t-1,T20_t_t-1,T21_t_t-1,T22_t_t-1,T23_t_t-1,T24_t_t-1,T25_t_t-1,T26_t_t-1
800999,167.0,10.315729,1.25,0.5,300.0,300.0,300.0,300.0,300.0,1669.95,...,300.0,300.0,1545.44,1676.44,1568.49,1681.56,300.0,300.0,300.0,300.0
545247,114.0,4.2,7.75,0.5,300.0,300.0,300.0,300.0,300.0,701.885,...,300.0,300.0,666.364,703.395,698.521,740.202,300.0,300.0,300.0,300.0
876579,183.0,3.415729,6.75,0.5,573.727,576.263,572.442,588.912,562.806,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
520009,109.0,1.2,1.75,0.5,300.0,300.0,300.0,300.0,300.0,1776.59,...,300.0,300.0,1644.54,1721.03,1706.13,2464.32,300.0,300.0,300.0,300.0
752021,157.0,6.315729,3.75,0.5,678.279,680.137,704.972,770.593,646.187,300.0,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0


In [256]:
X_test.head()

Unnamed: 0,timestep,x_distance,y_distance,z_distance,T0_t-1,T1_t_t-1,T2_t_t-1,T3_t_t-1,T4_t_t-1,T5_t_t-1,...,T17_t_t-1,T18_t_t-1,T19_t_t-1,T20_t_t-1,T21_t_t-1,T22_t_t-1,T23_t_t-1,T24_t_t-1,T25_t_t-1,T26_t_t-1
1409941,294.0,14.231458,5.25,0.5,738.198,729.813,754.683,774.706,719.887,745.974,...,300.0,300.0,764.192,717.253,782.006,733.25,300.0,300.0,300.0,300.0
1340181,280.0,10.668543,1.25,1.0,1479.77,1386.96,1489.8,1518.03,1397.94,300.0,...,1570.68,1620.79,300.0,300.0,300.0,300.0,1635.4,1475.3,1655.81,1503.39
1591652,332.0,4.031457,11.25,1.0,887.032,896.406,871.92,862.076,908.532,300.0,...,899.701,880.145,300.0,300.0,300.0,300.0,878.977,921.617,860.076,901.34
1195597,249.0,15.0,0.264375,0.768862,818.723,822.922,809.712,800.486,833.785,300.0,...,827.108,809.853,300.0,300.0,300.0,300.0,812.276,842.526,795.641,824.643
1093580,228.0,13.084271,15.25,0.5,300.0,300.0,300.0,300.0,300.0,697.722,...,300.0,300.0,691.511,719.795,676.261,703.272,300.0,300.0,300.0,300.0


### Removing the neighboring columns

In [287]:
featureColumns_no_neighbor = ['timestep','x_distance','y_distance','z_distance','T0_t-1']

In [288]:
X = df_mod.loc[:,featureColumns_no_neighbor]
y = df_mod['T_self']

In [289]:
# Sequential split on the reduced feature set: the last `testSplit` rows are held out.
# Each part is shuffled on its own, with a fixed seed so the row order is reproducible.
X_train,y_train = shuffle(X[:-testSplit], y[:-testSplit], random_state=0)
X_test,y_test = shuffle(X[-testSplit:], y[-testSplit:], random_state=0)

In [260]:
X.head()

Unnamed: 0,timestep,x_distance,y_distance,z_distance,T0_t-1
0,1.0,14.5,0.25,0.0,300.0
1,1.0,1.5,3.25,1.0,300.0
2,1.0,2.0,1.75,1.0,300.0
3,1.0,6.5,3.25,1.0,300.0
4,1.0,13.0,12.75,0.5,300.0


In [261]:
linear = LinearRegression()
linear.fit(X_train,y_train)
predicted = linear.predict(X_test)

In [262]:
r2_score(y_test,predicted)

0.6836431654690913

In [292]:
ridge = Ridge()
ridge.fit(X_train,y_train)
predicted = ridge.predict(X_test)

In [293]:
r2_score(y_test,predicted)

0.6836432821545422

In [266]:
mean_absolute_percentage_error(y_test,predicted)

13.384381877965682

In [290]:
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor

In [291]:
# Seed the forest so that, for a given training set, the fitted model and its scores are reproducible.
rf = RandomForestRegressor(n_jobs=-1, random_state=0)
rf.fit(X_train,y_train)
predicted = rf.predict(X_test)
r2(y_test,predicted) ,mape(y_test,predicted)

(0.9686697912926572, 1.66312806128448)

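<i><font color="magenta"> For the linear models, the prediction accuracy goes down when the neighboring-element temperatures are removed (R² drops from about 0.93 to about 0.68). The random forest, however, still reaches R² ≈ 0.97 on the same reduced feature set. </font></i>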

## Future Experiments :
* Look at the mae, mse but especially mape
* See what happens when we reduce the layer 
* See what happens when we increase the information by including distance
* Split temperature for higher layers with lower layers
* Look at what degree of columns is enough: does decreasing it reduce the R², and does increasing it increase the R²?
* Try other algorithms, especially linear ones: do they help?

### Bigger Slice

In [285]:
df_new = pd.DataFrame(loadNumpy('data_1_2'),columns=columns)

In [286]:
df_new.head()

Unnamed: 0,timestep,x_voxel,y_voxel,z_voxel,timeOfCreation,x_laser,y_laser,z_laser,x_distance,y_distance,...,T18_t_t-1,T19_t_t-1,T20_t_t-1,T21_t_t-1,T22_t_t-1,T23_t_t-1,T24_t_t-1,T25_t_t-1,T26_t_t-1,T_self
0,1.0,10.0,3.0,3.0,3.0,0.0,0.75,0.5,10.0,2.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
1,1.0,9.5,2.0,2.0,2.0,0.0,0.75,0.5,9.5,1.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
2,1.0,16.0,16.0,2.0,2.0,0.0,0.75,0.5,16.0,15.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
3,1.0,19.5,15.0,2.0,2.0,0.0,0.75,0.5,19.5,14.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
4,1.0,11.5,17.5,2.5,2.5,0.0,0.75,0.5,11.5,16.75,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0


In [309]:
def combineDataFrames_Big(prefix='data_6l',columns=columns):
    """Load every `<name>_<start>_<stop>.npy` chunk from directory `prefix` and stack them row-wise.

    `<name>` is the last path component of `prefix`, so the default reads
    'data_6l/data_6l_<start>_<stop>.npy'. Chunks are appended in ascending
    start-timestep order; start and stop are parsed per file and kept together.

    Parameters
    ----------
    prefix : str
        Directory holding the chunk files; its base name is also the file-name
        stem. Defaults to 'data_6l', the location previously hard-coded for loading.
    columns : list of str
        Column labels for the stacked array.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no matching chunk file is found, or a matching file name does not
        carry two integer fields after the stem.
    """
    stem = os.path.basename(os.path.normpath(prefix)) + '_'
    suffix = '.npy'

    spans = []
    for item in os.listdir(prefix):
        if item.startswith(stem) and item.endswith(suffix):
            # What is left between stem and extension is '<start>_<stop>'.
            fields = item[len(stem):-len(suffix)].split('_')
            if len(fields) != 2:
                raise ValueError('unexpected chunk file name: ' + item)
            spans.append((int(fields[0]), int(fields[1])))
    spans.sort()

    if not spans:
        raise ValueError('no ' + stem + '<start>_<stop>.npy files found in ' + prefix)

    chunks = [loadNumpy(os.path.join(prefix, stem + str(begin) + '_' + str(end) + suffix))
              for begin, end in spans]
    # One concatenate call; appending chunk by chunk re-copies the growing array each time.
    return pd.DataFrame(np.concatenate(chunks,axis=0),columns=columns)

In [310]:
df_big = combineDataFrames_Big()

In [311]:
len(df_big)

3064314

In [307]:
df_big.tail()

Unnamed: 0,timestep,x_voxel,y_voxel,z_voxel,timeOfCreation,x_laser,y_laser,z_laser,x_distance,y_distance,...,T18_t_t-1,T19_t_t-1,T20_t_t-1,T21_t_t-1,T22_t_t-1,T23_t_t-1,T24_t_t-1,T25_t_t-1,T26_t_t-1,T_self
2305435,319.0,18.0,7.5,3.0,3.0,2.268542,11.25,1.5,15.731458,3.75,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
2305436,319.0,19.5,8.5,2.0,2.0,2.268542,11.25,1.5,17.231458,2.75,...,300.0,1374.17,1297.72,1204.46,1353.23,300.0,300.0,300.0,300.0,300.0
2305437,319.0,2.0,7.0,2.5,2.5,2.268542,11.25,1.5,0.268542,4.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
2305438,319.0,3.5,12.5,2.5,2.5,2.268542,11.25,1.5,1.231458,1.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0
2305439,319.0,18.0,12.5,2.5,2.5,2.268542,11.25,1.5,15.731458,1.25,...,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0,300.0


In [312]:
df_big.to_csv('additive_6l_complete.csv',index=False)

### Saving complete data 

In [195]:
saveNumpy(df,'complete_data')

complete_data saved successfully in .


In [7]:
df = loadNumpy('complete_data')

IOError: [Errno 2] No such file or directory: './complete_data.npy'

In [36]:
# Column labels for the larger dataset: five lagged self-temperatures and
# 26 neighbor temperatures, framed by the voxel/laser descriptors and the target.
historicalColumns = ['Tminus' + str(lag) for lag in range(1, 6)]
neighborColumns = ['T' + str(number) + '_t-1' for number in range(1, 27)]

leadingColumns = [
    'timestep',
    'x_voxel', 'y_voxel', 'z_voxel',
    'layerNum', 'time_creation',
    'x_laser', 'y_laser', 'z_laser',
    'x_distance', 'y_distance', 'z_distance',
    'euclidean_distance_laser',
]
columns = leadingColumns + historicalColumns + neighborColumns + ['T_self']


In [37]:
df = pd.DataFrame(loadNumpy('data_big_1_5.npy'),columns=columns)

In [38]:
df.head()

Unnamed: 0,timestep,x_voxel,y_voxel,z_voxel,layerNum,time_creation,x_laser,y_laser,z_laser,x_distance,...,T18_t-1,T19_t-1,T20_t-1,T21_t-1,T22_t-1,T23_t-1,T24_t-1,T25_t-1,T26_t-1,T_self
0,1.0,0.5,1.5,0.5,1.0,2.0,0.0,0.75,0.5,0.5,...,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,4.0
1,2.0,2.5,1.5,0.5,1.0,0.5,2.6,0.75,0.5,0.1,...,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,3549.99
2,2.0,3.0,1.0,0.5,1.0,0.5,2.6,0.75,0.5,0.4,...,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,3519.06
3,2.0,2.0,0.5,0.5,1.0,0.5,2.6,0.75,0.5,0.6,...,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,3026.39
4,2.0,0.5,1.0,0.5,1.0,0.5,2.6,0.75,0.5,2.1,...,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,1876.72
