# Creating Synthetic Dataset
The objective of this code is to create an artificicial dataset encompassing all the values within a given range for each feature seperated by an assiagned sensitivity. This synthetic dataset will then be able to be plugged into the regression model to search and save optimal values. 

In [12]:
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import pandas as pd
from random import *

#### Reads in data to ensure compatibility of synthetic and real dataset

In [13]:
df = pd.read_csv('../deviceSummary.csv')
columns = list(df.columns)
print(columns)

['devices', 'temperature', 'speed', 'distance', 'pressure', 'efficiency']


In [14]:
def getRange(regions, tempMin, tempMax):
    tempRange = tempMax - tempMin
    tempIncrements = tempRange / regions
    tempBins = []
    for i in range(4):
        tempBins.append(int(tempMin + tempIncrements*i))
    tempBins.append(tempMax)
    return tempBins


In [15]:
bins = 100
temperatureRange, speedRange, distanceRange, pressureRange = getRange(bins, 50, 80), getRange(bins, 125, 200), getRange(bins, 500, 800), getRange(bins, 0, 100)
print(temperatureRange)
print(speedRange)
print(distanceRange) # Multiplied by 100 to prevent decimals
print(pressureRange) # Multiplied by 10 to prevent decimals

 

[50, 50, 50, 50, 80]
[125, 125, 126, 127, 200]
[500, 503, 506, 509, 800]
[0, 1, 2, 3, 100]


In [19]:
incrementalFactor = 1 
multiplier = 1

tRange = [50, 80]
temp = tRange[0] - incrementalFactor
tVals = []
for i in range((tRange[1] - tRange[0]) * multiplier + 1):
    temp += incrementalFactor
    tVals.append(temp)

sRange = [125, 200]
temp = sRange[0] - incrementalFactor
sVals = []
for i in range((sRange[1] - sRange[0]) * multiplier + 1):
    temp += incrementalFactor
    sVals.append(temp)

dRange = [5, 8]
temp = dRange[0] - incrementalFactor
dVals = []
for i in range((dRange[1] - dRange[0]) * multiplier + 1):
    temp += incrementalFactor
    dVals.append(temp)

pRange = [0, 10]
temp = pRange[0] - incrementalFactor
pVals = []
for i in range((pRange[1] - pRange[0]) * multiplier + 1):
    temp += incrementalFactor
    pVals.append(temp)

print(tVals)
print(sVals)
print(pVals)
print(dVals)


[50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80]
[125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[5, 6, 7, 8]


In [20]:
from IPython.display import display
newDF = pd.DataFrame(columns=["Temperature","Speed","Distance","Pressure"])
counter = 0
for t in tVals:
    for s in sVals:
        for d in dVals:
            for p in pVals:
                #t, s, d, p = randint(temperatureRange[i], temperatureRange[i+1]),randint(speedRange[j], speedRange[j+1]),round(randint(distanceRange[k], distanceRange[k+1])/100), round(randint(pressureRange[l], pressureRange[l+1])/10)
                df2 = {'device': counter, 'Temperature': t, 'Speed': s, 'Distance':d, "Pressure":p}
                newDF = newDF.append(df2, ignore_index = True)
display(newDF)
newDF.to_csv('optimizedSyntheticDataset.csv')

Unnamed: 0,Temperature,Speed,Distance,Pressure,device
0,50,125,5,0,0.0
1,50,125,5,1,0.0
2,50,125,5,2,0.0
3,50,125,5,3,0.0
4,50,125,5,4,0.0
...,...,...,...,...,...
103659,80,200,8,6,0.0
103660,80,200,8,7,0.0
103661,80,200,8,8,0.0
103662,80,200,8,9,0.0
