In [110]:
import pandas as pd
import numpy as np
import torch

pd.set_option("display.max_columns", None)
pd.set_option('precision', 16)

In [151]:
import bz2
import pickle
import _pickle as cPickle


# Pickle a file and then compress it into a file with extension 
def compressed_pickle(title, data):
    with bz2.BZ2File(title + '.pbz2', 'w') as f: 
        cPickle.dump(data, f)

# Load any compressed pickle file
def decompress_pickle(file):
    data = bz2.BZ2File(file, 'rb')
    data = cPickle.load(data)
    return data


def get_features(dataframe:pd.DataFrame)->torch.Tensor:
    return dataframe[["Theta", "Pitch", "Duty Cycle", "Fill Factor", "Mode", "Lambda"]].values


def get_labels(dataframe:pd.DataFrame)->torch.Tensor:
    return dataframe[['Transmission']].values


def transform_labels(values):
    return -1/np.log10(np.abs(values))

In [78]:
training_set = decompress_pickle('training_set_08May2022.pbz2')
training_set

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda,Transmission
0,5.0,0.00000050,0.4000000000000000,0.200000,0,0.00000170000,0.0000910340000000
1,5.0,0.00000050,0.4000000000000000,0.200000,0,0.00000169817,0.0001010460000000
2,5.0,0.00000050,0.4000000000000000,0.200000,0,0.00000169635,0.0001118810000000
3,5.0,0.00000050,0.4000000000000000,0.200000,0,0.00000169453,0.0001233320000000
4,5.0,0.00000050,0.4000000000000000,0.200000,0,0.00000169271,0.0001351560000000
...,...,...,...,...,...,...,...
2240395,11.0,0.00000151,0.4444439999999999,0.555556,1,0.00000131000,-0.0008086870000000
2240396,11.0,0.00000151,0.4444439999999999,0.555556,1,0.00000130000,-0.0008483639999999
2240397,11.0,0.00000151,0.4444439999999999,0.555556,1,0.00000130000,-0.0008844920000000
2240398,11.0,0.00000151,0.4444439999999999,0.555556,1,0.00000130000,-0.0009403480000000


In [79]:
len(training_set.index)

2240400

In [80]:
lambdas = []
grouped_for_lambda = training_set.groupby(['Theta', 'Pitch', 'Duty Cycle', 'Fill Factor', 'Mode']).Lambda.apply(list).reset_index()
for i in range(len(grouped_for_lambda.index)):
    lambdas.append(np.flip(grouped_for_lambda.iloc[i]['Lambda']))
grouped_for_lambda['Lambda'] = lambdas
grouped_for_lambda

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda
0,5.0,0.0000005,0.4,0.2000000000000000,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,..."
1,5.0,0.0000005,0.4,0.2000000000000000,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,..."
2,5.0,0.0000005,0.4,0.2571430000000000,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,..."
3,5.0,0.0000005,0.4,0.2571430000000000,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,..."
4,5.0,0.0000005,0.4,0.2800000000000000,0,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,..."
...,...,...,...,...,...,...
11167,20.0,0.0000015,0.8,0.5200000000000000,1,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,..."
11168,20.0,0.0000015,0.8,0.5428569999999999,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,..."
11169,20.0,0.0000015,0.8,0.5428569999999999,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,..."
11170,20.0,0.0000015,0.8,0.6000000000000000,0,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,..."


In [86]:
transmissions = []
grouped_for_transmission = training_set.groupby(['Theta', 'Pitch', 'Duty Cycle', 'Fill Factor', 'Mode']).Transmission.apply(list).reset_index()
for i in range(len(grouped_for_transmission.index)):
    transmissions.append(np.flip(grouped_for_transmission.iloc[i]['Transmission']))
grouped_for_transmission['Transmission'] = transmissions
grouped_for_transmission

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Transmission
0,5.0,0.0000005,0.4,0.2000000000000000,0,"[0.00024694, 0.000222525, 0.000200177, 0.00017..."
1,5.0,0.0000005,0.4,0.2000000000000000,1,"[-0.000502307, -0.000521784, -0.000540792, -0...."
2,5.0,0.0000005,0.4,0.2571430000000000,0,"[0.000555697, 0.000525419, 0.000495302, 0.0004..."
3,5.0,0.0000005,0.4,0.2571430000000000,1,"[-0.000424704, -0.000424642, -0.000424468, -0...."
4,5.0,0.0000005,0.4,0.2800000000000000,0,"[0.000155323, 0.000114162, 8.32e-05, 6.43e-05,..."
...,...,...,...,...,...,...
11167,20.0,0.0000015,0.8,0.5200000000000000,1,"[-0.0180789, -0.0188972, -0.0199639, -0.021313..."
11168,20.0,0.0000015,0.8,0.5428569999999999,0,"[0.00405141, 0.00416513, 0.00426947, 0.0043639..."
11169,20.0,0.0000015,0.8,0.5428569999999999,1,"[-0.013056, -0.0132126, -0.0133639, -0.013507,..."
11170,20.0,0.0000015,0.8,0.6000000000000000,0,"[0.000377711, 0.0003538, 0.000336613, 0.000338..."


In [88]:
data = []
for i in range(len(grouped_by_lambda.index)):
    data.append(grouped_for_transmission.iloc[i]['Transmission'])
grouped_for_lambda['Transmission'] = data
grouped_for_lambda

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda,Transmission
0,5.0,0.0000005,0.4,0.2000000000000000,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[0.00024694, 0.000222525, 0.000200177, 0.00017..."
1,5.0,0.0000005,0.4,0.2000000000000000,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[-0.000502307, -0.000521784, -0.000540792, -0...."
2,5.0,0.0000005,0.4,0.2571430000000000,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[0.000555697, 0.000525419, 0.000495302, 0.0004..."
3,5.0,0.0000005,0.4,0.2571430000000000,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[-0.000424704, -0.000424642, -0.000424468, -0...."
4,5.0,0.0000005,0.4,0.2800000000000000,0,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,...","[0.000155323, 0.000114162, 8.32e-05, 6.43e-05,..."
...,...,...,...,...,...,...,...
11167,20.0,0.0000015,0.8,0.5200000000000000,1,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,...","[-0.0180789, -0.0188972, -0.0199639, -0.021313..."
11168,20.0,0.0000015,0.8,0.5428569999999999,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[0.00405141, 0.00416513, 0.00426947, 0.0043639..."
11169,20.0,0.0000015,0.8,0.5428569999999999,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[-0.013056, -0.0132126, -0.0133639, -0.013507,..."
11170,20.0,0.0000015,0.8,0.6000000000000000,0,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,...","[0.000377711, 0.0003538, 0.000336613, 0.000338..."


In [89]:
compressed_pickle('training_set_10May2022', grouped_for_lambda)

In [97]:
training_set = decompress_pickle('training_set/10May2022/training_set_10May2022.pbz2')
training_set

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda,Transmission
0,5.0,0.0000005,0.4,0.2000000000000000,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[0.00024694, 0.000222525, 0.000200177, 0.00017..."
1,5.0,0.0000005,0.4,0.2000000000000000,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[-0.000502307, -0.000521784, -0.000540792, -0...."
2,5.0,0.0000005,0.4,0.2571430000000000,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[0.000555697, 0.000525419, 0.000495302, 0.0004..."
3,5.0,0.0000005,0.4,0.2571430000000000,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[-0.000424704, -0.000424642, -0.000424468, -0...."
4,5.0,0.0000005,0.4,0.2800000000000000,0,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,...","[0.000155323, 0.000114162, 8.32e-05, 6.43e-05,..."
...,...,...,...,...,...,...,...
11167,20.0,0.0000015,0.8,0.5200000000000000,1,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,...","[-0.0180789, -0.0188972, -0.0199639, -0.021313..."
11168,20.0,0.0000015,0.8,0.5428569999999999,0,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[0.00405141, 0.00416513, 0.00426947, 0.0043639..."
11169,20.0,0.0000015,0.8,0.5428569999999999,1,"[1.4e-06, 1.40124e-06, 1.4024899999999998e-06,...","[-0.013056, -0.0132126, -0.0133639, -0.013507,..."
11170,20.0,0.0000015,0.8,0.6000000000000000,0,"[1.3e-06, 1.3e-06, 1.3e-06, 1.3e-06, 1.31e-06,...","[0.000377711, 0.0003538, 0.000336613, 0.000338..."


In [None]:
grouped_for = training_set.groupby(['Theta', 'Pitch', 'Duty Cycle', 'Fill Factor', 'Mode']).Lambda.apply(list).reset_index()
grouped_by_lambda

In [5]:
compressed_pickle('training_set_08May2022', dataframe)

In [139]:
training_set = decompress_pickle('training_set/10May2022/training_set_10May2022.pbz2')
# norm features up to lambda
columns_to_norm = ['Theta', 'Pitch', 'Duty Cycle', 'Fill Factor', 'Mode']
training_set[columns_to_norm] = training_set[columns_to_norm].apply(lambda x: (x-x.min()) / (x.max() - x.min()))
# norm the lambdas
lambdas_normed = []
for i in range(len(grouped_for_lambda.index)):
    temp = (grouped_for_lambda.iloc[i]['Lambda'] - grouped_for_lambda.iloc[i]['Lambda'].min()) / (grouped_for_lambda.iloc[i]['Lambda'].max() - grouped_for_lambda.iloc[i]['Lambda'].min())
    lambdas_normed.append(temp)
for i in range(len(lambdas_normed)):
    lambdas_normed[i] = torch.tensor(lambdas_normed[i], dtype=torch.float32)
training_set['Lambda'] = lambdas_normed
compressed_pickle('training_set/10May2022/training_set_10May2022_normalized_features.pbz2', training_set)
training_set

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda,Transmission
0,0.0,0.0000000000000000,0.0,0.0000000000000000,0.0,"[tensor(0.), tensor(0.0041), tensor(0.0083), t...","[0.00024694, 0.000222525, 0.000200177, 0.00017..."
1,0.0,0.0000000000000000,0.0,0.0000000000000000,1.0,"[tensor(0.), tensor(0.0041), tensor(0.0083), t...","[-0.000502307, -0.000521784, -0.000540792, -0...."
2,0.0,0.0000000000000000,0.0,0.1428575000000000,0.0,"[tensor(0.), tensor(0.0041), tensor(0.0083), t...","[0.000555697, 0.000525419, 0.000495302, 0.0004..."
3,0.0,0.0000000000000000,0.0,0.1428575000000000,1.0,"[tensor(0.), tensor(0.0041), tensor(0.0083), t...","[-0.000424704, -0.000424642, -0.000424468, -0...."
4,0.0,0.0000000000000000,0.0,0.2000000000000001,0.0,"[tensor(0.), tensor(0.), tensor(0.), tensor(0....","[0.000155323, 0.000114162, 8.32e-05, 6.43e-05,..."
...,...,...,...,...,...,...,...
11167,1.0,0.9900990099009902,1.0,0.8000000000000000,1.0,"[tensor(0.), tensor(0.), tensor(0.), tensor(0....","[-0.0180789, -0.0188972, -0.0199639, -0.021313..."
11168,1.0,0.9900990099009902,1.0,0.8571424999999998,0.0,"[tensor(0.), tensor(0.0041), tensor(0.0083), t...","[0.00405141, 0.00416513, 0.00426947, 0.0043639..."
11169,1.0,0.9900990099009902,1.0,0.8571424999999998,1.0,"[tensor(0.), tensor(0.0041), tensor(0.0083), t...","[-0.013056, -0.0132126, -0.0133639, -0.013507,..."
11170,1.0,0.9900990099009902,1.0,1.0000000000000000,0.0,"[tensor(0.), tensor(0.), tensor(0.), tensor(0....","[0.000377711, 0.0003538, 0.000336613, 0.000338..."


In [153]:
x = get_features(training_set)
x

array([[0.0, 0.0, 0.0, 0.0, 0.0,
        tensor([0.0000, 0.0041, 0.0083, 0.0124, 0.0166, 0.0208, 0.0250, 0.0291, 0.0333,
                0.0375, 0.0418, 0.0460, 0.0502, 0.0544, 0.0587, 0.0629, 0.0672, 0.0714,
                0.0757, 0.0800, 0.0843, 0.0886, 0.0929, 0.0972, 0.1015, 0.1058, 0.1101,
                0.1145, 0.1188, 0.1232, 0.1275, 0.1319, 0.1363, 0.1407, 0.1451, 0.1495,
                0.1539, 0.1583, 0.1627, 0.1672, 0.1716, 0.1761, 0.1805, 0.1850, 0.1895,
                0.1940, 0.1985, 0.2030, 0.2075, 0.2120, 0.2165, 0.2211, 0.2256, 0.2301,
                0.2347, 0.2393, 0.2439, 0.2484, 0.2530, 0.2576, 0.2623, 0.2669, 0.2715,
                0.2761, 0.2808, 0.2854, 0.2901, 0.2948, 0.2995, 0.3042, 0.3089, 0.3136,
                0.3183, 0.3230, 0.3277, 0.3325, 0.3372, 0.3420, 0.3468, 0.3516, 0.3563,
                0.3611, 0.3660, 0.3708, 0.3756, 0.3804, 0.3853, 0.3901, 0.3950, 0.3999,
                0.4048, 0.4096, 0.4145, 0.4195, 0.4244, 0.4293, 0.4342, 0.4392, 0.4442,

In [166]:
y = get_labels(training_set)
temp = []
for i in range(len(y)):
    temp.append(np.log10(np.abs(y[i][0])))    
temp

[array([-3.60740856, -3.65262119, -3.69858582, -3.74494711, -3.79128192,
        -3.8370803 , -3.88175181, -3.9246392 , -3.9650228 , -4.00217168,
        -4.03538732, -4.06404708, -4.08766739, -4.10594714, -4.11879115,
        -4.12631096, -4.12879906, -4.12668521, -4.12048896, -4.11076891,
        -4.09808528, -4.08297421, -4.06593353, -4.04741725, -4.02783883,
        -4.00757595, -3.98697729, -3.96637121, -3.94606151, -3.92635129,
        -3.90752499, -3.88986165, -3.87363478, -3.85911502, -3.84656321,
        -3.83624843, -3.8284315 , -3.82338212, -3.82137984, -3.82271463,
        -3.82769779, -3.83666864, -3.85000232, -3.8681184 , -3.89150263,
        -3.92072105, -3.95644125, -3.99945747, -4.050732  , -4.11140888,
        -4.18285392, -4.26661493, -4.36423888, -4.47661777, -4.60207389,
        -4.73136821, -4.83822485, -4.87637373, -4.81486598, -4.67864602,
        -4.51503001, -4.35314946, -4.20383032, -4.06936522, -3.94906371,
        -3.84144453, -3.74496642, -3.65827868, -3.5

In [136]:
compressed_pickle('training_set/10May2022/training_set_10May2022_normalized_features.pbz2', training_set)

In [8]:
compressed_pickle('training_set_normalized_features_08May2022', dataframe)

In [26]:
training_set = decompress_pickle('training_set.pbz2')
training_set

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda,Transmission
0,5.0,0.0000005,0.4,0.2,0,0.00000170000,0.000091034
1,5.0,0.0000005,0.4,0.2,0,0.00000169817,0.000101046
2,5.0,0.0000005,0.4,0.2,0,0.00000169635,0.000111881
3,5.0,0.0000005,0.4,0.2,0,0.00000169453,0.000123332
4,5.0,0.0000005,0.4,0.2,0,0.00000169271,0.000135156
...,...,...,...,...,...,...,...
2156395,20.0,0.0000015,0.8,0.6,1,0.00000130618,-0.012646500
2156396,20.0,0.0000015,0.8,0.6,1,0.00000130463,-0.012309600
2156397,20.0,0.0000015,0.8,0.6,1,0.00000130308,-0.012144300
2156398,20.0,0.0000015,0.8,0.6,1,0.00000130154,-0.011917400


In [27]:
columns_to_norm = ['Theta', 'Pitch', 'Duty Cycle', 'Fill Factor', 'Mode', 'Lambda']
training_set[columns_to_norm] = training_set[columns_to_norm].apply(lambda x: (x-x.min()) / (x.max() - x.min()))
training_set

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda,Transmission
0,0.0,0.0,0.0,0.0,0.0,1.0000000000000000,0.000091034
1,0.0,0.0,0.0,0.0,0.0,0.9954249999999997,0.000101046
2,0.0,0.0,0.0,0.0,0.0,0.9908750000000001,0.000111881
3,0.0,0.0,0.0,0.0,0.0,0.9863250000000000,0.000123332
4,0.0,0.0,0.0,0.0,0.0,0.9817749999999998,0.000135156
...,...,...,...,...,...,...,...
2156395,1.0,1.0,1.0,1.0,1.0,0.0154499999999998,-0.012646500
2156396,1.0,1.0,1.0,1.0,1.0,0.0115749999999998,-0.012309600
2156397,1.0,1.0,1.0,1.0,1.0,0.0076999999999997,-0.012144300
2156398,1.0,1.0,1.0,1.0,1.0,0.0038499999999999,-0.011917400


In [28]:
compressed_pickle('training_set_normalized_features', training_set)

In [29]:
testing_set = decompress_pickle('testing_set.pbz2')
testing_set

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda,Transmission
0,10.0,0.0000005,0.4,0.2,0,0.00000170000,-0.0000150191
1,10.0,0.0000005,0.4,0.2,0,0.00000169738,-0.0000232995
2,10.0,0.0000005,0.4,0.2,0,0.00000169476,-0.0000328789
3,10.0,0.0000005,0.4,0.2,0,0.00000169215,-0.0000431445
4,10.0,0.0000005,0.4,0.2,0,0.00000168955,-0.0000535258
...,...,...,...,...,...,...,...
102395,20.0,0.0000015,0.8,0.6,1,0.00000130618,-0.0126465000
102396,20.0,0.0000015,0.8,0.6,1,0.00000130463,-0.0123096000
102397,20.0,0.0000015,0.8,0.6,1,0.00000130308,-0.0121443000
102398,20.0,0.0000015,0.8,0.6,1,0.00000130154,-0.0119174000


In [31]:
columns_to_norm = ['Theta', 'Pitch', 'Duty Cycle', 'Fill Factor', 'Mode', 'Lambda']
testing_set[columns_to_norm] = testing_set[columns_to_norm].apply(lambda x: (x-x.min()) / (x.max() - x.min()))
testing_set

Unnamed: 0,Theta,Pitch,Duty Cycle,Fill Factor,Mode,Lambda,Transmission
0,0.0,0.0,0.0,0.0,0.0,1.0000000000000000,-0.0000150191
1,0.0,0.0,0.0,0.0,0.0,0.9934499999999997,-0.0000232995
2,0.0,0.0,0.0,0.0,0.0,0.9869000000000000,-0.0000328789
3,0.0,0.0,0.0,0.0,0.0,0.9803749999999999,-0.0000431445
4,0.0,0.0,0.0,0.0,0.0,0.9738749999999999,-0.0000535258
...,...,...,...,...,...,...,...
102395,1.0,1.0,1.0,1.0,1.0,0.0154499999999998,-0.0126465000
102396,1.0,1.0,1.0,1.0,1.0,0.0115749999999998,-0.0123096000
102397,1.0,1.0,1.0,1.0,1.0,0.0076999999999997,-0.0121443000
102398,1.0,1.0,1.0,1.0,1.0,0.0038499999999999,-0.0119174000


In [32]:
compressed_pickle('testing_set_normalized_features', testing_set)