In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
# Root data folder under the Google Drive mount point (/content/drive).
# Later cells list this folder with os.listdir and treat every entry as one
# material's sub-folder of CSV files.
data_dir = '/content/drive/Shareddrives/EEE405 Group Project/MagnetChallenge/data'

In [None]:
# Mount Google Drive at /content/drive; data_dir points below this mount point,
# so this cell has to run before any cell that reads the CSV files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def get_data_from_material(material = '3E6', base_dir = None):
  """Load the regression features and target for one material.

  Reads four header-less CSV files from ``<base_dir>/<material>/``.

  Args:
    material: Name of the material sub-folder to read from.
    base_dir: Folder that contains one sub-folder per material. When left as
      None, the notebook-level ``data_dir`` is used, which matches the
      behaviour of the original one-argument call.

  Returns:
    Tuple ``(B, Temp, Freq, Loss)`` of pandas DataFrames, in that order.
  """
  if base_dir is None:
    base_dir = data_dir
  material_dir = os.path.join(base_dir, material)

  # Single source of truth for the file names. The dataset also ships
  # "H_waveform[Am-1].csv"; it is not part of the returned tuple, so it is
  # not read here.
  file_names = {
      "B": "B_waveform[T].csv",
      "Temp": "Temperature[C].csv",
      "Freq": "Frequency[Hz].csv",
      "Loss": "Volumetric_losses[Wm-3].csv",
  }

  # header=None: the files hold raw numbers only, with no column-name row.
  tables = {
      key: pd.read_csv(os.path.join(material_dir, file_name), header=None)
      for key, file_name in file_names.items()
  }

  # Features (B, Temp, Freq) first, target (Loss) last.
  return tables["B"], tables["Temp"], tables["Freq"], tables["Loss"]

def average_error(y_test, y_pred):
  """Return the mean absolute relative error of the predictions, in percent.

  Computes ``100 * mean(|y_test - y_pred| / |y_test|)`` along axis 0.

  Args:
    y_test: True values; a flat sequence or a single-column 2-D array/frame.
    y_pred: Predicted values with the same number of elements as ``y_test``.

  Returns:
    The error as a plain Python float.

  Raises:
    ValueError: If the inputs have more than one column, because the
      per-column result cannot be collapsed to a single scalar.

  Note:
    Entries of ``y_test`` equal to zero lead to a division by zero, so the
    result is then inf or nan.
  """
  y_test = np.asarray(y_test)
  y_pred = np.asarray(y_pred)

  # An (n, 1) column of targets against a flat (n,) prediction would broadcast
  # to an (n, n) matrix; align the shapes whenever the element counts agree.
  if y_pred.shape != y_test.shape and y_pred.size == y_test.size:
    y_pred = y_pred.reshape(y_test.shape)

  relative_error = np.abs(np.divide(np.subtract(y_test, y_pred), y_test))
  avg_error = 100 * np.mean(relative_error, axis = 0)

  # np.mean gives a NumPy scalar for 1-D input and a length-1 array for a
  # single column; going through np.asarray lets .item() handle both.
  return np.asarray(avg_error).item()

def maximum_error(y_test, y_pred):
  """Return the largest absolute relative error of the predictions, in percent.

  Computes ``100 * max(|y_test - y_pred| / |y_test|)`` along axis 0.

  Args:
    y_test: True values; a flat sequence or a single-column 2-D array/frame.
    y_pred: Predicted values with the same number of elements as ``y_test``.

  Returns:
    The error as a plain Python float.

  Raises:
    ValueError: If the inputs have more than one column, because the
      per-column result cannot be collapsed to a single scalar.

  Note:
    Entries of ``y_test`` equal to zero lead to a division by zero, so the
    result is then inf or nan.
  """
  y_test = np.asarray(y_test)
  y_pred = np.asarray(y_pred)

  # An (n, 1) column of targets against a flat (n,) prediction would broadcast
  # to an (n, n) matrix; align the shapes whenever the element counts agree.
  if y_pred.shape != y_test.shape and y_pred.size == y_test.size:
    y_pred = y_pred.reshape(y_test.shape)

  relative_error = np.abs(np.divide(np.subtract(y_test, y_pred), y_test))
  max_error = 100 * np.max(relative_error, axis = 0)

  # np.max gives a NumPy scalar for 1-D input and a length-1 array for a
  # single column; going through np.asarray lets .item() handle both.
  return np.asarray(max_error).item()

def _fit_and_score_linear_model(X, y):
  """Fit a LinearRegression on an 80/20 split and score the held-out 20%.

  Args:
    X: Data matrix; each row is an observation, each column a feature.
    y: Target values, one row per observation.

  Returns:
    Tuple ``(mse, r2, avg_rel_pct_error, max_rel_pct_error)`` computed on the
    test split.
  """
  # random_state is fixed so repeated calls produce the same split.
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

  model = LinearRegression()
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)

  error = mse(y_test, y_pred)
  r2_test = r2_score(y_test, y_pred)
  avg_error = average_error(y_test, y_pred)
  max_error = maximum_error(y_test, y_pred)
  return error, r2_test, avg_error, max_error


def _print_metrics(material, error, r2_test, avg_error, max_error):
  """Print the four test-set metrics for one material as an aligned list."""
  print(f'For material: {material}')
  print(' - Model: Sklearn Linear Regression')
  print(f' - {"Mean Squared Error: ":<40} {error:>15.6g}')
  print(f' - {"R2 Score: ":<40} {r2_test:>15.6g}')
  print(f' - {"Average Relative Percent Error: ":<40} {avg_error:>15.6g}')
  print(f' - {"Maximum Relative Percent Error: ":<40} {max_error:>15.6g}')


def lin_reg_Bmax(material, mute = False):
  """Linear regression of Loss on (Freq, Temp, row-wise maximum of B).

  Args:
    material: Material sub-folder name passed to get_data_from_material.
    mute: When truthy, the metrics are returned without being printed.

  Returns:
    Tuple ``(mse, r2, avg_rel_pct_error, max_rel_pct_error)`` on the test split.
  """
  B, Temp, Freq, Loss = get_data_from_material(material)

  # Reduce every row of B to a single scalar feature: its maximum value.
  B_max = B.max(axis=1)

  # Data matrix; each row is a different observation, each column is a different feature
  X = pd.concat([Freq, Temp, B_max], axis=1)

  metrics = _fit_and_score_linear_model(X, Loss)
  if not mute:
    _print_metrics(material, *metrics)
  return metrics


def lin_reg_allB(material, mute = False):
  """Linear regression of Loss on (Freq, Temp, every column of B).

  Args:
    material: Material sub-folder name passed to get_data_from_material.
    mute: When truthy, the metrics are returned without being printed.

  Returns:
    Tuple ``(mse, r2, avg_rel_pct_error, max_rel_pct_error)`` on the test split.
  """
  B, Temp, Freq, Loss = get_data_from_material(material)

  # Data matrix; each row is a different observation, each column is a different feature.
  # Every column of B is used as its own feature here.
  X = pd.concat([Freq, Temp, B], axis=1)

  metrics = _fit_and_score_linear_model(X, Loss)
  if not mute:
    _print_metrics(material, *metrics)
  return metrics


In [None]:
# Run lin_reg_Bmax for just one material.
# The call prints the metrics (mute defaults to False) and, being the last
# expression of the cell, its returned metrics tuple is displayed as well.

material = '3E6_cycle'
lin_reg_Bmax(material)

In [None]:
# Run lin_reg_Bmax for all materials

# sorted() gives a reproducible report order (os.listdir returns entries in
# arbitrary order); the isdir check skips stray files in data_dir that are not
# material folders and would otherwise make the CSV reads fail.
for material in sorted(os.listdir(data_dir)):
  if not os.path.isdir(os.path.join(data_dir, material)):
    continue
  lin_reg_Bmax(material)

In [None]:
# Run lin_reg_allB for just one material.
# The call prints the metrics (mute defaults to False) and, being the last
# expression of the cell, its returned metrics tuple is displayed as well.

material = '3E6_cycle'
lin_reg_allB(material)

In [None]:
# Run lin_reg_allB for all materials

# sorted() gives a reproducible report order (os.listdir returns entries in
# arbitrary order); the isdir check skips stray files in data_dir that are not
# material folders and would otherwise make the CSV reads fail.
for material in sorted(os.listdir(data_dir)):
  if not os.path.isdir(os.path.join(data_dir, material)):
    continue
  lin_reg_allB(material)

In [None]:
# Run lin_reg_Bmax and lin_reg_allB for all materials, compare avg_error between them

# List the material folders once so that every result list below shares the
# same ordering.
materials = os.listdir(data_dir)

# One list per (metric, model) pair; position i belongs to materials[i].
errors_Bmax, errors_allB = [], []
r2s_Bmax, r2s_allB = [], []
avgs_Bmax, avgs_allB = [], []
maxs_Bmax, maxs_allB = [], []

for material in materials:

  # mute=True: only collect the metrics here, do not print per-material reports.
  error_Bmax, r2_Bmax, avg_error_Bmax, max_error_Bmax = lin_reg_Bmax(material, mute = True)
  error_allB, r2_allB, avg_error_allB, max_error_allB = lin_reg_allB(material, mute = True)

  errors_Bmax.append(error_Bmax)
  errors_allB.append(error_allB)
  r2s_Bmax.append(r2_Bmax)
  r2s_allB.append(r2_allB)
  avgs_Bmax.append(avg_error_Bmax)
  avgs_allB.append(avg_error_allB)
  maxs_Bmax.append(max_error_Bmax)
  maxs_allB.append(max_error_allB)

# Rows are metrics, columns are models; every cell holds the per-material list.
data = [[errors_Bmax, errors_allB],
        [r2s_Bmax, r2s_allB],
        [avgs_Bmax, avgs_allB],
        [maxs_Bmax, maxs_allB]]

# Label both axes so the table can be read without referring back to the code.
error_dataframe = pd.DataFrame(
    data,
    index = ['MSE:', 'R2:', 'Average Relative % Error:', 'Max Relative % Error:'],
    columns = ['Bmax', 'allB'],
)
error_dataframe


Unnamed: 0,0,1
0,"[22314574790.964138, 62355398044.114075, 20514...","[14374875332.109392, 28813408771.765877, 76995..."
1,"[0.6944374390743108, 0.6325988056628677, 0.617...","[0.8031589774569448, 0.8302299219037694, 0.856..."
2,"[884.0980022048559, 822.5462117143078, 299.864...","[761.4094089375769, 547.4232429565711, 137.266..."
3,"[50705.96709929842, 40508.41890376001, 26267.2...","[37307.26875734503, 28073.067129222727, 9369.6..."


In [None]:
from google.colab import data_table
data_table.enable_dataframe_formatter()

# Pair the two models' scores per material. These four names were previously
# never defined in the notebook, so this cell failed with a NameError after a
# kernel restart. Each entry is a (Bmax, allB) tuple, matching the printed legend.
errors = list(zip(errors_Bmax, errors_allB))
r2s = list(zip(r2s_Bmax, r2s_allB))
avgs = list(zip(avgs_Bmax, avgs_allB))
maxs = list(zip(maxs_Bmax, maxs_allB))

print('(x,y) = (Bmax, allB) for all data points')
# Rows are metrics, columns are materials; the column labels rely on
# os.listdir returning the same order as when the result lists were filled.
error_dataframe = pd.DataFrame(data = (errors, r2s, avgs, maxs), index = ('MSE: ', 'R2:', 'Average Relative % Error:', 'Max Relative % Error:'), columns = os.listdir(data_dir))
error_dataframe


(x,y) = (Bmax, allB) for all data points


Unnamed: 0,3C90_cycle,3C94_cycle,3E6_cycle,3F4_cycle,77_cycle,78_cycle,N27_cycle,N30_cycle,N49_cycle,N87_cycle
MSE:,"(22314574790.964138, 14374875332.109392)","(62355398044.114075, 28813408771.765877)","(205146376684.65945, 76995614607.86584)","(44728814672.45112, 63927092694.064026)","(70577217901.97806, 36537776662.273346)","(57393083441.398384, 170264390297.24915)","(71680452016.48767, 365953059905.3602)","(286992356156.995, 126097564395.18364)","(188058311223.61615, 383715191727.3743)","(20314890227.729927, 11468061710.277794)"
R2:,"(0.6944374390743108, 0.8031589774569448)","(0.6325988056628677, 0.8302299219037694)","(0.6173513486017952, 0.8563841654461015)","(0.8155286399620988, 0.7363507658563953)","(0.6511387614822096, 0.8193947792502905)","(0.6725716886081595, 0.02863936798041422)","(0.702099297412329, -0.5208842940169938)","(0.6318946498814322, 0.8382633296846558)","(0.7435984342085222, 0.47683686332858666)","(0.7274018252988341, 0.846114221906313)"
Average Relative % Error:,"(884.0980022048559, 761.4094089375769)","(822.5462117143078, 547.4232429565711)","(299.8640611149277, 137.26667235412194)","(726.1270793023654, 484.35689080568494)","(934.7412220664097, 635.0077823018664)","(803.9000443180569, 649.3917315747406)","(843.2711929319504, 654.3963731074657)","(636.0959064631955, 215.57598946599916)","(671.3849891615763, 577.5232728398125)","(786.4656749350013, 660.9395560276191)"
Max Relative % Error:,"(50705.96709929842, 37307.26875734503)","(40508.41890376001, 28073.067129222727)","(26267.292659810046, 9369.698423707563)","(18776.958507331048, 13978.159166642758)","(76310.51110603152, 33110.641088659744)","(63061.55152808163, 40296.712944399)","(42967.99915920761, 25748.38959091153)","(62483.96458845309, 15521.632162812597)","(28313.22112968328, 25192.631048758467)","(40167.40576860963, 26506.607404802482)"


In [None]:
# Switch back to the plain pandas renderer and cap the displayed precision.
data_table.disable_dataframe_formatter()
pd.set_option("display.precision", 5)

# One (row label, per-material values) pair for every metric/model combination.
labelled_rows = [
    ('MSE Bmax:', errors_Bmax),
    ('MSE allB:', errors_allB),
    ('R2 Bmax:', r2s_Bmax),
    ('R2 allB:', r2s_allB),
    ('Avg Rel % Error Bmax:', avgs_Bmax),
    ('Avg Rel % Error allB:', avgs_allB),
    ('Max Rel % Error Bmax:', maxs_Bmax),
    ('Max Rel % Error allB:', maxs_allB),
]

# Split the pairs back into the row values and the row labels.
data = [values for _, values in labelled_rows]
index = [label for label, _ in labelled_rows]

# Rows are metric/model pairs, columns are the material folders.
error_dataframe = pd.DataFrame(data, index, columns = os.listdir(data_dir))
error_dataframe


Unnamed: 0,3C90_cycle,3C94_cycle,3E6_cycle,3F4_cycle,77_cycle,78_cycle,N27_cycle,N30_cycle,N49_cycle,N87_cycle
MSE Bmax:,22314600000.0,62355400000.0,205146000000.0,44728800000.0,70577200000.0,57393100000.0,71680500000.0,286992000000.0,188058000000.0,20314900000.0
MSE allB:,14374900000.0,28813400000.0,76995600000.0,63927100000.0,36537800000.0,170264000000.0,365953000000.0,126098000000.0,383715000000.0,11468100000.0
R2 Bmax:,0.694437,0.632599,0.617351,0.815529,0.651139,0.672572,0.702099,0.631895,0.743598,0.727402
R2 allB:,0.803159,0.83023,0.856384,0.736351,0.819395,0.0286394,-0.520884,0.838263,0.476837,0.846114
Avg Rel % Error Bmax:,884.098,822.546,299.864,726.127,934.741,803.9,843.271,636.096,671.385,786.466
Avg Rel % Error allB:,761.409,547.423,137.267,484.357,635.008,649.392,654.396,215.576,577.523,660.94
Max Rel % Error Bmax:,50706.0,40508.4,26267.3,18777.0,76310.5,63061.6,42968.0,62484.0,28313.2,40167.4
Max Rel % Error allB:,37307.3,28073.1,9369.7,13978.2,33110.6,40296.7,25748.4,15521.6,25192.6,26506.6
