In [2]:
import pandas as pd

# Read the data from the CSV file
df = pd.read_excel('dataset/combinedextended.xlsx')

In [None]:
from sklearn.model_selection import train_test_split

X = df[['Frequency', 'Amplitude']]
y = df['Mass']

# 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)


Linear Interpolation for Amplitude for different Mass

In [2]:
import pandas as pd
import numpy as np


# get unique frequency values
freqs = df['Frequency'].unique()

# create an empty list to store the interpolated dataframes
interp_dfs = []

# loop through each frequency and interpolate missing mass values
for freq in freqs:
    # get data for this frequency
    freq_df = df[df['Frequency'] == freq]

    # sort the data by mass
    freq_df = freq_df.sort_values('Mass')

    # get mass and amplitude values
    masses = freq_df['Mass'].values
    amplitudes = freq_df['Amplitude'].values

    # create a new mass array with equal spacing
    new_masses = np.arange(masses[0], masses[-1]+1, 0.4999)

    # interpolate amplitude values for the new mass array
    new_amplitudes = np.interp(new_masses, masses, amplitudes)

    # create a new dataframe with interpolated values
    interp_df = pd.DataFrame({'Frequency': freq, 'Mass': new_masses, 'Amplitude': new_amplitudes})

    # append the interpolated data to the list
    interp_dfs.append(interp_df)

# concatenate the interpolated dataframes into a single dataframe
interp_df = pd.concat(interp_dfs, ignore_index=True)

# merge the interpolated data with the original data
df = pd.concat([df, interp_df], ignore_index=True)

# remove duplicates
df = df.drop_duplicates()

interp_df

Unnamed: 0,Frequency,Mass,Amplitude
0,8,0.0000,0.001400
1,8,0.4999,0.001485
2,8,0.9998,0.001570
3,8,1.4997,0.001656
4,8,1.9996,0.001741
...,...,...,...
2902,30,158.9682,0.500652
2903,30,159.4681,0.502855
2904,30,159.9680,0.505059
2905,30,160.4679,0.505200


Linear Interpolation for Amplitude and Unbalance Force for different Mass

In [3]:
import pandas as pd
import numpy as np

# get unique frequency values
freqs = df['Frequency'].unique()

# create an empty list to store the interpolated dataframes
interp_dfs = []

# loop through each frequency and interpolate missing mass values
for freq in freqs:
    # get data for this frequency
    freq_df = df[df['Frequency'] == freq]

    # sort the data by mass
    freq_df = freq_df.sort_values('Mass')

    # get mass, amplitude, and unbalance force values
    masses = freq_df['Mass'].values
    amplitudes = freq_df['Amplitude'].values
    unbalance_forces = freq_df['Unbalance Force'].values

    # create a new mass array with equal spacing
    new_masses = np.arange(masses[0], masses[-1]+1, 0.4999)

    # interpolate amplitude and unbalance force values for the new mass array
    new_amplitudes = np.interp(new_masses, masses, amplitudes)
    new_unbalance_forces = np.interp(new_masses, masses, unbalance_forces)

    # create a new dataframe with interpolated values
    interp_df = pd.DataFrame({'Frequency': freq, 'Mass': new_masses, 'Amplitude': new_amplitudes, 'Unbalance Force': new_unbalance_forces})

    # append the interpolated data to the list
    interp_dfs.append(interp_df)

# concatenate the interpolated dataframes into a single dataframe
interp_df = pd.concat(interp_dfs, ignore_index=True)

# merge the interpolated data with the original data
df = pd.concat([df, interp_df], ignore_index=True)

# remove duplicates
df = df.drop_duplicates()

interp_df

Unnamed: 0,Frequency,Mass,Amplitude,Unbalance Force
0,8,0.0000,0.001400,1.000000e-09
1,8,0.4999,0.001485,1.051578e-05
2,8,0.9998,0.001570,2.103057e-05
3,8,1.4997,0.001656,3.154535e-05
4,8,1.9996,0.001741,4.206014e-05
...,...,...,...,...
2902,30,158.9682,0.500652,4.689881e-02
2903,30,159.4681,0.502855,4.710582e-02
2904,30,159.9680,0.505059,4.731283e-02
2905,30,160.4679,0.505200,4.732608e-02


In [4]:
# Save the interpolated DataFrame to an Excel file
interp_df.to_excel('inter-step.xlsx', index=False)


In [None]:
from matplotlib import pyplot as plt
interp_df.plot(kind='scatter', x='Mass', y='Amplitude', s=32, alpha=.8)
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
import numpy as np

# frequency to check accuracy
freq = 30

# get data for the given frequency
freq_df = df[df['Frequency'] == freq]

# sort the data by mass
freq_df = freq_df.sort_values('Mass')

# get mass and amplitude values
masses = freq_df['Mass'].values
amplitudes = freq_df['Amplitude'].values

# create a new mass array with equal spacing
new_masses = np.arange(masses[0], masses[-1]+1, 5)

# interpolate amplitude values for the new mass array
new_amplitudes = np.interp(new_masses, masses, amplitudes)

# get the actual amplitude values for the interpolated mass values
actual_amplitudes = freq_df[freq_df['Mass'].isin(new_masses)]['Amplitude'].values

# calculate the RMSE between the interpolated and actual values
rmse = np.sqrt(np.mean((new_amplitudes - actual_amplitudes)**2))

print(f"RMSE for frequency {freq}: {rmse:.4f}")


Linear Regression

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

X = df[['Frequency', 'Amplitude']]
y = df['Mass']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')


Polynomial Regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# find the best degree of polynomial features using cross-validation
degrees = [1, 2, 3, 4, 5]
best_score = float('inf')
best_degree = None
for degree in degrees:
    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X_train)
    model = LinearRegression()
    scores = cross_val_score(model, X_poly, y_train, cv=5, scoring='neg_mean_squared_error')
    avg_score = np.mean(scores)
    if avg_score < best_score:
        best_score = avg_score
        best_degree = degree

print("Best degree: ", best_degree)

# create polynomial features with the best degree
poly = PolynomialFeatures(degree=best_degree)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# fit the model on the training set
model = LinearRegression()
model.fit(X_train_poly, y_train)

# make predictions on the test set
y_pred = model.predict(X_test_poly)

# evaluate the model on the test set
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("RMSE: ", rmse)


In [None]:
# make a prediction on a sample value
sample_X = np.array([[12, 0.007]])
sample_X_poly = poly.transform(sample_X)
sample_y_pred = model.predict(sample_X_poly)
print("Predicted mass: ", sample_y_pred[0])

Ridge Regression

In [None]:
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error
import numpy as np

# create a range of alpha values to try
alphas = np.logspace(-4, 4, 100)

# use cross-validation to find the best alpha value
ridge = RidgeCV(alphas=alphas, cv=5)
ridge.fit(X, y)

# make predictions
y_pred = ridge.predict(X)

# evaluate the model
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
print("RMSE: ", rmse)


Lasso Regression

In [None]:
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error
import numpy as np

# create a range of alpha values to try
alphas = np.logspace(-4, 4, 100)

# use cross-validation to find the best alpha value
lasso = LassoCV(alphas=alphas, cv=5)
lasso.fit(X, y)

# make predictions
y_pred = lasso.predict(X)

# evaluate the model
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
print("RMSE: ", rmse)


Elastic Net Regression

In [None]:
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error
import numpy as np

# create a range of alpha values to try
alphas = np.logspace(-4, 4, 100)

# create a range of l1_ratio values to try
l1_ratios = np.linspace(0, 1, 100)

# use cross-validation to find the best alpha and l1_ratio values
elastic_net = ElasticNetCV(alphas=alphas, l1_ratio=l1_ratios, cv=5)
elastic_net.fit(X, y)

# make predictions
y_pred = elastic_net.predict(X)

# evaluate the model
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
print("RMSE: ", rmse)


Random Forest Regression

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.model_selection import cross_val_score
# create a range of n_estimators values to try
n_estimators = [10, 50, 100, 200, 500]

X = df[['Frequency', 'Amplitude']]
y = df['Mass']

# find the best n_estimators value using cross-validation
best_rmse = float('inf')
best_n = 0
for n in n_estimators:
    rf = RandomForestRegressor(n_estimators=n, random_state=42)
    scores = cross_val_score(rf, X, y, cv=10, scoring='neg_mean_squared_error')
    rmse_scores = np.sqrt(-scores)
    avg_rmse = np.mean(rmse_scores)
    if avg_rmse < best_rmse:
        best_rmse = avg_rmse
        best_n = n

print("Best number of estimators: ", best_n)
print("Best RMSE: ", best_rmse)

# train the model on the full dataset using the best number of estimators
rf = RandomForestRegressor(n_estimators=best_n, random_state=42)
rf.fit(X, y)

# test the model on a new sample value
sample_X = np.array([[17, 0.0508]])  # replace with your sample value
sample_y_pred = rf.predict(sample_X)
print("Predicted mass: ", sample_y_pred[0])


Decision Tree

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import numpy as np
# create a Decision Tree regressor
model = DecisionTreeRegressor(random_state=42)

# fit the model to the training data
model.fit(X_train, y_train)

# make predictions on the test data
y_pred = model.predict(X_test)

# evaluate the model using mean squared error
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("RMSE: ", rmse)


In [None]:
# prompt: test on entire data and add result coloumn and print

# Test the model on the entire dataset
y_pred = model.predict(X)

# Add a new column to the DataFrame with the predicted values
df['Predicted Mass'] = y_pred

# Print the DataFrame
df


Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# create a Random Forest regressor with 100 trees
model = RandomForestRegressor(n_estimators=100, random_state=42)

# fit the model to the training data
model.fit(X_train, y_train)

# make predictions on the test data
y_pred = model.predict(X_test)

# evaluate the model using mean squared error
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("RMSE: ", rmse)


Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# create a Gradient Boosting regressor with 100 trees
model = GradientBoostingRegressor(n_estimators=100, random_state=42)

# fit the model to the training data
model.fit(X_train, y_train)

# make predictions on the test data
y_pred = model.predict(X_test)

# evaluate the model using mean squared error
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("RMSE: ", rmse)


SVM

In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# create an SVM regressor with a radial basis function kernel
model = SVR(kernel='rbf', C=1.0, epsilon=0.1, gamma='auto')

# fit the model to the training data
model.fit(X_train, y_train)

# make predictions on the test data
y_pred = model.predict(X_test)

# evaluate the model using mean squared error
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("RMSE: ", rmse)


Neural Networks: feedforward neural network, also known as a multilayer perceptron (MLP).

To predict Mass

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

X = interp_df[['Frequency', 'Amplitude']]
y = interp_df['Mass']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = MinMaxScaler()
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

input_layer = tf.keras.Input(shape=(2,))

model = tf.keras.Sequential([
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

history = model.fit(X_train_scaled, y_train_scaled, epochs=6552, validation_data=(X_test_scaled, y_test_scaled), verbose=0)

loss = model.evaluate(X_test_scaled, y_test_scaled)
print('Test loss:', loss)


Neural Networks: feedforward neural network, also known as a multilayer perceptron (MLP).

To predict Mass and unbalance force

In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Assuming interp_df now has a new column 'UnbalanceForce'
X = interp_df[['Frequency', 'Amplitude']]
y = interp_df[['Mass', 'Unbalance Force']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = MinMaxScaler()
y_train_scaled = scaler_y.fit_transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

input_layer = tf.keras.Input(shape=(2,))

model = tf.keras.Sequential([
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(2)  # Changed to 2 neurons for two output variables
])

model.compile(optimizer='adam', loss='mean_squared_error')

history = model.fit(X_train_scaled, y_train_scaled, epochs=6552, validation_data=(X_test_scaled, y_test_scaled), verbose=0)

loss = model.evaluate(X_test_scaled, y_test_scaled)
print('Test loss:', loss)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 818us/step - loss: 1.1121e-04
Test loss: 0.00010528010170673952


In [7]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])


In [9]:
from sklearn.model_selection import GridSearchCV

# define a range of random_state values to search over
param_grid = {'random_state': [0, 10, 20, 30, 42]}

# create a grid search object with the model and parameter grid
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

# fit the grid search object to the training data
grid_search.fit(X_train_scaled, y_train_scaled)

# print the best random_state value and corresponding mean squared error
print("Best random_state value: ", grid_search.best_params_['random_state'])
print("Best mean squared error: ", -grid_search.best_score_)


TypeError: Cannot clone object '<Sequential name=sequential, built=True>' (type <class 'keras.src.models.sequential.Sequential'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.

Test

In [13]:
new_data = np.array([[18, 0.1818]])
new_data_scaled = scaler_X.transform(new_data)
prediction_scaled = model.predict(new_data_scaled)
prediction = scaler_y.inverse_transform(prediction_scaled)
print('Predicted mass:', prediction[0][0])
print('Predicted unbalance force:', prediction[0][1])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Predicted mass: 161.6523
Predicted unbalance force: 0.017109312




In [14]:
import tensorflow as tf

# Save the entire model (architecture, optimizer, and learned weights)
model.save('ffnn.keras')

import pickle

# Save the scalers
with open('scaler_X.pkl', 'wb') as file:
    pickle.dump(scaler_X, file)

with open('scaler_y.pkl', 'wb') as file:
    pickle.dump(scaler_y, file)

In [15]:
import math

rmse = math.sqrt(loss)
print('Test RMSE:', rmse)


Test RMSE: 0.01057547883794937


In [16]:
from sklearn.metrics import mean_absolute_error

y_pred = model.predict(X_test_scaled)
y_pred = scaler_y.inverse_transform(y_pred)

mae = mean_absolute_error(y_test, y_pred)
print('Test MAE:', mae)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 783us/step
Test MAE: 0.6063883085959927


In [17]:
from sklearn.metrics import r2_score

r2 = r2_score(y_test, y_pred)
print('R-squared:', r2)


R-squared: 0.9985561914621985


Auto encoder

In [None]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
X = df[['Frequency', 'Amplitude']]
y = df['Mass']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Assuming X_train contains only healthy data (mass = 0)
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)

# Autoencoder model
input_dim = X_train.shape[1]
encoding_dim = 16

input_layer = tf.keras.Input(shape=(input_dim,))
encoder = tf.keras.layers.Dense(encoding_dim, activation='relu')(input_layer)
decoder = tf.keras.layers.Dense(input_dim, activation='linear')(encoder)
autoencoder = tf.keras.Model(input_layer, decoder)

autoencoder.compile(optimizer='adam', loss='mean_squared_error')
history = autoencoder.fit(X_train_scaled, X_train_scaled, epochs=1000, verbose=0)

# Anomaly detection function
def detect_anomaly(autoencoder, scaler_X, input_data, threshold):
    input_data_scaled = scaler_X.transform(input_data)
    reconstruction = autoencoder.predict(input_data_scaled)
    reconstruction_error = np.mean(np.square(input_data_scaled - reconstruction), axis=1)

    is_anomaly = reconstruction_error > threshold
    return is_anomaly

# Set an appropriate threshold based on the reconstruction error distribution on the healthy data
threshold = 0.005  # This value should be determined based on the reconstruction error distribution on the healthy data

# Example usage
new_data = np.array([[8, 0.0021]])
is_anomaly = detect_anomaly(autoencoder, scaler_X, new_data, threshold)
print("Anomaly detected:" if is_anomaly else "System is healthy")


In [None]:
import numpy as np

# Calculate reconstruction errors for healthy training data
reconstruction_errors = np.mean(np.square(X_train_scaled - autoencoder.predict(X_train_scaled)), axis=1)

# Calculate the 95th percentile of the reconstruction error distribution
threshold = np.percentile(reconstruction_errors, 95)
print("Threshold:", threshold)
