In [8]:
import pandas as pd
import numpy as np

# Load the dataset from the CSV file
data = pd.read_csv('fmain.csv')

# Define the window size
window_size = 10

# Create empty lists to store the extracted features
features = []
labels = []

# Iterate over the dataset to create non-overlapping windows and extract features
for i in range(0, len(data), window_size):
    if i + window_size <= len(data):
        window_data = data.iloc[i:i+window_size]
        ax_window = window_data['ax']
        ay_window = window_data['ay']
        az_window = window_data['az']
        gx_window = window_data['gx']
        gy_window = window_data['gy']
        gz_window = window_data['gz']
        s_window = window_data['s']
        w_window = window_data['w']
        
        features.append([
            np.sqrt(np.mean(ax_window**2)),
            np.sum(np.abs(ay_window)),
            np.mean(np.abs(az_window - np.mean(az_window))),
            np.var(gx_window),
            np.sum(np.abs(gy_window.diff())),
            np.sum(np.diff(gz_window > 0) != 0),
            np.sum(gz_window**2),
            np.mean(np.abs(s_window)),
            np.sqrt(np.mean(np.diff(ax_window)**2)),
            np.mean(np.diff(ay_window)),
            np.log(1 + np.mean(np.abs(az_window))),
            np.polyfit(range(window_size), gx_window, 1),
            np.polyfit(range(window_size), gz_window, 2)
        ])
        
        labels.append(np.mean(w_window))

# Create a new DataFrame with the extracted features and labels
new_data = pd.DataFrame(features, columns=[
    'RMS', 'SumAbs', 'MAD', 'Variance', 'WaveLength', 'SSC', 'SSI',
    'MeanWavelet', 'DASDV', 'AAC', 'LogDetector', 'LinearFit', 'ParabolicFit'
])
new_data['MeanW'] = labels

# Save the new dataset to a CSV file
new_data.to_csv('new_dataset.csv', index=False)


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the new dataset from the CSV file
df = pd.read_csv('fmain.csv')

# Split the dataset into features (X) and labels (y)
# Split the dataset into features (X) and labels (y)
# Split data into inputs and outputs
X = df[["s", "ax", "ay", "az", "gx", "gy", "gz"]]
y = df["w"]

In [7]:
X

Unnamed: 0,s,ax,ay,az,gx,gy,gz
0,580000.00,-175.29,1582.03,-80.57,-21.71,-81.34,66.22
1,580000.00,-131.35,972.66,264.65,-20.30,-14.39,59.88
2,580000.00,36.13,951.17,250.98,4.88,-6.77,66.59
3,580000.00,247.07,1138.67,-3.42,-8.66,-8.11,-1.83
4,580000.00,16.60,969.24,190.43,-5.67,-2.99,-0.06
...,...,...,...,...,...,...,...
1765,674534.25,434.57,1640.63,734.37,22.20,-26.89,154.02
1766,580000.00,1215.82,2614.75,342.29,16.04,-51.83,224.21
1767,580000.00,-1114.75,1061.04,-664.06,-85.43,-37.99,190.30
1768,580000.00,-552.73,372.56,87.89,8.96,123.84,-176.65


In [8]:
y

0        0.544
1        1.094
2        1.862
3        4.956
4        5.932
         ...  
1765    52.108
1766    49.516
1767    47.186
1768    22.860
1769    14.006
Name: w, Length: 1770, dtype: float64

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Apply Min-Max scaling to your input features
X = scaler.fit_transform(X)

# Normalize the output features
data[['MeanW']] = scaler.fit_transform(data[['MeanW']])
y = data[['MeanW']]

In [10]:
X

array([[0.        , 0.38991447, 0.58422681, ..., 0.46099108, 0.13831371,
        0.56812446],
       [0.        , 0.40009079, 0.47943962, ..., 0.46359044, 0.31952903,
        0.55908397],
       [0.        , 0.43887843, 0.4757442 , ..., 0.51001032, 0.34015428,
        0.56865206],
       ...,
       [0.        , 0.1723396 , 0.49463743, ..., 0.34352186, 0.25565029,
        0.74505554],
       [0.        , 0.302501  , 0.37624649, ..., 0.51753189, 0.69367979,
        0.22180553],
       [0.        , 0.29673195, 0.39337542, ..., 0.51539341, 0.73097848,
        0.15347431]])

In [11]:
len(X)

1770

In [12]:
y

Unnamed: 0,MeanW
0,0.032917
1,0.590614
2,0.684530
3,0.727718
4,0.635811
...,...
172,0.402394
173,0.545709
174,0.386768
175,0.295235


In [13]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

ValueError: Found input variables with inconsistent numbers of samples: [1770, 177]

In [None]:
X_train_scaled

In [51]:
# Create a Random Forest Regressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print("Mean Squared Error:", mse)
print("R-squared Score:", r2)

  model.fit(X_train_scaled, y_train)


Mean Squared Error: 0.025102003352691832
R-squared Score: 0.38130015967223174


In [52]:
y_test

Unnamed: 0,MeanW
19,0.460849
45,0.706476
139,0.525405
30,0.269622
67,0.507509
16,0.65231
119,0.588641
173,0.545709
109,0.413254
140,0.645956


In [53]:
y_pred

array([0.4385827 , 0.58000112, 0.55791124, 0.53926819, 0.51796128,
       0.61460465, 0.34078641, 0.52020418, 0.40037327, 0.63626319,
       0.59418185, 0.41795099, 0.54305024, 0.48330577, 0.64069541,
       0.49553776, 0.56322757, 0.59867032, 0.2864883 , 0.5145497 ,
       0.63194614, 0.38147863, 0.26034469, 0.51567247, 0.36558813,
       0.30297539, 0.3936108 , 0.5829564 , 0.44999956, 0.42317543,
       0.36005725, 0.52229069, 0.31098971, 0.57027834, 0.21864684,
       0.33888077])

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("fmain.csv")
df.head()

Unnamed: 0,matching_values,ax,ay,az,gx,gy,gz,s,w
0,19.58,-175.29,1582.03,-80.57,-21.71,-81.34,66.22,580000.0,0.544
1,19.61,-131.35,972.66,264.65,-20.3,-14.39,59.88,580000.0,1.094
2,19.64,36.13,951.17,250.98,4.88,-6.77,66.59,580000.0,1.862
3,19.74,247.07,1138.67,-3.42,-8.66,-8.11,-1.83,580000.0,4.956
4,19.76,16.6,969.24,190.43,-5.67,-2.99,-0.06,580000.0,5.932


In [3]:
df.shape

(1770, 9)

In [4]:
df.drop(columns=["matching_values"], inplace=True)

In [5]:
df.head()

Unnamed: 0,ax,ay,az,gx,gy,gz,s,w
0,-175.29,1582.03,-80.57,-21.71,-81.34,66.22,580000.0,0.544
1,-131.35,972.66,264.65,-20.3,-14.39,59.88,580000.0,1.094
2,36.13,951.17,250.98,4.88,-6.77,66.59,580000.0,1.862
3,247.07,1138.67,-3.42,-8.66,-8.11,-1.83,580000.0,4.956
4,16.6,969.24,190.43,-5.67,-2.99,-0.06,580000.0,5.932


## Feature Engg.

In [419]:
# Create interaction features
df['s_ax'] = df['s'] * df['ax']
df['s_ay'] = df['s'] * df['ay']
df['s_az'] = df['s'] * df['az']

df['s_gx'] = df['s'] * df['gx']
df['s_gy'] = df['s'] * df['gy']
df['s_gz'] = df['s'] * df['gz']

In [420]:
# Split data into inputs and outputs
X = df[["s", "ax", "ay", "az", "gx", "gy", "gz", "s_ax", "s_ay", "s_az", "s_gx", "s_gy", "s_gz"]]
y = df["w"]

In [421]:
X

Unnamed: 0,s,ax,ay,az,gx,gy,gz,s_ax,s_ay,s_az,s_gx,s_gy,s_gz
0,580000.00,-175.29,1582.03,-80.57,-21.71,-81.34,66.22,-1.016682e+08,9.175774e+08,-4.673060e+07,-12591800.00,-4.717720e+07,3.840760e+07
1,580000.00,-131.35,972.66,264.65,-20.30,-14.39,59.88,-7.618300e+07,5.641428e+08,1.534970e+08,-11774000.00,-8.346200e+06,3.473040e+07
2,580000.00,36.13,951.17,250.98,4.88,-6.77,66.59,2.095540e+07,5.516786e+08,1.455684e+08,2830400.00,-3.926600e+06,3.862220e+07
3,580000.00,247.07,1138.67,-3.42,-8.66,-8.11,-1.83,1.433006e+08,6.604286e+08,-1.983600e+06,-5022800.00,-4.703800e+06,-1.061400e+06
4,580000.00,16.60,969.24,190.43,-5.67,-2.99,-0.06,9.628000e+06,5.621592e+08,1.104494e+08,-3288600.00,-1.734200e+06,-3.480000e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1765,674534.25,434.57,1640.63,734.37,22.20,-26.89,154.02,2.931323e+08,1.106661e+09,4.953577e+08,14974660.35,-1.813823e+07,1.038918e+08
1766,580000.00,1215.82,2614.75,342.29,16.04,-51.83,224.21,7.051756e+08,1.516555e+09,1.985282e+08,9303200.00,-3.006140e+07,1.300418e+08
1767,580000.00,-1114.75,1061.04,-664.06,-85.43,-37.99,190.30,-6.465550e+08,6.154032e+08,-3.851548e+08,-49549400.00,-2.203420e+07,1.103740e+08
1768,580000.00,-552.73,372.56,87.89,8.96,123.84,-176.65,-3.205834e+08,2.160848e+08,5.097620e+07,5196800.00,7.182720e+07,-1.024570e+08


In [423]:
# Example: Applying dimensionality reduction using Principal Component Analysis (PCA)
from sklearn.decomposition import PCA

# Specify the desired number of principal components
n_components = 10

# Create PCA object and fit-transform the data
pca = PCA(n_components=n_components)
X = pca.fit_transform(X)

ValueError: n_components=10 must be between 0 and min(n_samples, n_features)=8 with svd_solver='full'

In [None]:
X

In [394]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Apply Min-Max scaling to your input features
X = scaler.fit_transform(X)
# Normalize the output features
df[['w']] = scaler.fit_transform(df[['w']])
y = df["w"]

In [395]:
X

array([[0.        , 0.38991447, 0.58422681, ..., 0.41197096, 0.13831371,
        0.56812446],
       [0.        , 0.40009079, 0.47943962, ..., 0.41429392, 0.31952903,
        0.55908397],
       [0.        , 0.43887843, 0.4757442 , ..., 0.45577768, 0.34015428,
        0.56865206],
       ...,
       [0.        , 0.1723396 , 0.49463743, ..., 0.306993  , 0.25565029,
        0.74505554],
       [0.        , 0.302501  , 0.37624649, ..., 0.46249943, 0.69367979,
        0.22180553],
       [0.        , 0.29673195, 0.39337542, ..., 0.46058835, 0.73097848,
        0.15347431]])

In [396]:
# Normalize the output features
df[['w']] = scaler.fit_transform(df[['w']])
y = df["w"]

In [397]:
y

0       0.000329
1       0.004859
2       0.011183
3       0.036662
4       0.044699
          ...   
1765    0.424955
1766    0.403610
1767    0.384423
1768    0.184100
1769    0.111188
Name: w, Length: 1770, dtype: float64

In [398]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [399]:
X_train

array([[0.39690023, 0.44645624, 0.45357857, ..., 0.410333  , 0.39255274,
        0.48684727],
       [0.00580557, 0.44046254, 0.4763323 , ..., 0.44783724, 0.35782621,
        0.47291058],
       [0.43966909, 0.46850646, 0.50622409, ..., 0.3963611 , 0.3292954 ,
        0.49846276],
       ...,
       [0.22120756, 0.43831334, 0.47582846, ..., 0.44549088, 0.35383228,
        0.48402646],
       [0.32015004, 0.45063886, 0.51092547, ..., 0.36430696, 0.34917543,
        0.51655681],
       [0.26090171, 0.44860313, 0.47607952, ..., 0.45149731, 0.35328278,
        0.47474865]])

In [400]:
y_train

1067    0.808077
1274    0.445213
383     0.072912
78      0.414381
882     0.442611
          ...   
1043    0.489830
1239    0.668940
1024    0.569906
1330    0.449989
821     0.536720
Name: w, Length: 1416, dtype: float64

In [401]:
# Train and evaluate Random Forest Regression
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
print("Random Forest Regression:")
print("Mean Squared Error:", rf_mse)
print("R-squared Score:", rf_r2)

Random Forest Regression:
Mean Squared Error: 0.021501220263448872
R-squared Score: 0.5246057393090746


In [402]:
y_test

1600    0.319614
1473    0.587513
1361    0.452262
1218    0.831645
380     0.488315
          ...   
1587    0.039528
1759    0.193982
427     0.012418
643     0.411664
1640    0.459575
Name: w, Length: 354, dtype: float64

In [403]:
rf_predictions

array([0.34218258, 0.32442479, 0.3957521 , 0.64929377, 0.43624487,
       0.0674088 , 0.53569165, 0.4261029 , 0.3735896 , 0.40187954,
       0.46065797, 0.52671459, 0.51408897, 0.33151276, 0.18068251,
       0.40452575, 0.65697943, 0.37111781, 0.35730669, 0.37835664,
       0.40365779, 0.45730076, 0.64442957, 0.22903602, 0.50624471,
       0.56711399, 0.46544362, 0.27230973, 0.44197671, 0.42987549,
       0.43517087, 0.45817843, 0.45347794, 0.08925589, 0.2019502 ,
       0.40363654, 0.65754616, 0.37629708, 0.39867286, 0.29322529,
       0.30412109, 0.49670718, 0.32090963, 0.61081575, 0.39250259,
       0.37807665, 0.44614951, 0.4087949 , 0.49120329, 0.24990415,
       0.26400942, 0.15567699, 0.6146165 , 0.21672168, 0.5270318 ,
       0.3279495 , 0.08394634, 0.42554853, 0.23605844, 0.17364132,
       0.44743449, 0.48952682, 0.24849663, 0.49214882, 0.39644399,
       0.16116722, 0.06118814, 0.39970223, 0.20840012, 0.1376901 ,
       0.29555973, 0.45381639, 0.55021987, 0.12748868, 0.49305

In [413]:
# Train and evaluate MLP Regression
mlp_model = MLPRegressor(hidden_layer_sizes=(500, 500), activation='relu', solver='adam', random_state=42)
mlp_model.fit(X_train, y_train)
mlp_predictions = mlp_model.predict(X_test)
mlp_mse = mean_squared_error(y_test, mlp_predictions)
mlp_r2 = r2_score(y_test, mlp_predictions)
print("Multi-Layer Perceptron Regression:")
print("Mean Squared Error:", mlp_mse)
print("R-squared Score:", mlp_r2)

Multi-Layer Perceptron Regression:
Mean Squared Error: 0.023930552808504063
R-squared Score: 0.4708929390550266


In [405]:
# Train and evaluate Support Vector Regression
svr_model = SVR()
svr_model.fit(X_train, y_train)
svr_predictions = svr_model.predict(X_test)
svr_mse = mean_squared_error(y_test, svr_predictions)
svr_r2 = r2_score(y_test, svr_predictions)
print("Support Vector Regression:")
print("Mean Squared Error:", svr_mse)
print("R-squared Score:", svr_r2)

Support Vector Regression:
Mean Squared Error: 0.024649316663618623
R-squared Score: 0.455000993978104


In [406]:
y_test

1600    0.319614
1473    0.587513
1361    0.452262
1218    0.831645
380     0.488315
          ...   
1587    0.039528
1759    0.193982
427     0.012418
643     0.411664
1640    0.459575
Name: w, Length: 354, dtype: float64

In [407]:
svr_predictions

array([ 0.22193407,  0.26141085,  0.38508219,  0.60785197,  0.51141188,
        0.09005132,  0.491588  ,  0.38935595,  0.46374431,  0.46236159,
        0.50656244,  0.53357554,  0.49476717,  0.35054715,  0.29148021,
        0.34850541,  0.67803267,  0.49773561,  0.34601729,  0.41831539,
        0.44766533,  0.53902362,  0.70658909,  0.16940426,  0.52586468,
        0.55051719,  0.4808265 ,  0.2218115 ,  0.35853443,  0.38304171,
        0.45296165,  0.46521166,  0.4794599 ,  0.03963529,  0.17205568,
        0.29816868,  0.60912612,  0.37643007,  0.53412062,  0.31228566,
        0.26257788,  0.48297736,  0.38057824,  0.60402827,  0.45935967,
        0.33685196,  0.40049363,  0.44309934,  0.48866808,  0.19307435,
        0.2302347 ,  0.15948057,  0.60556078,  0.18790525,  0.39211753,
        0.31418869,  0.07201645,  0.36748239,  0.2849833 ,  0.18758479,
        0.47941743,  0.45324877,  0.20982825,  0.53881869,  0.39603764,
        0.17558874,  0.00883499,  0.34895505,  0.18579182,  0.19

In [211]:
# Example: Adding interaction features
from sklearn.preprocessing import PolynomialFeatures

# Create polynomial features up to degree 2
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(X)

# Continue with model training using X_poly as the input features

In [212]:
X_poly

array([[ 5.80000000e+05, -1.75290000e+02,  1.58203000e+03, ...,
         6.61619560e+03, -5.38633480e+03,  4.38508840e+03],
       [ 5.80000000e+05, -1.31350000e+02,  9.72660000e+02, ...,
         2.07072100e+02, -8.61673200e+02,  3.58561440e+03],
       [ 5.80000000e+05,  3.61300000e+01,  9.51170000e+02, ...,
         4.58329000e+01, -4.50814300e+02,  4.43422810e+03],
       ...,
       [ 5.80000000e+05, -1.11475000e+03,  1.06104000e+03, ...,
         1.44324010e+03, -7.22949700e+03,  3.62140900e+04],
       [ 5.80000000e+05, -5.52730000e+02,  3.72560000e+02, ...,
         1.53363456e+04, -2.18763360e+04,  3.12052225e+04],
       [ 5.80000000e+05, -5.77640000e+02,  4.72170000e+02, ...,
         1.89392644e+04, -3.09053234e+04,  5.04316849e+04]])

In [213]:
# Example: Applying dimensionality reduction using Principal Component Analysis (PCA)
from sklearn.decomposition import PCA

# Specify the desired number of principal components
n_components = 10

# Create PCA object and fit-transform the data
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_poly)

# Continue with model training using X_pca as the input features

In [214]:
X_pca

array([[-1.26112598e+11,  2.77311880e+08, -3.02973458e+08, ...,
         3.39283352e+05,  3.23229771e+05,  2.75974985e+05],
       [-1.26112819e+11, -3.00748799e+07, -6.72565158e+07, ...,
        -1.95933673e+05, -1.10008197e+05, -6.10853227e+04],
       [-1.26112821e+11, -1.06094263e+07,  1.07411715e+07, ...,
        -2.85821407e+05, -1.22635365e+05, -8.20323246e+04],
       ...,
       [-1.26112979e+11, -2.11706837e+08, -8.11312071e+08, ...,
         1.10707131e+06,  4.72284793e+05,  1.71038252e+06],
       [-1.26113148e+11, -4.53891243e+08, -2.02973694e+08, ...,
         5.26028655e+05, -4.09546033e+05,  3.82873188e+04],
       [-1.26113093e+11, -4.01481592e+08, -2.09024486e+08, ...,
         3.56723980e+05, -5.35789581e+05,  9.12947510e+04]])

In [203]:
import numpy as np

# Calculate the z-scores for each feature
z_scores = np.abs((X - np.mean(X)) / np.std(X))

# Set a threshold value for z-scores
threshold = 3

# Identify the indices of outliers
outlier_indices = np.where(z_scores > threshold)[0]

# Remove outliers from X and y
X_filtered = np.delete(X, outlier_indices, axis=0)
y_filtered = np.delete(y, outlier_indices)

# Print the number of removed outliers
print("Number of outliers removed:", len(outlier_indices))


  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


ValueError: Shape of passed values is (1580, 7), indices imply (1770, 7)

In [165]:
# Create interaction features
df['s_ax'] = df['s'] * df['ax']
df['s_ay'] = df['s'] * df['ay']
df['s_az'] = df['s'] * df['az']

df['s_gx'] = df['s'] * df['gx']
df['s_gy'] = df['s'] * df['gy']
df['s_gz'] = df['s'] * df['gz']

In [166]:
# Split data into inputs and outputs
X = df[["s", "ax", "ay", "az", "gx", "gy", "gz", "s_ax", "s_ay", "s_az", "s_gx", "s_gy", "s_gz"]]
y = df["w"]

In [97]:
from sklearn.preprocessing import MinMaxScaler

# Normalize the input features
scaler = MinMaxScaler()
df[['s', 'ax', 'ay', 'az', 'gx', 'gy', 'gz', 's_ax', 's_ay', 's_az', 's_gx', 's_gy', 's_gz']] = scaler.fit_transform(df[['s', 'ax', 'ay', 'az', 'gx', 'gy', 'gz', 's_ax', 's_ay', 's_az', 's_gx', 's_gy', 's_gz']])

# Normalize the output features
df[['w']] = scaler.fit_transform(df[['w']])


# access the mean of the normalization
print(scaler.data_min_)

# access the standard deviation of the normalization
print(scaler.data_range_)

[0.504]
[121.434]


In [98]:
df.head()

Unnamed: 0,ax,ay,az,gx,gy,gz,s,w,s_ax,s_ay,s_az,s_gx,s_gy,s_gz
0,0.389914,0.584227,0.30013,0.460991,0.138314,0.568124,0.0,0.000329,0.396145,0.43021,0.24264,0.411971,0.138314,0.568124
1,0.400091,0.47944,0.361689,0.46359,0.319529,0.559084,0.0,0.004859,0.40376,0.353047,0.290753,0.414294,0.319529,0.559084
2,0.438878,0.475744,0.359251,0.51001,0.340154,0.568652,0.0,0.011183,0.432782,0.350326,0.288848,0.455778,0.340154,0.568652
3,0.487731,0.507987,0.313887,0.485049,0.336527,0.471089,0.0,0.036662,0.469336,0.374068,0.253392,0.433471,0.336527,0.471089
4,0.434355,0.478852,0.348454,0.490561,0.350386,0.473613,0.0,0.044699,0.429398,0.352614,0.280409,0.438397,0.350386,0.473613


KeyError: "['s_ax', 's_ay', 's_az', 's_gx', 's_gy', 's_gz'] not in index"

In [114]:
X

Unnamed: 0,s,ax,ay,az,gx,gy,gz,s_ax,s_ay,s_az,s_gx,s_gy,s_gz
0,0.000000,0.389914,0.584227,0.300130,0.460991,0.138314,0.568124,0.396145,0.430210,0.242640,0.411971,0.138314,0.568124
1,0.000000,0.400091,0.479440,0.361689,0.463590,0.319529,0.559084,0.403760,0.353047,0.290753,0.414294,0.319529,0.559084
2,0.000000,0.438878,0.475744,0.359251,0.510010,0.340154,0.568652,0.432782,0.350326,0.288848,0.455778,0.340154,0.568652
3,0.000000,0.487731,0.507987,0.313887,0.485049,0.336527,0.471089,0.469336,0.374068,0.253392,0.433471,0.336527,0.471089
4,0.000000,0.434355,0.478852,0.348454,0.490561,0.350386,0.473613,0.429398,0.352614,0.280409,0.438397,0.350386,0.473613
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1765,0.205091,0.531155,0.594304,0.445449,0.541940,0.285695,0.693322,0.514102,0.471491,0.372898,0.490273,0.273832,0.729119
1766,0.000000,0.712090,0.761813,0.375534,0.530584,0.218189,0.793409,0.637211,0.560980,0.301573,0.474164,0.218189,0.793409
1767,0.000000,0.172340,0.494637,0.196082,0.343522,0.255650,0.745056,0.233346,0.364238,0.161320,0.306993,0.255650,0.745056
1768,0.000000,0.302501,0.376246,0.330169,0.517532,0.693680,0.221806,0.330739,0.277058,0.266118,0.462499,0.693680,0.221806


In [115]:
X = X[["s", "s_ay", "s_gz", "gz", "s_az", "gy", "s_gy", "gx"]]

In [116]:
X

Unnamed: 0,s,s_ay,s_gz,gz,s_az,gy,s_gy,gx
0,0.000000,0.430210,0.568124,0.568124,0.242640,0.138314,0.138314,0.460991
1,0.000000,0.353047,0.559084,0.559084,0.290753,0.319529,0.319529,0.463590
2,0.000000,0.350326,0.568652,0.568652,0.288848,0.340154,0.340154,0.510010
3,0.000000,0.374068,0.471089,0.471089,0.253392,0.336527,0.336527,0.485049
4,0.000000,0.352614,0.473613,0.473613,0.280409,0.350386,0.350386,0.490561
...,...,...,...,...,...,...,...,...
1765,0.205091,0.471491,0.729119,0.693322,0.372898,0.285695,0.273832,0.541940
1766,0.000000,0.560980,0.793409,0.793409,0.301573,0.218189,0.218189,0.530584
1767,0.000000,0.364238,0.745056,0.745056,0.161320,0.255650,0.255650,0.343522
1768,0.000000,0.277058,0.221806,0.221806,0.266118,0.693680,0.693680,0.517532


In [117]:
y

0       0.000329
1       0.004859
2       0.011183
3       0.036662
4       0.044699
          ...   
1765    0.424955
1766    0.403610
1767    0.384423
1768    0.184100
1769    0.111188
Name: w, Length: 1770, dtype: float64

In [118]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [119]:
X_train

Unnamed: 0,s,s_ay,s_gz,gz,s_az,gy,s_gy,gx
670,0.340861,0.395447,0.487399,0.484479,0.282150,0.321992,0.312108,0.464826
792,0.002544,0.350756,0.473441,0.473442,0.281485,0.356828,0.356824,0.502138
1616,0.181428,0.373140,0.461462,0.463004,0.282990,0.375477,0.377928,0.540576
1169,0.340208,0.281566,0.486741,0.483965,0.372132,0.342130,0.337710,0.609819
450,0.278335,0.387463,0.469554,0.470305,0.273898,0.364596,0.365949,0.496516
...,...,...,...,...,...,...,...,...
1130,0.129112,0.359617,0.470240,0.470561,0.284108,0.355501,0.355196,0.511909
1294,0.075496,0.366596,0.469179,0.469435,0.274643,0.353850,0.353573,0.457396
860,0.249267,0.372407,0.471307,0.471702,0.283138,0.352713,0.351571,0.487870
1459,0.141143,0.362049,0.468957,0.469435,0.278542,0.352876,0.352247,0.504056


In [121]:
# Train and evaluate Random Forest Regression
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
print("Random Forest Regression:")
print("Mean Squared Error:", rf_mse)
print("R-squared Score:", rf_r2)

Random Forest Regression:
Mean Squared Error: 0.027967191218788236
R-squared Score: 0.39759331241570206


In [122]:
rf_predictions

array([0.4568205 , 0.49873759, 0.42730718, 0.41795691, 0.37023124,
       0.42468188, 0.57291335, 0.46058501, 0.35605564, 0.40707578,
       0.3280335 , 0.44009684, 0.32939062, 0.25458801, 0.33534694,
       0.43802741, 0.11510582, 0.32368776, 0.14058534, 0.1206163 ,
       0.15165489, 0.38271818, 0.39628111, 0.48498987, 0.11631273,
       0.42293246, 0.45153104, 0.40463215, 0.390851  , 0.32584499,
       0.50304676, 0.20658382, 0.44671591, 0.49418021, 0.19406789,
       0.16504702, 0.15721281, 0.48101059, 0.47112851, 0.34460036,
       0.45151951, 0.27551542, 0.45908609, 0.75131775, 0.38585273,
       0.35051814, 0.24321541, 0.52523544, 0.32320009, 0.44186241,
       0.48242024, 0.46165835, 0.36945155, 0.37319433, 0.52764382,
       0.19580678, 0.5527335 , 0.74658168, 0.44632376, 0.40102344,
       0.68225341, 0.5443622 , 0.43439531, 0.06139384, 0.0433139 ,
       0.54126143, 0.28883953, 0.41172835, 0.50467414, 0.38454617,
       0.44854192, 0.61107235, 0.26929081, 0.59489616, 0.49216

In [123]:
y_test

974     0.698948
275     0.140982
411     0.118929
962     0.364198
518     0.441293
          ...   
1601    0.379021
198     0.246669
15      0.510187
265     0.716159
538     0.213136
Name: w, Length: 354, dtype: float64

In [167]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Apply Min-Max scaling to your input features
X = scaler.fit_transform(X)

In [168]:
X

array([[0.        , 0.38991447, 0.58422681, ..., 0.41197096, 0.13831371,
        0.56812446],
       [0.        , 0.40009079, 0.47943962, ..., 0.41429392, 0.31952903,
        0.55908397],
       [0.        , 0.43887843, 0.4757442 , ..., 0.45577768, 0.34015428,
        0.56865206],
       ...,
       [0.        , 0.1723396 , 0.49463743, ..., 0.306993  , 0.25565029,
        0.74505554],
       [0.        , 0.302501  , 0.37624649, ..., 0.46249943, 0.69367979,
        0.22180553],
       [0.        , 0.29673195, 0.39337542, ..., 0.46058835, 0.73097848,
        0.15347431]])

In [142]:
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.feature_selection import SelectKBest, f_regression

# Apply feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [143]:
X_scaled

array([[-1.0375008 , -0.85609084,  1.48110518, ..., -0.46863082,
        -2.63668608,  0.64768102],
       [-1.0375008 , -0.712613  , -0.0056393 , ..., -0.43243963,
        -0.4672357 ,  0.56482051],
       [-1.0375008 , -0.16573842, -0.05807073, ...,  0.21386818,
        -0.22031693,  0.65251673],
       ...,
       [-1.0375008 , -3.92372157,  0.20999074, ..., -2.10416433,
        -1.23197101,  2.26934226],
       [-1.0375008 , -2.08855049, -1.46976678, ...,  0.31859161,
         4.01197453, -2.52650389],
       [-1.0375008 , -2.16988943, -1.22673772, ...,  0.2888173 ,
         4.45850218, -3.15279343]])

In [169]:
# Generate polynomial features
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

In [170]:
X_poly

array([[0.        , 0.38991447, 0.58422681, ..., 0.01913068, 0.0785794 ,
        0.3227654 ],
       [0.        , 0.40009079, 0.47943962, ..., 0.1020988 , 0.17864356,
        0.31257489],
       [0.        , 0.43887843, 0.4757442 , ..., 0.11570494, 0.19342943,
        0.32336516],
       ...,
       [0.        , 0.1723396 , 0.49463743, ..., 0.06535707, 0.19047367,
        0.55510776],
       [0.        , 0.302501  , 0.37624649, ..., 0.48119166, 0.15386201,
        0.04919769],
       [0.        , 0.29673195, 0.39337542, ..., 0.53432954, 0.11218642,
        0.02355436]])

In [171]:
# Perform feature selection using correlation and F-test
selector = SelectKBest(score_func=f_regression, k=5)  # Select top 5 features
X_selected = selector.fit_transform(X_poly, y)

In [172]:
X_selected

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

## Model Training

In [53]:
# Split data into inputs and outputs
X = df[["s", "ax", "ay", "az", "gx", "gy", "gz", "s_ax", "s_ay", "s_az", "s_gx", "s_gy", "s_gz"]]
y = df["w"]

In [173]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [215]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

In [216]:
X_train

array([[ 8.08265225e+10,  1.99906168e+07, -5.37310241e+07, ...,
        -9.46007394e+04,  8.07686871e+04, -1.38015994e+04],
       [-1.24751064e+11, -1.42333464e+07, -5.42400653e+06, ...,
        -2.20635344e+05, -3.34217031e+04, -5.41908008e+04],
       [-2.21119868e+10,  5.02058323e+06, -2.22976661e+07, ...,
        -1.90259968e+05, -7.36476671e+04, -9.68241674e+03],
       ...,
       [ 2.03683481e+10, -2.83933311e+07, -1.20814067e+07, ...,
        -1.24766796e+05,  1.80514524e+04, -3.46204533e+04],
       [-4.64126715e+10, -2.38554327e+07, -1.35807837e+07, ...,
        -1.51932014e+05,  1.07049857e+04, -3.22340914e+04],
       [-1.26112811e+11, -1.36942975e+06, -2.02409246e+08, ...,
         1.25756306e+05,  8.06497440e+04,  2.12765074e+05]])

In [217]:
# Train and evaluate Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)
lr_mse = mean_squared_error(y_test, lr_predictions)
lr_r2 = r2_score(y_test, lr_predictions)
print("Linear Regression:")
print("Mean Squared Error:", lr_mse)
print("R-squared Score:", lr_r2)

Linear Regression:
Mean Squared Error: 592.4176610291836
R-squared Score: 0.13465691356405252


In [218]:
# Train and evaluate Random Forest Regression
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_r2 = r2_score(y_test, rf_predictions)
print("Random Forest Regression:")
print("Mean Squared Error:", rf_mse)
print("R-squared Score:", rf_r2)

Random Forest Regression:
Mean Squared Error: 390.6981770024802
R-squared Score: 0.4293080902333708


In [219]:
y_test

974     85.380
275     17.624
411     14.946
962     44.730
518     54.092
         ...  
1601    46.530
198     30.458
15      62.458
265     87.470
538     26.386
Name: w, Length: 354, dtype: float64

In [220]:
rf_predictions

array([60.06222, 51.71498, 45.94312, 46.76872, 36.36998, 55.70746,
       70.16168, 61.98646, 46.7766 , 56.41358, 48.7069 , 51.45456,
       30.00878, 19.04378, 47.59798, 44.1994 , 18.73298, 42.56496,
       24.5452 ,  8.45332, 14.17774, 50.0162 , 56.32702, 58.8833 ,
       10.7417 , 52.52526, 61.80928, 41.60844, 45.60384, 41.26544,
       64.1034 , 29.71312, 62.40734, 53.20142, 29.47132, 25.86762,
       18.3977 , 51.78978, 57.845  , 38.66836, 57.98402, 22.01492,
       56.63426, 83.30044, 52.74486, 42.68346, 41.1525 , 64.08996,
       22.57842, 53.8631 , 52.89644, 54.44258, 44.2902 , 63.88288,
       63.79836, 20.33212, 68.9027 , 82.47792, 53.64846, 51.25576,
       80.86772, 63.16198, 55.4209 ,  9.057  ,  7.97684, 63.58378,
       40.18022, 45.10742, 57.88388, 46.97594, 55.96374, 64.9663 ,
       29.24628, 70.18142, 55.7696 , 12.10432, 10.03384, 51.35776,
       47.87106, 37.97672, 65.12654, 71.17148, 15.50192, 17.5476 ,
       57.77074, 24.64262,  7.75154, 27.42486, 53.1742 , 44.54

In [221]:
# Train and evaluate Support Vector Regression
svr_model = SVR()
svr_model.fit(X_train, y_train)
svr_predictions = svr_model.predict(X_test)
svr_mse = mean_squared_error(y_test, svr_predictions)
svr_r2 = r2_score(y_test, svr_predictions)
print("Support Vector Regression:")
print("Mean Squared Error:", svr_mse)
print("R-squared Score:", svr_r2)

Support Vector Regression:
Mean Squared Error: 464.3394124327341
R-squared Score: 0.32174051055408615


In [222]:
# Train and evaluate MLP Regression
mlp_model = MLPRegressor(hidden_layer_sizes=(100, 100), activation='relu', solver='adam', random_state=42)
mlp_model.fit(X_train, y_train)
mlp_predictions = mlp_model.predict(X_test)
mlp_mse = mean_squared_error(y_test, mlp_predictions)
mlp_r2 = r2_score(y_test, mlp_predictions)
print("Multi-Layer Perceptron Regression:")
print("Mean Squared Error:", mlp_mse)
print("R-squared Score:", mlp_r2)

Multi-Layer Perceptron Regression:
Mean Squared Error: 616510499149867.4
R-squared Score: -900535438506.516
