# Random Forest Regression Model 
### Trained on Ptera Software Data

Training Random Regression Forests with scikit-learn to Ptera Software Data

Importing the Sci Kit Modules

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

### Data Preprocessing

Loading the CSV file into Pandas

In [8]:
# Reading the Data
data = pd.read_csv("Mark3Data.csv",header = None)

# Displaying the structure of the Data 
print(pd.DataFrame(data))

         0    1   2         3          4         5          6
0      0.1  1.0 -30  0.000000  13.921393  1.855566   0.294453
1      0.1  1.0 -30  0.500000  -9.041235 -4.439285  22.045514
2      0.1  1.0 -25  0.000000  13.925155  1.909448   1.711641
3      0.1  1.0 -25  0.500000 -11.013954 -4.369710  20.869233
4      0.1  1.0 -20  0.000000  13.789185  1.963994   3.143793
...    ...  ...  ..       ...        ...       ...        ...
92152  1.2  6.0  30  0.970588 -14.949841  0.056181  -3.850756
92153  1.2  6.0  30  0.976471 -14.939992  0.041060  -3.836797
92154  1.2  6.0  30  0.982353 -14.929552  0.025673  -3.821214
92155  1.2  6.0  30  0.988235 -14.918350  0.010004  -3.804004
92156  1.2  6.0  30  0.994118 -14.906249 -0.005956  -3.785174

[92157 rows x 7 columns]


Splitting the data into Labels and Input Parameters

In [3]:
print("\n")
data.columns = ['Flapping Frequency', 'Airspeed', 'Angle Of Attack', 
                'Normalised Time', 'Lift', 'Induced Drag', 'Pitching Moment']

# Split the data into features and targets
X = data[['Flapping Frequency', 'Airspeed', 'Angle Of Attack', 'Normalised Time']]
y = data[['Lift', 'Induced Drag', 'Pitching Moment']]

print("Input Parameters : ")
print(pd.DataFrame(X))
print("\n")
print("Lebels : ")
print(pd.DataFrame(y))



Input Parameters : 
       Flapping Frequency  Airspeed  Angle Of Attack  Normalised Time
0                     0.1       1.0              -30         0.500000
1                     0.1       1.0              -25         0.000000
2                     0.1       1.0              -25         0.500000
3                     0.1       1.0              -20         0.000000
4                     0.1       1.0              -20         0.500000
...                   ...       ...              ...              ...
92151                 1.2       6.0               30         0.970588
92152                 1.2       6.0               30         0.976471
92153                 1.2       6.0               30         0.982353
92154                 1.2       6.0               30         0.988235
92155                 1.2       6.0               30         0.994118

[92156 rows x 4 columns]


Lebels : 
            Lift  Induced Drag  Pitching Moment
0      -9.041235     -4.439285        22.045514
1   

Splitting the data into Training set and Test Set

In [4]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Training Data : ")
print("Input Parameters : ")
print(pd.DataFrame(X_train))
print("\n")
print("Lebels : ")
print(pd.DataFrame(y_train))

print("Testing Data : ")
print("Input Parameters : ")
print(pd.DataFrame(X_test))
print("\n")
print("Lebels : ")
print(pd.DataFrame(y_test))

Training Data : 
Input Parameters : 
       Flapping Frequency  Airspeed  Angle Of Attack  Normalised Time
30092                 0.7       5.0                5         0.650602
28047                 0.7       4.0                0         0.212121
36838                 0.8       4.0                0         0.802632
11792                 0.5       1.0              -15         0.416667
85564                 1.2       4.5               15         0.031250
...                   ...       ...              ...              ...
6265                  0.3       5.5                5         0.333333
54886                 1.0       2.5                0         0.627119
76820                 1.1       6.0               -5         0.692308
860                   0.1       5.5              -10         0.230769
15795                 0.5       5.0               25         0.474576

[73724 rows x 4 columns]


Lebels : 
            Lift  Induced Drag  Pitching Moment
30092 -10.207227      0.359490       

Training the Model

In [5]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
# Train the model
model.fit(X, y)
print("Model Trained Successfully")

Model Trained Successfully


Predicting some values

In [6]:
# List of values for prediction
X_test = [1.2, 4, 5, 0.91]

# Correct feature names that match the ones used during training
feature_names = ['Flapping Frequency', 'Airspeed', 'Angle Of Attack', 'Normalised Time']

# Convert the list to a DataFrame with correct feature names
df = pd.DataFrame([X_test], columns=feature_names)

print(df)

# Predict using the model with the correct DataFrame
y_pred = model.predict(df)
print("Predicted Values: ", y_pred)


   Flapping Frequency  Airspeed  Angle Of Attack  Normalised Time
0                 1.2         4                5             0.91
Predicted Values:  [[-1.32003985 -0.16516727 -0.14850786]]


In [7]:
import pickle

# Save the model to a file
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

Predicting Another set of data

In [1]:
# Make predictions on the test data
y_pred = model.predict(X_test)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)

# Calculate Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
range_y_test = y_test.max() - y_test.min()

# Calculate RMSE as a percentage of the range
rmse_percentage = (rmse / range_y_test) * 100

# Print the RMSE and the percentage
print("Root Mean Squared Error (RMSE):", rmse)
print("RMSE as a Percentage of the Range of y_test:", rmse_percentage, "%")

# Print RMSE
print("Root Mean Squared Error (RMSE):", rmse)

NameError: name 'model' is not defined

In [8]:
# List of values
X_test = [0.1, 4, 5, 0.33]

# Correct feature names that match the ones used during training
feature_names = ['Flapping Frequency', 'Airspeed', 'Angle Of Attack', 'Normalised Time']

# Convert the list to a DataFrame with correct feature names
df = pd.DataFrame([X_test], columns=feature_names)

print(df)

# Predict using the model with the correct DataFrame
y_pred = model.predict(df)
print("Predicted Values: ", y_pred)

   Flapping Frequency  Airspeed  Angle Of Attack  Normalised Time
0                 0.1         4                5             0.33
Predicted Values:  [[49.44738027 -1.91669426 11.10951677]]


# Verification

By Random point generation

Importing Moduels

In [9]:
# from Mark4 import Mark4Simulation
# import random as rnd

Data successfully saved to flapping_data.csv


In [10]:
# X_test = [1.15, 4, 5, 0.5]

# # Assign random values to the list
# X_test[0] = rnd.uniform(0.2, 1.2)
# X_test[1] = rnd.uniform(1, 6)
# X_test[2] =rnd.uniform(0, 30)
# # Print the values after assignment
# print(X_test[0])

# #Calling Ptera Software
# Mark4Simulation(X_test[0], X_test[1], X_test[2])

# print("\n")

# print("Extracting Random Time data from the simulation")
# # Reading the Data
# data = pd.read_csv("Data/ModelVerification.csv",header = None)

# # Displaying the structure of the Data 
# print(pd.DataFrame(data))

# #choosing a random Row

# random_row = data.sample(n=1)

# print(random_row)

# random_row_array = random_row.values.flatten()

# # Predicting with the model 
# y_pred = model.predict([random_row_array[0:4]])

# # Extract the predicted values
# y_pred_values = y_pred.flatten()  # Ensure it is 1D

# print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
# print(random_row_array)

# # Print predicted values
# print("Predicted Values: ", y_pred_values)

# # Print actual values
# print("Actual Values: ", random_row_array[4:8])

# # Calculate Mean Squared Error for each output
# mseLift = mean_squared_error([random_row_array[4]], [y_pred_values[0]])
# mseID = mean_squared_error([random_row_array[5]], [y_pred_values[1]])
# msePM = mean_squared_error([random_row_array[6]], [y_pred_values[2]])

# # Calculate percentage error
# lift_percentage_error = (abs(random_row_array[4] - y_pred_values[0]) / abs(random_row_array[4])) * 100
# id_percentage_error = (abs(random_row_array[5] - y_pred_values[1]) / abs(random_row_array[5])) * 100
# pm_percentage_error = (abs(random_row_array[6] - y_pred_values[2]) / abs(random_row_array[6])) * 100

# # Print Mean Squared Error and its percentage for each output
# print("\nMean Squared Error of Lift: ", mseLift)
# print("Mean Absolute Percentage Error of Lift: ", lift_percentage_error)

# print("\nMean Squared Error of Induced Drag: ", mseID)
# print("Mean Absolute Percentage Error of Induced Drag: ", id_percentage_error)

# print("\nMean Squared Error of Pitching Moment: ", msePM)
# print("Mean Absolute Percentage Error of Pitching Moment: ", pm_percentage_error)


0.3369546321540822


Simulating:100% |█████████████████████████████████████████████████| Elapsed: 01:29, Remaining: 00:00


Before Second Cycle Extraction :   (269,) (269,) (269,)
Before Second Cycle Extraction value:   [ 26.36795753 -67.92437276  -4.25957361  -1.29782717   0.48458857
   1.3720168    1.64236047   1.81613101   2.49572389   3.88525706
   4.31637431   4.27436906   4.05244515   3.67854872   3.16808596
   2.53105104   1.77413116   0.90149043  -0.08491355  -1.18521588
  -2.40093003  -3.73444657  -5.18855504  -6.76596304  -8.46884501
 -10.2984202  -12.25442347 -14.33467899 -16.53475007 -18.84761953
 -21.26336262 -23.76903674 -26.34863194 -28.98306641 -31.65028378
 -34.32564055 -36.98223492 -39.59154498 -42.12405905 -44.54997488
 -46.84004914 -48.96644503 -50.90353501 -52.62865843 -54.12290595
 -55.37175213 -56.36527945 -57.0985171  -57.57150594 -57.78927095
 -57.76135139 -57.50139812 -57.0265782  -56.35688094 -55.51435011
 -54.52228976 -53.40544985 -52.19843119 -50.91160854 -49.49505213
 -47.91743215 -46.37923206 -44.88142608 -43.33834061 -41.73419118
 -40.087564   -38.48311802 -36.93410469 -35.39

