In [30]:
import numpy as np
import pandas as pd 
import os
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tabulate import tabulate
import pickle
import random

In [2]:
input_keys = ['B', 'D', 'P', 'J', 'N', 'c/R', 'r/R', 'beta']
output_keys = ['CT', 'CP', 'eta']

In [26]:
# Create geometric dataframe
path = ''
# Directories of performance data
geom1_dir = os.path.join(path, 'volume1_geom.csv')
geom2_dir = os.path.join(path, 'volume2_geom.csv')
geom3_dir = os.path.join(path, 'volume3_geom.csv')

# Geometric data from volume 1 to volume 3
geom1_df = pd.read_csv(geom1_dir)
geom2_df = pd.read_csv(geom2_dir)
geom3_df = pd.read_csv(geom3_dir)

# Merge them into 1 geom dataframe
geom_df = pd.concat([geom1_df, geom2_df, geom3_df], ignore_index=True)

In [27]:
# Create performance dataframe
# Directories of performance data
perf1_dir = os.path.join(path, 'volume1_exp.csv')
perf2_dir = os.path.join(path, 'volume2_exp.csv')
perf3_dir = os.path.join(path, 'volume3_exp.csv')

# Performance data from volume 1 to volume 3
perf1_df = pd.read_csv(perf1_dir)
perf2_df = pd.read_csv(perf2_dir)
perf3_df = pd.read_csv(perf3_dir)

# Merge them into 1 perf dataframe
perf_df = pd.concat([perf1_df, perf2_df, perf3_df], ignore_index=True)

In [28]:
# Create df dataframe
df = perf_df.merge(geom_df, on=['BladeName', 'D', 'P', 'Family'])

In [29]:
df.to_csv("full_data.csv", sep = ',', index=False,encoding='utf-8')

In [6]:
# Split df into X and y
X = df[input_keys].values
y = df[output_keys].values

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 

In [9]:
model = DecisionTreeRegressor()

In [10]:
model.fit(X_train, y_train)

DecisionTreeRegressor()

In [11]:
y_pred = model.predict(X_test)

In [31]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
data = [['mae', mae], ['mse', mse], ['R²', r2]]
header = ['Metric', 'Value']
error_tab = tabulate(data, header, tablefmt='grid')
print(error_tab)

+----------+-------------+
| Metric   |       Value |
| mae      | 0.000436207 |
+----------+-------------+
| mse      | 0.000169587 |
+----------+-------------+
| R²       | 0.9979      |
+----------+-------------+


In [18]:
# Save model
filename = "model.sav"
pickle.dump(model, open(filename, 'wb'))