In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import seaborn as sns
import matplotlib.pyplot as plt

In [10]:
# Load data
file_path = 'Turbofan_HPC_Efficiency.csv'
hpc_data = pd.read_csv(file_path)

# Define features and target
features = ['CoreNozzleGrossThrust_kN', 'BypassNozzleGrossThrust_kN', 'Sp.FuelConsumption_g/(kN*s)', 'SpecificThrust_m/s']
X = hpc_data[features]
y = hpc_data['NetThrust_kN']

In [None]:
# EDA
sns.pairplot(hpc_data, x_vars=features, y_vars='NetThrust_kN', height=7, aspect=0.7, kind='reg')
plt.show()

In [None]:
# Feature selection
correlation = hpc_data.corr()
print(correlation['NetThrust_kN'])

In [11]:
# Split data
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# Define a random forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(train_X, train_y)
rf_pred = rf_model.predict(test_X)
mae = mean_absolute_error(test_y, rf_pred)
mse = mean_squared_error(test_y, rf_pred)
r2 = r2_score(test_y, rf_pred)
print("Validation MAE for Random Forest Model: {:,.0f}".format(mae))
print("Validation MSE for Random Forest Model: {:,.0f}".format(mse))
print("Validation R2 score for Random Forest Model:{:,.0f}".format(r2))

Validation MAE for Random Forest Model: 0
Validation MSE for Random Forest Model: 0
Validation R2 score for Random Forest Model:1


In [13]:
importances = rf_model.feature_importances_
feature_names = X.columns
feature_importance_hpc = pd.DataFrame({'Feature': feature_names, 'Importance': importances}).sort_values(by='Importance', ascending=False)
print(feature_importance_hpc)

                       Feature  Importance
1   BypassNozzleGrossThrust_kN    0.285092
0     CoreNozzleGrossThrust_kN    0.254032
2  Sp.FuelConsumption_g/(kN*s)    0.237784
3           SpecificThrust_m/s    0.223092
