<center><font size="8" color="steelblue">Machine Learning</font></center>

In [None]:
import numpy as np 
import pandas as pd

### Concatenate experiment data

In [None]:
# Load the three experiment volumes and stack them row-wise into a single frame.
exp1 = pd.read_csv('experiment_vol1.csv')
exp2 = pd.read_csv('experiment_vol2.csv')
exp3 = pd.read_csv('experiment_vol3.csv')
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it every
# volume keeps its own 0-based labels and the result carries duplicate index values,
# which makes label-based lookups (.loc) ambiguous.
all_exp = pd.concat([exp1, exp2, exp3], ignore_index=True)
all_exp.head()

In [None]:
# (rows, columns) of each experiment volume, followed by the concatenated frame.
for frame in (exp1, exp2, exp3, all_exp):
    print(frame.shape)

### Concatenate geometry data

In [None]:
# Load the three geometry volumes and stack them row-wise into a single frame.
geom1 = pd.read_csv('geom_vol1.csv')
geom2 = pd.read_csv('geom_vol2.csv')
geom3 = pd.read_csv('geom_vol3.csv')
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it every
# volume keeps its own 0-based labels and the result carries duplicate index values,
# which makes label-based lookups (.loc) ambiguous.
all_geom = pd.concat([geom1, geom2, geom3], ignore_index=True)
all_geom.head()

In [None]:
# (rows, columns) of each geometry volume, followed by the concatenated frame.
for frame in (geom1, geom2, geom3, all_geom):
    print(frame.shape)

### Rename columns: strip apostrophes and replace spaces with underscores

In [None]:
# Normalise the column labels of both frames: remove apostrophes first, then turn
# every space into an underscore (e.g. later cells use 'Adimensional_Chord_-_c/R').
for frame in (all_geom, all_exp):
    frame.columns = frame.columns.str.replace("'", '').str.replace(' ', '_')

### No missing value in exp data

In [None]:
# Number of missing entries in each column of the experiment data.
all_exp.isna().sum(axis=0)

### No missing values in geom data

In [None]:
# Number of missing entries in each column of the geometry data.
all_geom.isna().sum(axis=0)

### Model building for propellers that have 2 blades

In [None]:
# Column dtypes of the experiment data; used to spot the non-numeric columns that
# are dropped or one-hot encoded in the following cells.
all_exp.dtypes

### Propellers with exactly 2 blades

In [None]:
# Keep only the rows with exactly two blades and discard the two name columns
# (categorical labels that are not used as model inputs).
two_blade_rows = all_exp['Number_of_Blades'].eq(2)
num_2_data = all_exp.loc[two_blade_rows].drop(columns=['Propellers_Name', 'Blades_Name'])
num_2_data.head()

### Propellers with a number of blades other than 2

In [None]:
# Complement of the 2-blade subset: every row whose blade count differs from two,
# again without the two categorical name columns.
other_blade_rows = all_exp['Number_of_Blades'].ne(2)
num_other = all_exp.loc[other_blade_rows].drop(columns=['Propellers_Name', 'Blades_Name'])
num_other.head()

### Converting the Propellers_Brand column to numerical (one-hot) columns

In [None]:
# One-hot encode Propellers_Brand into indicator columns for the 2-blade subset.
dummy_data = pd.get_dummies(data=num_2_data, columns=['Propellers_Brand'])
dummy_data.head()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor 
from sklearn.model_selection import train_test_split as split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error,root_mean_squared_error,mean_absolute_percentage_error

### Models

In [None]:
# Candidate regressors, keyed by the display name used in the result tables.
# The randomised estimators get a fixed random_state so that re-running the notebook
# reproduces the same scores (12 matches the seed used for the train/test splits).
# NOTE(review): n_neighbors=1 makes K Neighbors predict from the single nearest
# training row, so its training-set "Score" is not an informative number.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=12),
    'K Neighbors': KNeighborsRegressor(n_neighbors=1),
    'Decision Tree': DecisionTreeRegressor(random_state=12),
    'Gradient Boosting': GradientBoostingRegressor(random_state=12),
}

### Model Building & Evaluation of Propellers which have 2 Blades

In [None]:
# Fit every candidate model on the 2-blade data and tabulate train/test metrics.
# The feature matrix, target and split do not depend on the model, so they are built
# once here instead of being recomputed on every loop iteration.
# NOTE(review): X keeps every column except Efficiency_Output, so any other measured
# output in the frame (e.g. Power_Coefficient_Output, which is used as a target further
# down) is fed to the models as a predictor - confirm this is intended.
X = dummy_data.drop(columns='Efficiency_Output')
y = dummy_data.Efficiency_Output
X_train, X_test, y_train, y_test = split(X, y, test_size=0.2, random_state=12)

results = []
for model, item in models.items():  # model: display name, item: estimator instance
    item.fit(X_train, y_train)
    y_pred = item.predict(X_test)

    results.append({
        'Models': model,
        # 'Score' is R^2 on the training split; 'R Squared' is R^2 on the held-out split.
        'Score': f"{item.score(X_train,y_train)*100:.2f}%",
        'R Squared': f"{r2_score(y_test,y_pred) * 100:.2f}%",
        # The remaining metrics are all computed on the held-out split.
        'MSE' : (mean_squared_error(y_test,y_pred)),
        'RMSE': (root_mean_squared_error(y_test,y_pred)),
        'MAE' : (mean_absolute_error(y_test,y_pred)),
        'MAPE': (mean_absolute_percentage_error(y_test,y_pred))
    })

results_2_blade = pd.DataFrame(results)
display(results_2_blade)

### Evaluation of propellers with a number of blades other than 2

In [None]:
# Same evaluation as for the 2-blade subset, applied to every other blade count.
dummy_data1 = pd.get_dummies(num_other, columns=['Propellers_Brand'])

# The feature matrix, target and split do not depend on the model, so they are built
# once here instead of being recomputed on every loop iteration.
# NOTE(review): X keeps every column except Efficiency_Output, so any other measured
# output in the frame (e.g. Power_Coefficient_Output, which is used as a target further
# down) is fed to the models as a predictor - confirm this is intended.
X = dummy_data1.drop(columns='Efficiency_Output')
y = dummy_data1.Efficiency_Output
X_train, X_test, y_train, y_test = split(X, y, test_size=0.2, random_state=12)

data = []
for model, item in models.items():  # model: display name, item: estimator instance
    item.fit(X_train, y_train)
    y_pred = item.predict(X_test)

    data.append({
        'Models': model,
        # 'Score' is R^2 on the training split; 'R Squared' is R^2 on the held-out split.
        'Score': f"{item.score(X_train,y_train)*100:.2f}%",
        'R Squared': f"{r2_score(y_test,y_pred) * 100:.2f}%",
        # The remaining metrics are all computed on the held-out split.
        'MSE' : (mean_squared_error(y_test,y_pred)),
        'RMSE': (root_mean_squared_error(y_test,y_pred)),
        'MAE' : (mean_absolute_error(y_test,y_pred)),
        'MAPE': (mean_absolute_percentage_error(y_test,y_pred))
    })

results_other_blades = pd.DataFrame(data)
display(results_other_blades)

### Gradient boosting model for the power coefficient

In [None]:
# Full experiment table (all blade counts) prepared for modelling: the two name
# columns are removed and Propellers_Brand is one-hot encoded.
# This is a snapshot of all_exp as it is when this cell runs; columns added to
# all_exp by later cells (such as Disc_Area) are not part of dummy2.
dummy2 = pd.get_dummies(
    all_exp.drop(columns=['Propellers_Name', 'Blades_Name']),
    columns=['Propellers_Brand'],
)
dummy2.head()

In [None]:
# Gradient boosting regressor predicting Power_Coefficient_Output from all other columns.
# NOTE(review): the predictors include Efficiency_Output (a target elsewhere in this
# notebook) - confirm that using another measured output as a feature is intended.
X = dummy2.drop(columns='Power_Coefficient_Output')
y = dummy2.Power_Coefficient_Output
X_train, X_test, y_train, y_test = split(X, y, test_size=0.2, random_state=42)

# Seeded so the reported numbers are reproducible across kernel restarts.
gdb = GradientBoostingRegressor(random_state=42)
gdb.fit(X_train, y_train)
y_pred_gdb = gdb.predict(X_test)

# "Score" is R^2 on the training split; every other line is measured on the test split.
print(f"Score   : {gdb.score(X_train,y_train) * 100:.2f}%")
print(f"R Square: {r2_score(y_test,y_pred_gdb) * 100:.2f}%")
print(f"MSE     : {mean_squared_error(y_test,y_pred_gdb)}")
print(f"MAE     : {mean_absolute_error(y_test,y_pred_gdb)}")
print(f"MAPE    : {mean_absolute_percentage_error(y_test,y_pred_gdb)}")

### Using XGBoost

In [None]:
from xgboost import XGBRegressor

# XGBoost with default settings. X_train/X_test/y_train/y_test are not redefined in
# this cell, so it reuses whatever split the most recently executed cell produced
# (in notebook order: the Power_Coefficient_Output split of the previous cell).
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

# "Score" is R^2 on the training split; every other line is measured on the test split.
print(
    f"Score   : {xgb.score(X_train,y_train) * 100:.2f}%",
    f"R Square: {r2_score(y_test,y_pred_xgb) * 100:.2f}%",
    f"MSE     : {mean_squared_error(y_test,y_pred_xgb)}",
    f"MAE     : {mean_absolute_error(y_test,y_pred_xgb)}",
    f"MAPE    : {mean_absolute_percentage_error(y_test,y_pred_xgb)}",
    sep="\n",
)

### Hyperparameter Tuning

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Switch the target back to Efficiency_Output and rebuild the 80/20 split that the
# hyperparameter search below trains on.
y = dummy2['Efficiency_Output']
X = dummy2.drop(columns=['Efficiency_Output'])
X_train, X_test, y_train, y_test = split(X, y, test_size=0.2, random_state=42)

In [None]:
# Candidate values sampled by the randomized search; the keys are keyword arguments
# of RandomForestRegressor.
param = dict(
    n_estimators=[10, 100, 50, 200],
    max_depth=[1, 10, 20],
    min_samples_split=[2, 4, 5, 8, 10],
    min_samples_leaf=[1, 2, 3, 4, 6],
    max_features=['sqrt', 'log2'],
    ccp_alpha=[0, 0.2, 0.5, 1],
)

In [None]:
# Randomized hyperparameter search over `param` with 5-fold cross-validation on the
# training split, using all available cores.
# Both the forest and the parameter sampling are seeded; without random_state the
# sampled candidates (and therefore the selected model) change on every run.
# NOTE: the name `random` shadows the standard-library module of the same name; it
# is kept because the following cells refer to it.
rf = RandomForestRegressor(random_state=42)
random = RandomizedSearchCV(rf, param_distributions=param, cv=5, n_jobs=-1, random_state=42)
random.fit(X_train, y_train)

In [None]:
# Cross-validation score recorded by the search for its best parameter setting.
random.best_score_

In [None]:
# Score the tuned model on both splits. The training score alone says nothing about
# generalisation; the held-out split created before the search was otherwise never
# used to evaluate the tuned model.
pd.Series(
    {
        'train': random.score(X_train, y_train),
        'test': random.score(X_test, y_test),
    },
    name='score',
)

### There are no missing values in data set 

### Calculate Solidity

In [None]:
# Convert the adimensional chord (c/R) and radius (r/R) into dimensional values by
# multiplying with the propeller radius (diameter / 2), and add the rotor disc area
# pi * R^2 to both the geometry and the experiment frames.
geom_radius = all_geom['Propellers_Diameter'] / 2
exp_radius = all_exp['Propellers_Diameter'] / 2

all_geom['Chord_distribution'] = geom_radius * all_geom['Adimensional_Chord_-_c/R']
all_geom['Radius_distribution'] = geom_radius * all_geom['Adimensional_Radius_-_r/R']
all_geom['Disc_Area'] = np.pi * geom_radius ** 2
all_exp['Disc_Area'] = np.pi * exp_radius ** 2

In [None]:
# Blade planform area: integrate chord over radius with the trapezoidal rule,
# one integral per Blades_Name.
# NOTE(review): assumes the rows of each Blades_Name group describe a single blade - confirm.
geom_group = all_geom.groupby('Blades_Name')
result = []
for name, group in geom_group:
    # The integration needs the stations ordered by increasing radius.
    group = group.sort_values('Radius_distribution')
    blade_area = np.trapezoid(group['Chord_distribution'], group['Radius_distribution'])
    result.append({
        'Blades_Name': name,
        'Blade_Area': blade_area})
area_df = pd.DataFrame(result)

# Join on an explicit key instead of "whatever columns happen to be shared";
# validate= raises if a blade name ever maps to more than one area row.
# NOTE(review): the inner join drops experiments whose blade has no geometry rows - confirm.
data_sol = pd.merge(all_exp, area_df, on='Blades_Name', how='inner', validate='many_to_one')

# Total_Blade_Area used to be a verbatim copy of Blade_Area and Solidity ignored the
# blade count. Solidity is the total blade area of all blades divided by the disc
# area, so the per-blade area is scaled by Number_of_Blades first.
data_sol['Total_Blade_Area'] = data_sol['Blade_Area'] * data_sol['Number_of_Blades']
data_sol['Solidity'] = data_sol['Total_Blade_Area'] / data_sol['Disc_Area']
data_sol.head()

### Model With Solidity

In [None]:
# Evaluate every candidate model on the solidity-augmented table (all blade counts).
dummy3 = pd.get_dummies(data_sol, columns=['Propellers_Brand']).drop(columns=['Propellers_Name', 'Blades_Name'])

# The feature matrix, target and split do not depend on the model, so they are built
# once here instead of being recomputed on every loop iteration.
# NOTE(review): X keeps every column except Efficiency_Output, so any other measured
# output in the frame (e.g. Power_Coefficient_Output, which is used as a target earlier
# in this notebook) is fed to the models as a predictor - confirm this is intended.
X = dummy3.drop(columns='Efficiency_Output')
y = dummy3.Efficiency_Output
X_train, X_test, y_train, y_test = split(X, y, test_size=0.2, random_state=12)

res = []
for model, item in models.items():  # model: display name, item: estimator instance
    item.fit(X_train, y_train)
    y_pred = item.predict(X_test)

    res.append({
        'Models': model,
        # 'Score' is R^2 on the training split; 'R Squared' is R^2 on the held-out split.
        'Score': f"{item.score(X_train,y_train)*100:.2f}%",
        'R Squared': f"{r2_score(y_test,y_pred) * 100:.2f}%",
        # The remaining metrics are all computed on the held-out split.
        'MSE' : (mean_squared_error(y_test,y_pred)),
        'RMSE': (root_mean_squared_error(y_test,y_pred)),
        'MAE' : (mean_absolute_error(y_test,y_pred)),
        'MAPE': (mean_absolute_percentage_error(y_test,y_pred))
    })

res_sol = pd.DataFrame(res)
res_sol

### Model without Solidity

In [None]:
# Baseline for the solidity comparison: the same models on dummy2, the encoded
# experiment table built before any area/solidity columns were added.
# The feature matrix, target and split do not depend on the model, so they are built
# once here instead of being recomputed on every loop iteration.
# NOTE(review): X keeps every column except Efficiency_Output, so any other measured
# output in the frame (e.g. Power_Coefficient_Output, which is used as a target earlier
# in this notebook) is fed to the models as a predictor - confirm this is intended.
X = dummy2.drop(columns='Efficiency_Output')
y = dummy2.Efficiency_Output
X_train, X_test, y_train, y_test = split(X, y, test_size=0.2, random_state=12)

without = []
for model, item in models.items():  # model: display name, item: estimator instance
    item.fit(X_train, y_train)
    y_pred = item.predict(X_test)

    without.append({
        'Models': model,
        # 'Score' is R^2 on the training split; 'R Squared' is R^2 on the held-out split.
        'Score': f"{item.score(X_train,y_train)*100:.2f}%",
        'R Squared': f"{r2_score(y_test,y_pred) * 100:.2f}%",
        # The remaining metrics are all computed on the held-out split.
        'MSE' : (mean_squared_error(y_test,y_pred)),
        'RMSE': (root_mean_squared_error(y_test,y_pred)),
        'MAE' : (mean_absolute_error(y_test,y_pred)),
        'MAPE': (mean_absolute_percentage_error(y_test,y_pred))
    })

res_without_sol = pd.DataFrame(without)
res_without_sol