In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import numpy as np
import joblib

# Fetch the wooden-floor texture measurements (CSV hosted on GitHub)
url = 'https://raw.githubusercontent.com/AayzStha37/GEMLabHCIResearch/main/Unity%20based%20texture%20CSVs/MOD%20Wooden%20Floor_extracted_values_with_DFT321_without_duplicates.csv'
df = pd.read_csv(url)

# The 'DFT321' column is the regression target; every other column is a feature
y = df['DFT321']
x = df.drop(columns='DFT321')

# NOTE: one-hot encoding of a categorical 'TEXTURE' column (ColumnTransformer +
# OneHotEncoder) was sketched here previously but is disabled; the features are
# handed to the models exactly as loaded.

# Hold out 20% of the rows for testing; the seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=100,
)

# Linear regression: fit on the training split, then persist to disk
lr = LinearRegression().fit(x_train, y_train)
joblib.dump(lr, 'wood_LR_model.joblib')

# Random forest (depth capped at 32, fixed seed): fit, then persist to disk
rf = RandomForestRegressor(max_depth=32, random_state=42).fit(x_train, y_train)
joblib.dump(rf, 'wood_RF_model.joblib')

['wood_RF_model.joblib']

In [None]:
## MODEL PERFORMANCE VISUALIZATION
# NOTE: this cell reads lr_results, rf_results and y_rf_train_pred, which are
# assigned by the evaluation cells that appear after it in the file; those cells
# must have been run before this one.

# Model performance comparison: stack the per-model metric rows into one table
df_models = pd.concat([lr_results, rf_results], axis=0).reset_index(drop=True)

# Random forest: predicted vs. experimental DFT321 on the training split
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x=y_train, y=y_rf_train_pred, alpha=0.3)

# Fit the first-degree trend line to the same points that are scattered above
# (previously it was fitted to the linear-regression test predictions, so the
# line did not describe the plotted data).
z = np.polyfit(y_train, y_rf_train_pred, 1)
p = np.poly1d(z)

# A straight line only needs its two end points
x_line = np.array([y_train.min(), y_train.max()])
ax.plot(x_line, p(x_line), '#f8766d')
ax.set_ylabel('Predicted DFT321')
ax.set_xlabel('Experimental DFT321')

# Last expression of the cell, so the comparison table is actually rendered
df_models

In [58]:
## RANDOM FOREST
# Apply the fitted random forest to make predictions on both splits
y_rf_train_pred = rf.predict(x_train)
y_rf_test_pred = rf.predict(x_test)

# Evaluating model performance: MSE and R^2 on the training split ...
rf_train_mse = mean_squared_error(y_train, y_rf_train_pred)
rf_train_r2 = r2_score(y_train, y_rf_train_pred)

# ... and on the held-out test split
rf_test_mse = mean_squared_error(y_test, y_rf_test_pred)
rf_test_r2 = r2_score(y_test, y_rf_test_pred)

# One-row summary table. Building it from a dict keeps the metric columns
# numeric; a mixed list followed by transpose() makes every column object dtype.
rf_results = pd.DataFrame(
    [
        {
            'Method': 'Random Forest',
            'Training MSE': rf_train_mse,
            'Training R2': rf_train_r2,
            'Test MSE': rf_test_mse,
            'TEST_R2': rf_test_r2,
        }
    ]
)
rf_results

Unnamed: 0,Method,Training MSE,Training R2,Test MSE,TEST_R2
0,Random Forest,397.506729,0.01172,416.340325,9.2e-05


In [59]:
## LINEAR REGRESSION
# Apply the fitted linear regression to make predictions on both splits
y_lr_train_pred = lr.predict(x_train)
y_lr_test_pred = lr.predict(x_test)

# Evaluating model performance: MSE and R^2 on the training split ...
lr_train_mse = mean_squared_error(y_train, y_lr_train_pred)
lr_train_r2 = r2_score(y_train, y_lr_train_pred)

# ... and on the held-out test split
lr_test_mse = mean_squared_error(y_test, y_lr_test_pred)
lr_test_r2 = r2_score(y_test, y_lr_test_pred)

# One-row summary table. Building it from a dict keeps the metric columns
# numeric; a mixed list followed by transpose() makes every column object dtype.
lr_results = pd.DataFrame(
    [
        {
            'Method': 'Linear Regression',
            'Training MSE': lr_train_mse,
            'Training R2': lr_train_r2,
            'Test MSE': lr_test_mse,
            'TEST_R2': lr_test_r2,
        }
    ]
)
lr_results

Unnamed: 0,Method,Training MSE,Training R2,Test MSE,TEST_R2
0,Linear Regression,402.056963,0.000407,415.967955,0.000987
