In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

## Load the dataset

In [None]:
dataset = pd.read_csv('/content/MaterialStrength.csv')
dataset.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,target_feature
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,0.3,540.0,1.538462,1,0.350044,YEs,NO,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,0.3,540.0,1.560651,1,0.452416,yES,nOO,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,0.685714,475.0,1.569024,0,6.704743,yEs,NO,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,0.685714,475.0,1.569024,0,8.891596,yes,NOO,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,0.966767,331.0,1.185221,0,8.126411,YeS,no,44.3


In [3]:
dataset.tail()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,target_feature
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28,0.649783,482.7,1.1325,1,0.63234,Yes,nO,44.28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28,0.608318,437.8,1.005532,1,0.898012,yES,nO,31.18
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28,1.297643,396.5,1.144103,1,1.181435,yes,Noo,23.7
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28,1.103708,345.8,1.254405,1,0.85444,Yes,NO,32.77
1029,260.9,100.5,78.3,200.6,8.6,864.5,761.5,28,0.768877,439.7,1.135259,1,0.864198,Yes,nO,32.4


## Pisahkan data menjadi fitur (x) dan target (y)

In [4]:
X = dataset.drop('target_feature', axis=1)
y = dataset['target_feature']

In [5]:
print(X.dtypes)

x1     float64
x2     float64
x3     float64
x4     float64
x5     float64
x6     float64
x7     float64
x8       int64
x9     float64
x10    float64
x11    float64
x12      int64
x13    float64
x14     object
x15     object
dtype: object


In [6]:
le =  LabelEncoder()
X['x14'] = le.fit_transform(X['x14'])
X['x15'] = le.fit_transform(X['x15'])

## Memisahkan data menjadi training set dan testing set

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Decision tree

In [9]:
def evaluate_model(y_true, y_pred, model_name):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    print(f"{model_name} - MSE: {mse:.4f}, RMSE: {rmse:.4f}, R-squared: {r2:.4f}")

In [11]:
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)
evaluate_model(y_test, dt_pred, "Decision tree")

Decision tree - MSE: 20.9544, RMSE: 4.5776, R-squared: 0.9187


## K-NN dengan nilai K=5

In [12]:
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)
evaluate_model(y_test, knn_pred, "k-NN")

k-NN - MSE: 72.7098, RMSE: 8.5270, R-squared: 0.7178


## Regresi linear

In [13]:
lr_model= LinearRegression()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
evaluate_model(y_test, lr_pred, "Linear regression")

Linear regression - MSE: 75.4675, RMSE: 8.6872, R-squared: 0.7071


## Regresi liner basis fungsi

In [14]:
poly_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
poly_model.fit(X_train, y_train)
poly_pred = poly_model.predict(X_test)
evaluate_model(y_test, poly_pred, "Polynomial regression")

Polynomial regression - MSE: 34.6860, RMSE: 5.8895, R-squared: 0.8654
