In [2]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import math

# Load dataset
file_path = '/content/MaterialStrength.csv' # Path to your dataset
data = pd.read_csv(file_path)

# Show the first 5 rows of the raw data
print(data.head())

# Identify the categorical columns.
# Columns with dtype object are assumed to be categorical.
categorical_cols = data.select_dtypes(include=['object']).columns

# Encode the categorical columns as integer codes.
# The raw labels are spelled with inconsistent letter case (e.g. "YEs",
# "yES", "yes"), and LabelEncoder is case-sensitive, so without
# normalisation every spelling would become its own category. Lower-casing
# (and defensively trimming surrounding whitespace) collapses them first.
# The fitted encoders are kept per column so codes can be mapped back to the
# normalised labels later via `inverse_transform`.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # Cast to string before encoding so mixed-type cells are handled uniformly.
    normalized_labels = data[col].astype(str).str.strip().str.lower()
    data[col] = le.fit_transform(normalized_labels)
    label_encoders[col] = le

# Show the first 5 rows after encoding
print(data.head())

# Assume the last column is the target and all other columns are features
X = data.iloc[:, :-1].values  # Features (every column except the last)
y = data.iloc[:, -1].values   # Target (last column)

# Split the dataset into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Each estimator is built and fitted in one expression (`fit` returns the
# estimator itself), then used to predict on the held-out test features.

# Model 1: Decision Tree Regression (seeded for reproducible results)
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

# Model 2: k-Nearest Neighbors Regression (k=5)
# NOTE(review): k-NN is distance-based and X_train is used here as given; if
# its columns are on very different scales the largest ones dominate the
# distance -- consider standardising the features first.
knn_model = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)

# Model 3: Linear Regression
lr_model = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

# Model 4: Polynomial Regression (linear model on a degree-2 polynomial basis)
# The basis expansion is fitted on the training features only and then
# re-applied unchanged to the test features.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

lr_poly_model = LinearRegression().fit(X_train_poly, y_train)
y_pred_poly = lr_poly_model.predict(X_test_poly)

# Evaluasi Model
def evaluate_model(y_test, y_pred, model_name):
    """Print a short error report for one model's predictions.

    Computes the mean squared error, its square root (RMSE) and the
    R-squared score of ``y_pred`` against ``y_test``, then prints them
    under a header containing ``model_name``, followed by a dashed
    separator line.

    Args:
        y_test: True target values.
        y_pred: Predicted target values to compare against ``y_test``.
        model_name: Label shown in the report header.

    Returns:
        None. The report is written to standard output only.
    """
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = math.sqrt(mse)

    report_lines = [
        f"Evaluasi Model {model_name}:",
        f"RMSE: {rmse}",
        f"MSE: {mse}",
        f"R-squared: {r2}",
        "-" * 30,
    ]
    for line in report_lines:
        print(line)

# Evaluate every model against the same held-out targets, in a fixed order.
for predictions, label in (
    (y_pred_dt, "Decision Tree"),
    (y_pred_knn, "k-NN"),
    (y_pred_lr, "Linear Regression"),
    (y_pred_poly, "Polynomial Regression (Degree 2)"),
):
    evaluate_model(y_test, predictions, label)


      x1     x2   x3     x4   x5      x6     x7   x8        x9    x10  \
0  540.0    0.0  0.0  162.0  2.5  1040.0  676.0   28  0.300000  540.0   
1  540.0    0.0  0.0  162.0  2.5  1055.0  676.0   28  0.300000  540.0   
2  332.5  142.5  0.0  228.0  0.0   932.0  594.0  270  0.685714  475.0   
3  332.5  142.5  0.0  228.0  0.0   932.0  594.0  365  0.685714  475.0   
4  198.6  132.4  0.0  192.0  0.0   978.4  825.5  360  0.966767  331.0   

        x11  x12       x13  x14  x15  target_feature  
0  1.538462    1  0.350044  YEs   NO           79.99  
1  1.560651    1  0.452416  yES  nOO           61.89  
2  1.569024    0  6.704743  yEs   NO           40.27  
3  1.569024    0  8.891596  yes  NOO           41.05  
4  1.185221    0  8.126411  YeS   no           44.30  
      x1     x2   x3     x4   x5      x6     x7   x8        x9    x10  \
0  540.0    0.0  0.0  162.0  2.5  1040.0  676.0   28  0.300000  540.0   
1  540.0    0.0  0.0  162.0  2.5  1055.0  676.0   28  0.300000  540.0   
2  332.5  14