In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

# Load the dataset
# NOTE(review): the path is hard-coded; adjust it when running outside the
# environment this was written for.
file_path = '/content/sample_data/MaterialStrength.csv'
data = pd.read_csv(file_path)

# Data Cleaning and Preprocessing
# Replace the values of 'x14' and 'x15' with integer codes. Each column gets
# its own LabelEncoder, stored in `encoders`, so the fitted classes of both
# columns stay available (e.g. for inverse_transform). Refitting one shared
# encoder would keep only the classes of the last column it saw.
# NOTE(review): integer codes imply an ordering between categories; if these
# columns are nominal, one-hot encoding may suit the linear and k-NN models
# better -- confirm against the dataset.
encoders = {}
for column in ('x14', 'x15'):
    le = LabelEncoder()  # after the loop, `le` is the encoder fitted on 'x15'
    data[column] = le.fit_transform(data[column])
    encoders[column] = le

# Every column except 'target_feature' is a predictor; 'target_feature' is
# the regression target.
X = data.drop(columns=['target_feature'])
y = data['target_feature']

# Split data into training and testing sets
# 20% of the rows are held out for testing; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Decision Tree Regressor
# Train on the training split (fit() returns the estimator, so construction
# and training are chained), then score the predictions on the test split.
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)
r2_dt = r2_score(y_test, y_pred_dt)
mse_dt = mean_squared_error(y_test, y_pred_dt)

# k-NN Regressor
# Predict from the 5 nearest training rows, then score on the test split.
# NOTE(review): the features are passed in unscaled; k-NN is distance-based,
# so columns with large numeric ranges may dominate -- consider standardising
# the features first.
knn_model = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)
r2_knn = r2_score(y_test, y_pred_knn)
mse_knn = mean_squared_error(y_test, y_pred_knn)

# Linear Regression
# Ordinary least-squares baseline fitted on the raw training features and
# scored on the test split.
lr_model = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)
r2_lr = r2_score(y_test, y_pred_lr)
mse_lr = mean_squared_error(y_test, y_pred_lr)

# Polynomial Linear Regression
# Expand the features into degree-2 polynomial terms. The expansion is fitted
# on the training split and then applied unchanged to the test split.
poly = PolynomialFeatures(degree=2)
X_poly_train, X_poly_test = poly.fit_transform(X_train), poly.transform(X_test)

# Fit a linear model on the expanded features and score it on the test split.
lr_poly_model = LinearRegression().fit(X_poly_train, y_train)
y_pred_lr_poly = lr_poly_model.predict(X_poly_test)
r2_lr_poly = r2_score(y_test, y_pred_lr_poly)
mse_lr_poly = mean_squared_error(y_test, y_pred_lr_poly)

# Displaying the results
# Collect the test-set MSE and R2 of each model under its display name.
results = {
    model_name: {"MSE": model_mse, "R2 Score": model_r2}
    for model_name, model_mse, model_r2 in (
        ("Decision Tree", mse_dt, r2_dt),
        ("k-NN", mse_knn, r2_knn),
        ("Linear Regression", mse_lr, r2_lr),
        ("Polynomial Linear Regression", mse_lr_poly, r2_lr_poly),
    )
}

# Pretty-print the nested dictionary with a 4-space indent.
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(results)


{   'Decision Tree': {   'MSE': 20.954405339805824,
                         'R2 Score': 0.9186795331743481},
    'Linear Regression': {   'MSE': 75.4674986230397,
                             'R2 Score': 0.7071235323232181},
    'Polynomial Linear Regression': {   'MSE': 34.68604618016109,
                                        'R2 Score': 0.8653893812797192},
    'k-NN': {'MSE': 72.70981852427184, 'R2 Score': 0.717825617605572}}
