In [1]:
import pandas as pd
import numpy as np
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [21]:
# Load the dataset from its ARFF file.
# `loadarff` returns a tuple; its first element holds the records used to build the frame.
data = arff.loadarff('autoMpg.arff')
records = data[0]
df = pd.DataFrame(records)

In [22]:
# Convert byte-string columns to text.
# Only values that are actually `bytes` are decoded: `.str.decode` yields NaN for
# every non-bytes entry, so re-running this cell on already-decoded columns would
# otherwise replace their contents with NaN. Decoding per value keeps the cell idempotent.
for column in df.columns:
    if df[column].dtype == 'object':
        df[column] = df[column].map(
            lambda value: value.decode('utf-8') if isinstance(value, bytes) else value
        )

In [23]:
# Replace missing values with the mean of their column.
# `numeric_only=True` restricts the mean to numeric columns; without it, pandas >= 2.0
# raises a TypeError when the frame contains text columns. Columns without a computed
# mean receive no fill value and are left unchanged.
# Reassigning (instead of `inplace=True`) keeps the step explicit and chain-friendly.
# NOTE(review): the means are computed over the whole dataset, before the train/test
# split — consider imputing from the training subset only to avoid leakage.
df = df.fillna(df.mean(numeric_only=True))

In [24]:
# Target attribute: the 'class' column; input attributes: every remaining column.
Y = df['class']
X = df.drop(columns=['class'])

In [25]:
# Split the dataset into a training subset (70%) and a test subset (30%).
# The fixed random_state makes the split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [27]:
# Model 1: linear regression.
# `fit` returns the estimator itself, so construction and training are chained.
lr = LinearRegression().fit(X_train, Y_train)
Y_pred_lr = lr.predict(X_test)

# Model 2: decision tree for regression, seeded for reproducibility.
dt = DecisionTreeRegressor(random_state=42).fit(X_train, Y_train)
Y_pred_dt = dt.predict(X_test)

In [32]:
# Performance evaluation: mean squared error and R^2 on the test subset for each model.
mse_lr, r2_lr = mean_squared_error(Y_test, Y_pred_lr), r2_score(Y_test, Y_pred_lr)
mse_dt, r2_dt = mean_squared_error(Y_test, Y_pred_dt), r2_score(Y_test, Y_pred_dt)

# Print one report per model; the second header carries the leading newline that
# separates the two reports.
for header, mse, r2 in (
    ("Performanțele modelului: Regresie liniară", mse_lr, r2_lr),
    ("\nPerformanțele modelului: Arbore de Decizie", mse_dt, r2_dt),
):
    print(header)
    print("Mean squared error: %.2f" % mse)
    print("Coefficient of determination: %.2f" % r2)

Performanțele modelului: Regresie liniară
Mean squared error: 9.28
Coefficient of determination: 0.84

Performanțele modelului: Arbore de Decizie
Mean squared error: 12.75
Coefficient of determination: 0.78
