In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Synthetic 1-D regression problem: a sine wave of amplitude 3 sampled on
# [0, 10] and perturbed with Gaussian noise (mean 0, standard deviation 0.2).
np.random.seed(42)  # seed the global NumPy RNG so the noise draw is repeatable
n_samples = 100
X = np.linspace(0, 10, num=n_samples)
noise = np.random.normal(loc=0, scale=0.2, size=n_samples)
y = 3 * np.sin(X) + noise
# Turn the 1-D grid into a single-column 2-D array of shape (n_samples, 1).
X = X[:, np.newaxis]

In [None]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Two models to compare on the same training data: an ordinary linear
# regression and a regression tree limited to depth 3.
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

tree_model = DecisionTreeRegressor(random_state=42, max_depth=3)
# Left as the final bare expression so the cell still displays the fitted tree.
tree_model.fit(X_train, y_train)


In [None]:
# Score both fitted models on the held-out split, then report the results.
linear_mse = mean_squared_error(y_test, linear_model.predict(X_test))
tree_mse = mean_squared_error(y_test, tree_model.predict(X_test))

print("\nModel Performance:")
print(f"Linear Regression - MSE Score: {linear_mse:.4f}")
print(f"Regression Tree    - MSE Score: {tree_mse:.4f}")

In [None]:
# Evaluate both models on an evenly spaced grid of n_samples points over
# [0, 10] so their fitted curves can be drawn as lines.
X_plot = np.linspace(0, 10, n_samples)[:, np.newaxis]
y_linear_pred = linear_model.predict(X_plot)
y_tree_pred = tree_model.predict(X_plot)

# Observations as a scatter, with each model's predictions overlaid.
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(X, y, color='gray', alpha=0.5, label='Data points')
ax.plot(X_plot, y_linear_pred, color='blue', label='Linear Regression')
ax.plot(X_plot, y_tree_pred, color='red', label='Regression Tree')
ax.set_title('Linear Regression vs Regression Tree on Non-linear Data')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()


### Random forest regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
from sklearn.preprocessing import StandardScaler


In [None]:
# Load the dataset. 'Weight' is the regression target; 'Species' is removed
# from the features along with it, so every remaining column is a predictor.
# NOTE(review): assumes all remaining columns are numeric, since they are
# passed straight to StandardScaler -- confirm against data2.csv.
df = pd.read_csv("data2.csv")
X = df.drop(['Weight', 'Species'], axis=1)
y = df['Weight']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/variance applied to the training rows
# (data leakage). The row partition depends only on the number of samples and
# random_state, so it is the same partition as splitting the scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any code relying on `X_scaled` still finds it; it now holds
# the full feature matrix scaled with the training-set statistics.
X_scaled = scaler.transform(X)

In [None]:
# Hyperparameters shared by the single tree and the forest, so the two models
# differ only in the ensembling.
shared_params = dict(max_depth=15, min_samples_leaf=1, random_state=42)

# fit() returns the estimator itself, so construction and training can be chained.
tree_reg = DecisionTreeRegressor(**shared_params).fit(X_train, y_train)
rf_reg = RandomForestRegressor(n_estimators=100, **shared_params).fit(X_train, y_train)

# Predictions on both splits, used to compare training error with test error.
y_train_tree, y_test_tree = tree_reg.predict(X_train), tree_reg.predict(X_test)
y_train_rf, y_test_rf = rf_reg.predict(X_train), rf_reg.predict(X_test)

In [None]:
# RMSE (square root of the mean squared error) for each model on each split,
# computed in the order: tree/train, tree/test, forest/train, forest/test.
tree_train_rmse, tree_test_rmse, rf_train_rmse, rf_test_rmse = (
    np.sqrt(mean_squared_error(actual, predicted))
    for actual, predicted in (
        (y_train, y_train_tree),
        (y_test, y_test_tree),
        (y_train, y_train_rf),
        (y_test, y_test_rf),
    )
)

# One three-line report per model; the second heading starts with a newline
# to leave a blank line between the two reports.
for heading, train_rmse, test_rmse in (
    ("Decision Tree Performance:", tree_train_rmse, tree_test_rmse),
    ("\nRandom Forest Performance:", rf_train_rmse, rf_test_rmse),
):
    print(heading)
    print(f"Train RMSE: {train_rmse:.4f}")
    print(f"Test RMSE: {test_rmse:.4f}")

In [None]:
# Grouped bar chart: one group per model, with the train and test bars placed
# side by side by shifting each half a bar width from the group centre.
fig, ax = plt.subplots(figsize=(10, 6))
x = np.arange(2)
width = 0.35
ax.bar(x - width/2, [tree_train_rmse, rf_train_rmse], width,
       label='Train RMSE', color='skyblue')
ax.bar(x + width/2, [tree_test_rmse, rf_test_rmse], width,
       label='Test RMSE', color='lightcoral')
ax.set_ylabel('RMSE')
ax.set_title('Training vs Test Error Comparison')
ax.set_xticks(x)
ax.set_xticklabels(['Decision Tree', 'Random Forest'])
ax.legend()

def add_labels(rects):
    """Annotate each bar with its height, formatted to two decimals.

    The text is horizontally centred on the bar and its bottom edge is
    anchored at the bar's height. Drawing goes through ``plt.text``.

    Args:
        rects: iterable of bar patches exposing ``get_height()``,
            ``get_x()`` and ``get_width()``.
    """
    for bar in rects:
        bar_top = bar.get_height()
        x_center = bar.get_x() + bar.get_width()/2.
        plt.text(x_center, bar_top, f'{bar_top:.2f}', ha='center', va='bottom')

# Label every bar patch on the current axes with its value.
current_axes = plt.gca()
add_labels(current_axes.patches)

# Dashed horizontal grid lines only, then tighten the layout and render.
current_axes.grid(True, axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()