In [29]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns


In [30]:
# Read the CSV into the DataFrame that every later cell works from.
DATA_PATH = 'cleaned_data.csv'
df = pd.read_csv(DATA_PATH)

In [31]:
# Feature matrix (a 2-D DataFrame, because a list of columns is selected)
# and the target Series.
feature_columns = ['carat', 'x', 'y', 'z']
target_column = 'price'

X = df[feature_columns]
y = df[target_column]

In [32]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [33]:
# Fit an ordinary least-squares model on the training split.
# NOTE(review): the output recorded under this cell is a ValueError about a
# 1-D Series being passed as X. X as built in this notebook is a DataFrame,
# so that output presumably comes from stale kernel state -- confirm with
# Restart Kernel -> Run All.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

ValueError: Expected a 2-dimensional container but got <class 'pandas.core.series.Series'> instead. Pass a DataFrame containing a single row (i.e. single sample) or a single column (i.e. single feature) instead.

In [None]:
# Predict the target for the held-out rows with the fitted model.
y_pred = model.predict(X=X_test)

In [None]:
# Score the held-out predictions: mean absolute error, mean squared error
# and the coefficient of determination.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Report the three metrics, one per line.
print(
    f'MAE: {mae}',
    f'MSE: {mse}',
    f'R²: {r2}',
    sep='\n',
)

In [None]:
# Pair every feature name with its fitted regression coefficient.
# NOTE(review): these are raw coefficients; if the features are on different
# scales they are not directly comparable as "importance" -- consider
# standardising the features first.
coefficients = pd.DataFrame({'Feature': X.columns, 'Importance': model.coef_})

# Rank by absolute magnitude: sorting by the signed value would push a strong
# negative coefficient to the bottom even though its effect is large.
print(coefficients.sort_values(by='Importance', key=np.abs, ascending=False))

In [None]:
# Actual vs. predicted prices on the test split; points lying on the red
# diagonal are perfect predictions.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, alpha=0.7, color='b')

# Compute the diagonal's end points once with the vectorised Series
# reductions rather than iterating the Series four times with the built-in
# min()/max() (which are also order-dependent when NaN is present).
price_min, price_max = y_test.min(), y_test.max()
ax.plot([price_min, price_max], [price_min, price_max], color='r', lw=2)

ax.set_xlabel('Actual price')
ax.set_ylabel('Predicted price')
ax.set_title('Actual vs Predicted prices')
plt.show()


In [None]:
# Put the MAE in context by expressing it as a percentage of the mean
# test-set price.
mean_price = y_test.mean()
mae_percentage = mae / mean_price * 100

print(
    f'Средняя цена: {mean_price}',
    f'MAE в процентах от средней цены: {mae_percentage:.2f}%',
    sep='\n',
)
