In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [2]:
# Load the raw dataset; the path is relative to the notebook's working directory.
DATA_PATH = "data202.csv"
df = pd.read_csv(DATA_PATH)

In [8]:
# One-hot encode the three categorical columns. Each resulting frame uses the
# source column name as the prefix of its indicator columns.
dum2, dum3, dum4 = (
    pd.get_dummies(df[column], prefix=column)
    for column in ('plot', 'mitti', 'plant')
)

In [9]:
# Append the indicator columns to the original frame side by side ...
merg = pd.concat((df, dum2, dum3, dum4), axis=1)
# ... and drop the raw categorical columns they replace.
final = merg.drop(columns=['plot', 'mitti', 'plant'])

In [10]:
# Clean non-finite values before modelling.
#
# Infinities are turned into NaN *first* so that
#   * they cannot push the column means used for imputation to inf/NaN, and
#   * -inf is not mapped to a huge positive number (sign flip).
# Every gap is then filled with the mean of the finite values of its column.
# `numeric_only=True` keeps the mean well-defined if any non-numeric column
# is still present in the frame.
#
# NOTE(review): the target column 'pro' is still part of `final` here, so
# missing targets are mean-imputed as well, and the means are computed on the
# full dataset before the train/test split. Consider dropping rows with a
# missing target and fitting the imputation on the training split only.
final = final.replace([np.inf, -np.inf], np.nan)
final = final.fillna(final.mean(numeric_only=True))

In [11]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    final.drop(columns='pro'),  # features: every column except the target
    final.loc[:, 'pro'],        # target
    test_size=0.3,
    random_state=42,
)

In [12]:
# Fit a LightGBM regressor with its default hyper-parameters on the training split.
lgbm_reg = LGBMRegressor()
lgbm_reg.fit(X=X_train, y=y_train)

LGBMRegressor()

In [13]:
# Generate predictions for the held-out rows.
y_pred = lgbm_reg.predict(X=X_test)

In [14]:
# Evaluate the fitted regressor on the held-out split.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
# For a regressor, .score() reports R2 rather than a classification accuracy;
# the recorded output shows exactly the same value as `r2`.
accuracy = lgbm_reg.score(X=X_test, y=y_test)

In [15]:
# Adjusted R2 penalises R2 for the number of predictors:
#     1 - (1 - R2) * (n - 1) / (n - p - 1)
n = X_test.shape[0]  # number of evaluation rows
p = X_test.shape[1]  # number of feature columns (one-hot columns included)
if n - p - 1 > 0:
    adjusted_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
else:
    # With no residual degrees of freedom the formula divides by zero or by a
    # negative number and yields a meaningless value (inf, or a score above 1).
    # Report NaN instead so the problem is visible in the output.
    adjusted_r2 = float("nan")

In [16]:
# Report the evaluation metrics.
# `accuracy` holds the value of the regressor's .score(), which is the
# coefficient of determination (R2), not a classification accuracy, so it is
# labelled accordingly.
print("Model score (R2):", accuracy)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R2 Score:", r2)
print("Adjusted R2 Score:", adjusted_r2)

Accuracy: 0.7696490105872547
Mean Squared Error: 1412748.355414466
Mean Absolute Error: 753.9347925108309
R2 Score: 0.7696490105872547
Adjusted R2 Score: 0.6666555393594407
