In [1]:
import pandas as pd
# Load the dataset from the notebook's working directory and display it.
# The file provides the columns Province, Year, Harvested Area, Production,
# Rainfall, Humidity and Temperature.
df = pd.read_csv(filepath_or_buffer='./dataset.csv')
df

Unnamed: 0,Province,Year,Harvested Area,Production,Rainfall,Humidity,Temperature
0,Aceh,2018,329516,1861567,2336,81,28
1,Aceh,2019,310012,1714438,1437,82,27
2,Aceh,2020,317869,1757313,1790,76,29
3,Aceh,2021,297058,1634640,2293,76,29
4,Aceh,2022,271750,1509456,1834,76,29
...,...,...,...,...,...,...,...
199,Papua,2019,54132,235340,1823,77,28
200,Papua,2020,52728,166002,1502,75,28
201,Papua,2021,64985,286280,2028,76,28
202,Papua,2022,49742,193944,2576,84,28


In [2]:
# Build the feature frame: every column of df except the regression target
# ('Production') and 'Year'. drop() returns a new frame, so df is untouched.
input_n = df.drop(columns=['Production', 'Year'])
input_n

Unnamed: 0,Province,Harvested Area,Rainfall,Humidity,Temperature
0,Aceh,329516,2336,81,28
1,Aceh,310012,1437,82,27
2,Aceh,317869,1790,76,29
3,Aceh,297058,2293,76,29
4,Aceh,271750,1834,76,29
...,...,...,...,...,...
199,Papua,54132,1823,77,28
200,Papua,52728,1502,75,28
201,Papua,64985,2028,76,28
202,Papua,49742,2576,84,28


In [3]:
from sklearn.preprocessing import LabelEncoder
# Replace the province names with integer codes so the column is numeric.
# NOTE(review): integer codes give the provinces a numeric order that has no
# real meaning; consider one-hot encoding if that matters for the model.
le_province = LabelEncoder()

# Fit on the untouched df column rather than on input_n['Province'] so this
# cell is idempotent: re-running it never re-encodes already-encoded integers,
# and le_province.classes_ always holds the original province names.
# input_n was derived from df by dropping columns only, so rows line up.
input_n['Province'] = le_province.fit_transform(df['Province'])
input_n

Unnamed: 0,Province,Harvested Area,Rainfall,Humidity,Temperature
0,0,329516,2336,81,28
1,0,310012,1437,82,27
2,0,317869,1790,76,29
3,0,297058,2293,76,29
4,0,271750,1834,76,29
...,...,...,...,...,...
199,23,54132,1823,77,28
200,23,52728,1502,75,28
201,23,64985,2028,76,28
202,23,49742,2576,84,28


In [4]:
# Regression target: the 'Production' column of the raw frame, as a Series.
target = df.loc[:, 'Production']
target

0      1861567
1      1714438
2      1757313
3      1634640
4      1509456
        ...   
199     235340
200     166002
201     286280
202     193944
203     200115
Name: Production, Length: 204, dtype: int64

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    input_n,
    target,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Report how many rows ended up in each split.
size_report = "test size: {}\n train size: {}"
print(size_report.format(len(X_test), len(X_train)))

test size: 41
 train size: 163


In [7]:
# Scale features and target to the [0, 1] range using min-max normalization.
from sklearn.preprocessing import MinMaxScaler
feature_scaler = MinMaxScaler(feature_range=(0, 1))
target_scaler = MinMaxScaler(feature_range=(0, 1))

# Fit the feature scaler on the training split only, then wrap the result back
# into a DataFrame. Column labels and index are taken from X_train itself so
# they always match the features actually passed in (no hard-coded name list
# that could drift out of sync with the feature selection).
X_train_scaled = pd.DataFrame(
    feature_scaler.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)

# Scale the target variable.
# MinMaxScaler expects a 2D array, so the 1D target is reshaped to a single
# column for fitting and flattened back to 1D afterwards.
y_train_scaled = target_scaler.fit_transform(y_train.values.reshape(-1, 1)).flatten()
print(X_train_scaled)
print(y_train_scaled)

     Province  Harvested Area  Rainfall  Humidity  Temperature
199  0.696970        0.029636  0.276728  0.363636          0.6
93   0.060606        0.174608  0.516504  0.272727          0.8
38   0.090909        0.035128  0.785344  0.363636          0.8
24   0.212121        0.047240  0.446959  0.727273          0.4
96   0.030303        0.060839  0.222960  0.500000          0.6
..        ...             ...       ...       ...          ...
106  0.636364        0.148176  0.448412  0.409091          0.8
14   0.939394        0.162212  0.880424  1.000000          0.6
92   0.060606        0.178497  0.458377  0.227273          1.0
179  0.787879        0.032372  0.428690  0.500000          0.6
102  0.636364        0.158688  0.244551  0.227273          0.8

[163 rows x 5 columns]
[2.23748270e-02 1.52662045e-01 2.78508815e-02 3.64431838e-02
 6.34951446e-02 6.96679212e-02 2.61100764e-02 2.19494598e-02
 4.26319617e-04 2.85469368e-02 2.31739381e-02 1.60713733e-01
 1.32682075e-01 4.94595523e-02 4.2518

In [8]:
# Transform the test data with the scalers that were fitted on the training
# split. Use transform (not fit_transform) here so no information from the
# test split leaks into the scaling parameters.
# Column labels and index come from X_test itself rather than a hard-coded
# list, so they always match the features actually passed in.
X_test_scaled = pd.DataFrame(
    feature_scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

# The target scaler needs a 2D array: reshape to one column, then flatten back.
y_test_scaled = target_scaler.transform(y_test.values.reshape(-1, 1)).flatten()

In [9]:
from sklearn.svm import SVR
# Support vector regressor with the library defaults spelled out explicitly
# (the same values the fitted estimator reports: RBF kernel, C=1.0, epsilon=0.1).
model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

In [10]:
# Train on the min-max scaled training features and scaled target.
model.fit(X=X_train_scaled, y=y_train_scaled)

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [11]:
# R² (coefficient of determination) on the held-out split, computed on the
# scaled target. This is a score, not an error: higher is better.
model.score(X=X_test_scaled, y=y_test_scaled)

0.864355205827882

In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Predict on the held-out split. Predictions are in the scaled target space
# because the model was trained on the scaled target.
y_pred_scaled = model.predict(X_test_scaled)

# Regression metrics, all computed on the scaled target values.
mse, mae, r2 = (
    metric(y_test_scaled, y_pred_scaled)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# One print call, one metric per line.
print(
    f"Mean Squared Error: {mse:.6f}",
    f"Mean Absolute Error: {mae:.6f}",
    f"R² Score: {r2:.6f}",
    sep="\n",
)


Mean Squared Error: 0.014640
Mean Absolute Error: 0.085896
R² Score: 0.864355
