In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# ---------------------------------------------------------------------------
# Train and evaluate an RBF-kernel SVR on the cleaned vegetable price data.
# Leaves `data`, `X_train`, `scaler` and `svr` (among others) in the kernel
# namespace for the cells that follow.
# ---------------------------------------------------------------------------

# Read the cleaned price history from disk
file_path = '/workspaces/codespaces-jupyter/data/processed/harga_sayuran_clean.csv'
data = pd.read_csv(file_path)

# Parse the price date day-first; entries that cannot be parsed are coerced
# to NaT so they can be filtered out in the next step
data['Tanggal Harga'] = pd.to_datetime(
    data['Tanggal Harga'],
    dayfirst=True,
    errors='coerce',
)

# Discard rows whose date failed to parse, then derive calendar features
data = (
    data
    .dropna(subset=['Tanggal Harga'])
    .assign(
        Year=lambda frame: frame['Tanggal Harga'].dt.year,
        Month=lambda frame: frame['Tanggal Harga'].dt.month,
    )
)

# Model inputs and the column being predicted
features = ['Year', 'Month', 'Jenis Sayuran', 'Season']
target = 'Harga Beli Pasar per Kilogram'

# One-hot encode the non-numeric inputs and carry the target along so that
# features and target are split together
data_encoded = pd.get_dummies(data.loc[:, features])
data_encoded[target] = data[target]

# 80/20 train/test split with a fixed seed
train_data, test_data = train_test_split(
    data_encoded,
    random_state=42,
    test_size=0.2,
)

# Peel the target off each partition
X_train, y_train = train_data.drop(columns=target), train_data[target]
X_test, y_test = test_data.drop(columns=target), test_data[target]

# Fit the scaler on the training partition only, then apply it to both
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build and fit the SVR in one step (fit returns the estimator itself)
svr = SVR(C=100, epsilon=0.1, gamma=0.1, kernel='rbf').fit(X_train_scaled, y_train)

# Score the held-out partition
y_pred = svr.predict(X_test_scaled)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

  data['Tanggal Harga'] = pd.to_datetime(data['Tanggal Harga'], dayfirst=True, errors='coerce')


Mean Squared Error: 47232488.78359342
R-squared: 0.8587258828904004


In [10]:
# ---------------------------------------------------------------------------
# Forecast prices for every vegetable over a six-month horizon.
# Relies on `data`, `X_train`, `scaler` and `svr` from the training cell and
# produces `forecast_results` with columns 'Tanggal Harga', 'Jenis Sayuran'
# and 'Harga Beli Pasar per Kilogram'.
# ---------------------------------------------------------------------------

# Six consecutive month-end dates starting from July 2024.
# NOTE(review): recent pandas releases (2.2+) deprecate the 'M' alias in
# favour of 'ME'; switch once the minimum supported pandas version allows.
future_dates = pd.date_range(start='2024-07-01', periods=6, freq='M')

# Every vegetable type present in the loaded dataset
vegetables = data['Jenis Sayuran'].unique()

# Per-vegetable forecasts are collected here and concatenated once after the
# loop; growing a DataFrame with pd.concat on every iteration re-copies all
# previously accumulated rows each time.
forecast_frames = []

for vegetable in vegetables:
    # Same horizon for each vegetable; only the vegetable label varies
    future_data = pd.DataFrame({
        'Year': future_dates.year,
        'Month': future_dates.month,
        'Jenis Sayuran': vegetable,
        'Season': 'Kemarau'  # Assuming the next few months are dry season
    })

    # One-hot encode, then align to the training columns: dummy columns for
    # categories not present in this frame are added and filled with 0
    future_data_encoded = pd.get_dummies(future_data)
    future_data_encoded = future_data_encoded.reindex(columns=X_train.columns, fill_value=0)

    # Apply the scaler that was fitted on the training features
    future_data_scaled = scaler.transform(future_data_encoded)

    # Predict the price for each future month
    future_predictions = svr.predict(future_data_scaled)

    # Shape the predictions for the dashboard
    forecast = pd.DataFrame({
        'Tanggal Harga': future_dates,
        'Jenis Sayuran': vegetable,
        'Harga Beli Pasar per Kilogram': future_predictions
    })

    forecast_frames.append(forecast)

# Single concatenation. The index is not reset, so each vegetable's rows keep
# their own 0..5 labels. pd.concat raises on an empty list, hence the guard
# that falls back to an empty frame when there are no vegetables.
forecast_results = pd.concat(forecast_frames) if forecast_frames else pd.DataFrame()


In [12]:

# ---------------------------------------------------------------------------
# Persist the fitted estimator, the scaler, the forecast table and a small
# summary dictionary to the current working directory.
# ---------------------------------------------------------------------------

# Fitted model first, then the scaler
for fitted_object, pickle_name in ((svr, 'svr_model.pkl'), (scaler, 'scaler.pkl')):
    joblib.dump(fitted_object, pickle_name)

# Forecast table as CSV, without the index column
forecast_results.to_csv('forecast_results.csv', index=False)

# Mean forecast price per vegetable; the vegetable with the highest mean is
# reported as the "best" one together with that mean
mean_price_by_vegetable = (
    forecast_results
    .groupby('Jenis Sayuran')['Harga Beli Pasar per Kilogram']
    .mean()
)
best_vegetable = mean_price_by_vegetable.idxmax()
best_price_performance = mean_price_by_vegetable.max()

# Hand-written season calendar and planting advice stored with the summary
seasons_2024 = {
    'January - March': 'Rainy Season',
    'April - June': 'Transition to Dry Season',
    'July - September': 'Dry Season',
    'October - December': 'Transition to Rainy Season',
}
recommendations = {
    'plant': ['Sawi Putih', 'Cabe Chilli'],
    'avoid': ['Cabe Rawit', 'Kol'],
}

# Bundle everything and pickle it; the dump call is the cell's last
# expression, so its return value is shown as the cell output
additional_info = {
    'best_vegetable': best_vegetable,
    'best_price_performance': best_price_performance,
    'seasons_2024': seasons_2024,
    'recommendations': recommendations,
}
joblib.dump(additional_info, 'additional_info.pkl')

['additional_info.pkl']