In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
import joblib

def load_and_preprocess_regression_data():
    data = pd.read_csv('C:/Users/mitja/Desktop/mbajk_dataset(2).csv', parse_dates=['date'])
    data['day'] = data['date'].dt.day
    data['month'] = data['date'].dt.month
    data['year'] = data['date'].dt.year
    data['hour'] = data['date'].dt.hour
    data.drop('date', axis=1, inplace=True)

    imputer = SimpleImputer(strategy='mean')
    numerical_columns = data.select_dtypes(include=[np.number]).columns
    data[numerical_columns] = imputer.fit_transform(data[numerical_columns])

    target_column = 'available_bike_stands'
    scalerY = StandardScaler()
    data[target_column] = scalerY.fit_transform(data[[target_column]])
    joblib.dump(scalerY, "target_scaler.pkl")

    scalerX = StandardScaler()
    dataX = data.drop(target_column, axis=1)
    dataX = scalerX.fit_transform(dataX)
    joblib.dump(scalerX, "feature_scaler.pkl")

    X = pd.DataFrame(dataX, columns=data.drop(target_column, axis=1).columns)
    y = data[target_column]

    return X, y

def main():
    print("Obdelava podatkov")
    X, y = load_and_preprocess_regression_data()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print("Usposabljanje Random Forest modela")
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    y_pred = rf_model.predict(X_test)

    target_scaler = joblib.load("target_scaler.pkl")
    y_pred_original = target_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()
    dfx = pd.DataFrame(y_pred_original)
    print(dfx.head)
    mse = mean_squared_error(y_test, y_pred_original)
    r2 = r2_score(y_test, y_pred_original)
    print(f"Mean Squared Error: {mse}")
    print(f"R2 Score: {r2}")

    model_filename = 'random_forest_model.pkl'
    joblib.dump(rf_model, model_filename)
    print(f"Model shranjen kot {model_filename}")

if __name__ == "__main__":
    main()


Obdelava podatkov
Usposabljanje Random Forest modela
<bound method NDFrame.head of           0
0      6.35
1      9.05
2     15.04
3     14.69
4     20.36
...     ...
3734  21.63
3735  14.68
3736  18.28
3737  19.25
3738  21.16

[3739 rows x 1 columns]>
Mean Squared Error: 197.52421836080705
R2 Score: -197.25338791606276
Model shranjen kot random_forest_model.pkl
