In [12]:
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib

In [14]:

# Read the raw order log.
data = pd.read_csv("orders.csv")

# Anchor every `access_time` value to a fixed dummy date before parsing so
# pandas yields full timestamps.
# NOTE(review): presumably the column holds time-of-day strings only - confirm.
# Because the date part is always 1970-01-01, `day_of_week` derived below is
# the same for every row and therefore carries no information for training.
data = data.assign(
    access_time=pd.to_datetime(
        data['access_time'].map(lambda clock: "1970-01-01 " + clock)
    )
)

# Derive the calendar/clock features from the parsed timestamp.
data = data.assign(
    day_of_week=lambda frame: frame['access_time'].dt.dayofweek,
    hour_of_day=lambda frame: frame['access_time'].dt.hour,
    minute_of_hour=lambda frame: frame['access_time'].dt.minute,
)

# Model inputs (X) and regression target (y).
X = data.loc[:, ['day_of_week', 'hour_of_day', 'minute_of_hour']]
y = data.loc[:, 'number_person']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a random forest with default hyperparameters and a fixed seed.
rf_regressor = RandomForestRegressor(random_state=42)
rf_regressor.fit(X_train, y_train)



Mean Absolute Error: 1.355070316983151


['random_forest_regressor_model.pkl']

In [15]:
# Score the fitted model on the held-out split using mean absolute error.
y_pred = rf_regressor.predict(X=X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("Mean Absolute Error:", mae)

Mean Absolute Error: 1.355070316983151


In [16]:
# Persist the fitted regressor to disk so it can be reloaded without retraining.
joblib.dump(value=rf_regressor, filename='random_forest_regressor_model.pkl')

['random_forest_regressor_model.pkl']

In [17]:
import pandas as pd
import joblib

# Restore the regressor saved by the export step.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
rf_regressor = joblib.load(filename='random_forest_regressor_model.pkl')

def preprocess_datetime(datetime_str):
    """Turn a date/time value into the feature dict used for prediction.

    Parameters
    ----------
    datetime_str : str or datetime-like
        Any value ``pd.to_datetime`` can parse into a single timestamp.

    Returns
    -------
    dict
        ``day_of_week`` (pandas convention: Monday=0 ... Sunday=6),
        ``hour_of_day`` and ``minute_of_hour`` of the parsed timestamp.

    Raises
    ------
    ValueError
        If the input is missing or parses to ``NaT``; strings that pandas
        cannot parse raise from ``pd.to_datetime`` itself.
    """
    datetime_obj = pd.to_datetime(datetime_str)

    # pd.to_datetime hands None straight back and maps blank input to NaT.
    # Without this guard None fails with an opaque AttributeError and NaT
    # silently produces NaN features, so reject both explicitly.
    if datetime_obj is None or datetime_obj is pd.NaT:
        raise ValueError(
            f"Cannot extract time features from missing datetime: {datetime_str!r}"
        )

    return {
        'day_of_week': datetime_obj.dayofweek,
        'hour_of_day': datetime_obj.hour,
        'minute_of_hour': datetime_obj.minute
    }

def predict_people_count(input_datetime):
    """Predict the people count for one date/time value.

    The value is converted to features by ``preprocess_datetime``, wrapped in
    a one-row DataFrame (columns named after the feature keys) and passed to
    the module-level ``rf_regressor``.

    Parameters
    ----------
    input_datetime : str or datetime-like
        Value forwarded unchanged to ``preprocess_datetime``.

    Returns
    -------
    The first (and only) element of the model's prediction array.
    """
    feature_row = preprocess_datetime(input_datetime)
    single_row_frame = pd.DataFrame([feature_row])
    # predict() returns one value per input row; unwrap the single result.
    return rf_regressor.predict(single_row_frame)[0]

# Example run: predict the people count for one hard-coded timestamp and
# print the timestamp together with the model's estimate.
input_datetime = '2024-05-06 09:30:00'
predicted_count = predict_people_count(input_datetime)
print("Predicted number of people at", input_datetime, ":", predicted_count)


Predicted number of people at 2024-05-06 09:30:00 : 7.241594018636074
