In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import datetime as dt
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
# Load the raw data set into `data`; the relative path is resolved against the
# current working directory of the kernel.
data = pd.read_csv('data_set_.csv')

In [None]:
# Convert the date variables to datetime format (source strings are day.month.year)
data["order_date"] = pd.to_datetime(data["order_date"], format="%d.%m.%Y")
data["requested_delivery_date"] = pd.to_datetime(data["requested_delivery_date"], format="%d.%m.%Y")

# Extract the month and quarter variables from the date variables
data["Order Month"] = data["order_date"].dt.month
data["Delivery Month"] = data["requested_delivery_date"].dt.month
data['Order Quarter'] = data['order_date'].dt.quarter

# Select the variables of interest. The two date columns are carried along
# because 'Delivery Time' is computed from them further down; without them the
# subtraction below raises KeyError.
df = data[[
    "Order Month", "Delivery Month", "Customer Country Code", "Product Code",
    "order_type", "Customer Order Code", "value", "items", "Route",
    "order_date", "requested_delivery_date",
]]

# Remove rows with missing values (no outlier filtering is done here)
df = df.dropna()

# Drop rows whose 'items' entry is the literal placeholder string "\N".
# .copy() makes `df` an independent frame, so the column assignments that
# follow do not write into a slice of `data` (SettingWithCopyWarning).
df = df[df['items'] != "\\N"].copy()

# Calculate the number of days between order date and requested delivery date
df['Delivery Time'] = (df['requested_delivery_date'] - df['order_date']).dt.days


In [None]:
# Cast the two numeric inputs to integers, one column at a time
for numeric_col in ('value', 'items'):
    df[numeric_col] = df[numeric_col].astype('int')

# Total order value = 'value' times 'items', stored as a new column
df['Total Value'] = df['value'].mul(df['items'])


In [None]:
# Calculate the choice probability: a logistic (sigmoid) transform of a linear
# score in Delivery Time and Total Value, using the fixed literal weights below.
linear_score = -1.5 + 0.05 * df['Delivery Time'] + 0.001 * df['Total Value']
df['Choice Probability'] = 1 / (1 + np.exp(-linear_score))

# Define X and y
X = df[['Order Month', 'Delivery Month']]
y = df['Choice Probability']

# Fit logistic regression model.
# NOTE(review): X has no constant column, so the model is fitted without an
# intercept -- confirm that this is intended.
logit_model = sm.Logit(y, X)
result = logit_model.fit()

# Print summary of results
print(result.summary2())

# Calculate the 'x_values' as the linear predictor X . beta, using the
# coefficients that were just fitted rather than numbers copied by hand from an
# earlier summary printout; those go stale as soon as the data or cleaning changes.
df['x_values'] = X.dot(result.params)

# Calculate the predicted items using the 'x_values'.
# NOTE(review): the offset 11 is unexplained in this notebook -- presumably a
# baseline item count; confirm and move it to a named constant.
df['predicted_items'] = df['x_values'] + 11

# Perform SARIMAX modeling: ARIMA(1, 1, 1) with a seasonal (1, 1, 1) component
# of period 12
model = sm.tsa.SARIMAX(df["predicted_items"], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
results = model.fit()

# Print SARIMAX model summary
print(results.summary())