# Libraries

In [None]:
# Data manipulation libraries
import pandas as pd # Dataframes

# Statistical libraries
from sklearn.model_selection import train_test_split # Split dataset for validation
from sklearn.model_selection import cross_val_score # Cross validation for models

# Modeling libraries
from sklearn.linear_model import LinearRegression

# Data

In [323]:
# Read the CSV, drop the Order_ID column, and keep only rows without missing values
df = (
    pd.read_csv('data/clean_data_2.csv')
    .drop(columns=['Order_ID'])
    .dropna()
)

# Split the frame into the target series (y) and the remaining columns (X)
target_col = 'Delivery_Time_min'
y = df[target_col]
X = df.drop(columns=[target_col])

# Baseline model

In [324]:
def eval_model(model_name:str, X:pd.DataFrame, y:pd.Series, 
               models:dict, test_size:float = 0.3, random_state:int = 42, 
               verbose:bool = False) -> tuple[str,float,float,float]:
    """Fit one model from ``models`` and report its cross-validation and test scores.

    The data is split once into train and test partitions. The selected model
    is fitted on the train partition, scored with 5-fold cross-validation on
    that same partition, and finally scored on the held-out test partition
    using the model's own ``score`` method.

    Args:
        model_name: Key of the model to evaluate inside ``models``.
        X: Feature columns.
        y: Target values aligned with ``X``.
        models: Mapping from model name to an estimator exposing ``fit`` and
            ``score``.
        test_size: Fraction of the data reserved for the test partition.
        random_state: Seed passed to ``train_test_split``.
        verbose: When true, print a short summary of the scores.

    Returns:
        ``(model_name, cv_mean, cv_std, test_score)``. If the model lookup,
        fitting, or scoring fails, a ``RuntimeWarning`` describing the failure
        is emitted and ``(model_name, 0.0, 0.0, 0.0)`` is returned, so a batch
        of evaluations can keep going.
    """
    # Local import keeps this cell self-contained
    import warnings

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size = test_size,
        random_state = random_state
    )

    try:
        # Train model
        model = models[model_name]
        model.fit(X_train, y_train)

        # Evaluate
        score_cv = cross_val_score(model, X_train, y_train, cv=5)
        score_test = model.score(X_test, y_test)
    except Exception as exc:
        # Stay best-effort, but never hide the reason: a bare ``except`` would
        # also trap KeyboardInterrupt/SystemExit and make a typo in
        # ``model_name`` look like a genuine zero score.
        warnings.warn(
            f'Evaluation of {model_name!r} failed '
            f'({type(exc).__name__}: {exc}); returning zero scores.',
            RuntimeWarning,
            stacklevel=2,
        )
        return (model_name, 0.0, 0.0, 0.0)

    cv_mean = score_cv.mean()
    cv_std = score_cv.std()

    if verbose:
        print(f'{model_name} \n'
              f'mean cross-validation score: {cv_mean:0.4f} '
              f'with a standard deviation of {cv_std:0.4f}\n'
              f'test score: {score_test:0.4f}\n'
              '-----')

    return (model_name, cv_mean, cv_std, score_test)

In [325]:
# Estimators available for evaluation, keyed by the name passed to eval_model
models = {
    'Linear Regression': LinearRegression(),
}

In [330]:
# Evaluate the linear regression model and print its score summary
eval_model(
    model_name='Linear Regression',
    X=X,
    y=y,
    models=models,
    verbose=True,
)

Linear Regression 
mean cross-validation score: 0.7517 with a standard deviation of 0.0366
test score: 0.8351
-----


('Linear Regression',
 0.7516632570579345,
 0.0365981173095549,
 0.8351348286803177)