In [2]:
# Import required libraries
import pickle
import pandas as pd

In [3]:
# Open trained model
def get_model(model_path):
    """Load the pickled ``(dv, model)`` pair stored at ``model_path``.

    The file is opened in binary mode and must unpickle to exactly two
    items, which are returned in the same order as a tuple.

    Note: ``pickle.load`` can execute arbitrary code while deserializing,
    so only point this at model files from a trusted source.
    """
    with open(model_path, 'rb') as model_file:
        artifacts = pickle.load(model_file)
    # Unpacking enforces the two-item layout expected by callers
    dv, model = artifacts
    return dv, model

In [4]:
# Function to read and preprocess data for predictions 
def read_data(input_file):
    """Read a trips parquet file and prepare it for prediction.

    Steps:
      1. Load ``input_file`` with ``pd.read_parquet``.
      2. Add a ``duration`` column: minutes between
         ``tpep_pickup_datetime`` and ``tpep_dropoff_datetime``.
      3. Keep only rows whose duration is between 1 and 60 minutes
         (both bounds inclusive).
      4. Fill missing ``PULocationID`` / ``DOLocationID`` values with -1
         and cast both columns to int, then to str.

    Returns the filtered DataFrame (a copy; the original row index labels
    of the surviving rows are kept).
    """
    trips = pd.read_parquet(input_file)

    elapsed = trips.tpep_dropoff_datetime - trips.tpep_pickup_datetime
    trips['duration'] = elapsed.dt.total_seconds() / 60

    within_bounds = (trips.duration >= 1) & (trips.duration <= 60)
    trips = trips[within_bounds].copy()

    location_cols = ['PULocationID', 'DOLocationID']
    trips[location_cols] = (
        trips[location_cols].fillna(-1).astype('int').astype('str')
    )
    return trips

In [5]:
# Make predictions
def get_predictions(df, model_path):
    """Predict with the pickled model using the two location-ID columns.

    The ``PULocationID`` and ``DOLocationID`` columns of ``df`` are turned
    into a list of per-row dicts, transformed by the ``dv`` object loaded
    via ``get_model(model_path)``, and passed to ``model.predict``.

    Returns whatever ``model.predict`` returns.
    """
    feature_columns = ['PULocationID', 'DOLocationID']
    # Build the records first so a missing column fails before the model is loaded
    records = df[feature_columns].to_dict(orient='records')
    vectorizer, predictor = get_model(model_path)
    return predictor.predict(vectorizer.transform(records))

In [6]:

def process_results(df, month, year, y_pred,
                    output_dir='C:/Users/Camila/OneDrive/Escritorio/mlops zoomcamp/HW4'):
    """Build the ride-level prediction table and write it to parquet.

    Parameters
    ----------
    df : pandas.DataFrame
        Scored trips. Only its index is used: each ride id is
        ``'{year:04d}/{month:02d}_'`` followed by the row's index label.
        ``df`` itself is not modified.
    month, year : int
        Used in the ride-id prefix and in the output file name.
    y_pred : array-like
        One prediction per row of ``df``.
    output_dir : str, optional
        Directory that receives ``yellow_tripdata_{year:04d}-{month:02d}.parquet``.
        Defaults to the location this notebook originally wrote to.

    Returns
    -------
    pandas.DataFrame
        Columns ``ride_id`` and ``predictions``, indexed like ``df``. The
        same frame is written to parquet with ``engine='pyarrow'``,
        ``compression=None`` and ``index=False``.
    """
    output_file = f'{output_dir}/yellow_tripdata_{year:04d}-{month:02d}.parquet'

    # Start from df's index so the result lines up with the input rows
    # without adding a 'ride_id' column to the caller's DataFrame.
    df_result = pd.DataFrame(index=df.index)
    df_result['ride_id'] = f'{year:04d}/{month:02d}_' + df.index.astype('str')
    df_result['predictions'] = y_pred

    df_result.to_parquet(output_file, engine='pyarrow', compression=None, index=False)
    return df_result

    

In [11]:
def apply_model(month, year, model_path,
                input_dir='C:/Users/Camila/OneDrive/Escritorio/mlops zoomcamp/data/yellow'):
    """Run the batch-scoring pipeline for one month and preview the result.

    Parameters
    ----------
    month, year : int
        Select the input file ``yellow_tripdata_{year:04d}-{month:02d}.parquet``
        and are forwarded to ``process_results``.
    model_path : str
        Path of the pickled model, forwarded to ``get_predictions``.
    input_dir : str, optional
        Directory containing the monthly parquet files. Defaults to the
        location this notebook originally read from.

    Returns
    -------
    The first rows (``.head()``) of the table returned by ``process_results``.
    """
    input_file = f'{input_dir}/yellow_tripdata_{year:04d}-{month:02d}.parquet'
    df = read_data(input_file)
    y_pred = get_predictions(df, model_path)
    results = process_results(df, month, year, y_pred)
    return results.head()


In [12]:

# Run parameters: score the February 2022 file with the pickled model
month, year = 2, 2022
model_path = 'model.bin'

# Kept as the cell's last expression so the returned preview is displayed
apply_model(month=month, year=year, model_path=model_path)


Unnamed: 0,ride_id,predictions
0,2022/02_0,18.527783
1,2022/02_1,23.065782
2,2022/02_2,33.686359
3,2022/02_3,23.757436
4,2022/02_4,21.492904
