In [1]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from clases import *

# Column groups and location whitelist consumed by the preprocessing steps.
initial_drop_cols = ['Unnamed: 0', 'Temp9am', 'Temp3pm', 'Pressure9am']
kept_locations = ['Albury', 'Sydney', 'SydneyAirport', 'Canberra', 'Melbourne', 'MelbourneAirport']
wind_dir_cols = ['WindGustDir', 'WindDir9am', 'WindDir3pm']

lr = LinearRegression()

# Custom preprocessing steps, in execution order.
# NOTE: an OutlierCapper step (variables=['Rainfall', 'Evaporation'], quantile=0.99)
# used to sit between the imputer and the wind components; it is currently disabled.
preprocessing_steps = [
    ('drop_columns', ColumnDropper(columns_to_drop=initial_drop_cols)),
    ('location_filter', LocationFilter(valid_locations=kept_locations)),
    ('imputer', DataFrameImputer(strategy="most_frequent")),
    ('wind_components', WindComponentsTransformer(columns=wind_dir_cols)),
    ('date_components', DateComponentsTransformer()),
    ('final_drop', ColumnDropper(columns_to_drop=['Location'])),
]

# Full, updated pipeline: preprocessing -> standard scaling -> linear regression.
pipe = Pipeline(steps=[
    *preprocessing_steps,
    ('scaler', StandardScaler()),
    ('model', lr),
])

In [2]:
# Load the training split and separate the regression target from the features.
# Both target columns are removed from the feature frame.
df_train = pd.read_csv('./df_regresion/df_train.csv')

y_train = df_train[['RainfallTomorrow']]
X_train = df_train.drop(columns=['RainTomorrow', 'RainfallTomorrow'])

pipe.fit(X_train, y_train)

In [3]:
# Score of the fitted pipeline on the training data (R² for the linear regression step).
train_r2 = pipe.score(X_train, y_train)
train_r2

0.2034593317282566

In [4]:
# Save the fitted pipeline inside ./docker, the directory used as the Docker build context.
pipeline_path = './docker/pipeline.pkl'
joblib.dump(pipe, pipeline_path)

['./docker/pipeline.pkl']

In [5]:
# Score the fitted pipeline on the test split.
df_test = pd.read_csv('./df_regresion/df_test.csv')

y = df_test[['RainfallTomorrow']]
X = df_test.drop(columns=['RainTomorrow', 'RainfallTomorrow'])

pipe.score(X, y)

0.22205298791665928

In [None]:
# DOCKER

# build image 
!docker build -t inference-python-test ./docker 

# run container 
!docker run --rm --name inference-python-test -v "/c/Users/franc/OneDrive/Escritorio/FACU/2025/AA1/mlops/df_regresion:/temp" inference-python-test


#0 building with "desktop-linux" instance using docker driver

#1 [internal] load build definition from dockerfile
#1 transferring dockerfile: 488B done
#1 DONE 0.0s

#2 [auth] library/python:pull token for registry-1.docker.io
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/library/python:3.12-slim
#3 DONE 1.3s

#4 [internal] load .dockerignore
#4 transferring context: 2B done
#4 DONE 0.0s

#5 [internal] load build context
#5 transferring context: 7.93kB 0.0s done
#5 DONE 0.0s

#6 [1/7] FROM docker.io/library/python:3.12-slim@sha256:c8c30392319771be35fb3f11422fc8f453db2d58554d791dadbb1f9b360588a9
#6 resolve docker.io/library/python:3.12-slim@sha256:c8c30392319771be35fb3f11422fc8f453db2d58554d791dadbb1f9b360588a9 0.0s done
#6 DONE 0.0s

#7 [2/7] WORKDIR /app
#7 CACHED

#8 [3/7] COPY requirements.txt ./
#8 CACHED

#9 [4/7] RUN pip install --no-cache-dir -r requirements.txt
#9 CACHED

#10 [5/7] COPY pipeline.pkl .
#10 DONE 0.0s

#11 [6/7] COPY inference.py .
#11 DONE 0.0s

#12 [7

2025-02-25 11:29:59,269: INFO: inference.py: loaded pipeline
2025-02-25 11:29:59,284: INFO: inference.py: loaded input
2025-02-25 11:29:59,314: INFO: inference.py: made predictions
2025-02-25 11:29:59,324: INFO: inference.py: saved output


In [7]:
# Read the predictions file from the directory that was mounted into the container.
df_output = pd.read_csv('./df_regresion/output.csv')
y_pred = df_output.RainfallTomorrow_predicted

# The metrics below compare row by row, so fail early with a clear message if the
# predictions file does not line up with the test targets.
assert len(y_pred) == len(y), "output.csv row count does not match the test set"

# R² and RMSE of the container's predictions against the test targets.
r2 = r2_score(y, y_pred)
rmse = np.sqrt(mean_squared_error(y, y_pred))

round(r2, 3), round(rmse, 3)

(0.222, 6.794)