In [1]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import FunctionTransformer

# Step 1: Custom function for dropping column & handling outliers
def preprocess_data(df):
    # Drop "visibility"
    df = df.drop(columns=["visibility"], errors="ignore")
    
    # Example: handle outliers via capping
    for col in df.select_dtypes(include=['float64', 'int64']).columns:
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1
        lower = q1 - 1.5 * iqr
        upper = q3 + 1.5 * iqr
        df[col] = df[col].clip(lower, upper)
    return df

# Function transformer wrapper
preprocessor = FunctionTransformer(preprocess_data)

# Final pipeline
pipeline = Pipeline([
    ('preprocess', preprocessor),                 # Custom preprocessing
    ('imputer', SimpleImputer(strategy='median')), # Missing values
    ('model', RandomForestRegressor(
        n_estimators=100,
        random_state=42
    ))
])

# Train pipeline
pipeline.fit(X_train, y_train)

pipeline.predict(X_test)

# Save for deployment
import joblib
joblib.dump(pipeline, "random_forest_pipeline.pkl")}

SyntaxError: unmatched '}' (40822864.py, line 41)