# In [None]:

# Generalized Code for Partial Dependence Plots (PDP)

# 1. Importing Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import PartialDependenceDisplay
from sklearn.model_selection import train_test_split

try:
    # Legacy helper: deprecated in scikit-learn 1.0 and removed in 1.2.
    from sklearn.inspection import plot_partial_dependence
except ImportError:
    plot_partial_dependence = None

# 2. Loading Dataset Function
def load_dataset(file_path):
    """Read the CSV file at ``file_path`` and return it as a DataFrame.

    Args:
        file_path: Path (or any other source accepted by ``pd.read_csv``)
            of the CSV data to load.

    Returns:
        The parsed ``pandas.DataFrame``, using ``pd.read_csv`` defaults.
    """
    dataset = pd.read_csv(file_path)
    return dataset

# 3. Preprocessing Function
def preprocess_data(df, target_col):
    """Split a DataFrame into train/test feature and target sets.

    Works on a copy of ``df`` whose column names are lower-cased, so the
    returned feature frames carry lower-case column names. The caller's
    DataFrame is left untouched.

    Args:
        df: Input DataFrame containing the feature columns and the target.
        target_col: Name of the target column. String names are matched
            case-insensitively, because the columns are lower-cased before
            the lookup.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` from an 80/20 split
        seeded with ``random_state=42``.

    Raises:
        KeyError: If ``target_col`` is not a column of ``df`` after
            lower-casing.
    """
    df = df.copy()
    df.columns = df.columns.str.lower()

    # The columns were just lower-cased, so the lookup key must be too;
    # otherwise a target such as "Price" is never found.
    if isinstance(target_col, str):
        target_col = target_col.lower()
    if target_col not in df.columns:
        raise KeyError(
            f"Target column {target_col!r} not found; "
            f"available columns: {list(df.columns)}"
        )

    X = df.drop(columns=[target_col])
    y = df[target_col]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    return X_train, X_test, y_train, y_test

# 4. Model Training
def train_model(X_train, y_train, *, n_estimators=100, random_state=42):
    """Fit a random-forest regressor on the training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.
        n_estimators: Number of trees in the forest.
        random_state: Seed passed to ``RandomForestRegressor``. It defaults
            to a fixed value so repeated runs give the same model; pass
            ``None`` to get an unseeded forest.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        random_state=random_state,
    )
    model.fit(X_train, y_train)
    return model

# 5. Plotting Partial Dependence Plots
def plot_pdp(X_train, model, features):
    """Draw and show partial dependence plots for the given features.

    Args:
        X_train: Data the partial dependence is computed on.
        model: Fitted estimator to inspect.
        features: Features to plot, in a form accepted by scikit-learn's
            partial dependence plotting (e.g. column names of ``X_train``).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # ``plot_partial_dependence`` was removed in scikit-learn 1.2; its
    # replacement is ``PartialDependenceDisplay.from_estimator`` (added in
    # 1.0). Fall back to the legacy helper only when the new API is absent.
    draw = getattr(PartialDependenceDisplay, "from_estimator", None)
    if draw is None:
        draw = plot_partial_dependence
    draw(model, X_train, features=features, grid_resolution=20)

    plt.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9)
    plt.show()

# 6. Main Execution
if __name__ == "__main__":
    # Placeholder inputs: replace with a real CSV path and target column.
    dataset_path = 'path_to_your_dataset.csv'
    target_name = 'your_target_column'

    # Features to draw partial dependence plots for.
    features = ['feature_1', 'feature_2']  # replace with your actual feature names

    # Load -> split -> fit -> plot.
    df = load_dataset(dataset_path)
    X_train, X_test, y_train, y_test = preprocess_data(
        df, target_col=target_name
    )
    model = train_model(X_train, y_train)
    plot_pdp(X_train, model, features)
