In [None]:
#### Data Preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split

# Standardise every feature column of X to zero mean and unit variance.
# The statistics are learned from all rows of X and stored on `scaler`.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Hold out 20% of the rows for testing. Note that the split is taken from the
# raw X (not X_scaled); the fixed seed makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Common Models
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Baseline classifier: a 100-tree random forest with a fixed seed. It is
# trained on the training split and then used to label the held-out rows.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
predictions = model.predict(X_test)

#### Model Evaluation
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                           confusion_matrix, classification_report, mean_squared_error, r2_score)

# Evaluate the held-out predictions produced by the model template.
# y_true / y_pred were never assigned in this script (NameError), so bind them
# to the test labels and to the predictions made for X_test.
y_true = y_test
y_pred = predictions

# Classification Metrics
accuracy = accuracy_score(y_true, y_pred)        # fraction of exactly matching labels
conf_matrix = confusion_matrix(y_true, y_pred)   # rows: true class, columns: predicted class
report = classification_report(y_true, y_pred)   # per-class precision / recall / F1 as text

# Regression Metrics
# NOTE(review): these need numeric targets and are only meaningful when the
# fitted estimator is a regressor; kept here as the regression counterpart.
mse = mean_squared_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

#### Cross-Validation
from sklearn.model_selection import cross_val_score, GridSearchCV

# 5-fold cross-validation of `model` over the complete X / y; `scores` holds
# one score per fold.
scores = cross_val_score(estimator=model, X=X, y=y, cv=5)

# Exhaustive search over forest size and tree depth (3 x 4 = 12 candidates),
# each candidate scored with 5-fold CV on the training split only.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, 30, None],
)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Parameter combination that achieved the best mean CV score.
best_params = grid_search.best_params_

#### Pipeline
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Chain standardisation and a default-configured random forest into a single
# estimator, so the scaler is fitted only on whatever data is passed to fit().
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('model', RandomForestClassifier()),
    ]
)

# Train on the training split, then label the held-out rows.
# fit() returns the pipeline itself, so the two calls can be chained.
predictions = pipeline.fit(X_train, y_train).predict(X_test)

#### Feature Selection
from sklearn.feature_selection import SelectKBest, RFE
from sklearn.decomposition import PCA

# Keep the 10 highest-scoring features, ranked with SelectKBest's default
# scoring function against the target y.
selector = SelectKBest(k=10)
selector.fit(X, y)
X_selected = selector.transform(X)

# PCA
# A float n_components between 0 and 1 asks PCA to keep as many components as
# are needed to explain that fraction of the variance. The input here is the
# standardised X_scaled rather than the raw X.
pca = PCA(n_components=0.95)  # Keep 95% of variance
X_pca = pca.fit_transform(X_scaled)