## Import Required Modules

# Required imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import ensemble, linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_squared_log_error, mean_absolute_percentage_error
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.cluster import KMeans

import xgboost as xgb
import lightgbm as lgbm
import catboost as cb
from hpelm import ELM
import tensorflow as tf

## Template: Preprocess, Split, Fit, and Predict

In [None]:
# Report the number of missing values in each column.
print(df.isnull().sum())

# Option 1: drop every row that contains at least one missing value.
df_cleaned = df.dropna()

# Option 2: impute a column instead of dropping rows. The result is assigned
# back rather than calling fillna(..., inplace=True) on the df['column_name']
# selection: that chained in-place form emits a FutureWarning in pandas 2.2+
# and does not update df once Copy-on-Write is enabled.
df['column_name'] = df['column_name'].fillna(df['column_name'].mean())
# Alternative: impute with the median. This is a no-op when the mean fill
# above has already removed every NaN, so keep only the strategy you need.
df['column_name'] = df['column_name'].fillna(df['column_name'].median())

In [None]:
# Remove fully duplicated rows, keeping the first occurrence of each.
df_cleaned = df.drop_duplicates(keep='first')

In [None]:
# Mapping of existing column names to their replacements.
column_renames = {'OldName': 'NewName'}
df_cleaned = df.rename(columns=column_renames)

In [None]:
# Cast the column to 64-bit floating point (what the builtin float maps to).
df['column_name'] = df['column_name'].astype('float64')

In [None]:
# OneHotEncoder is imported for reference; this cell encodes with pandas.
from sklearn.preprocessing import OneHotEncoder

# Expand each listed categorical column into indicator columns.
categorical_columns = ['categorical_column']
df_encoded = pd.get_dummies(df, columns=categorical_columns)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the column and overwrite the column with the encoded
# values. The fitted encoder is kept so the mapping can be reused later.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df['column_name'])
df['column_name'] = encoded_labels

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the selected feature columns.
# NOTE(review): the scaler is fitted on all of df here; if a train/test split
# follows, consider fitting on the training rows only to avoid leakage.
feature_columns = ['feature1', 'feature2']
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df[feature_columns])

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the selected feature columns with a min-max scaler.
# NOTE(review): the scaler is fitted on all of df here; if a train/test split
# follows, consider fitting on the training rows only to avoid leakage.
feature_columns = ['feature1', 'feature2']
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df[feature_columns])

In [None]:
# Separate the target series from the feature matrix.
y = df['target_column']
X = df.drop(columns='target_column')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Create a random-forest classifier with default settings and fit it on the
# training split (fit returns the estimator itself).
clf = RandomForestClassifier().fit(X_train, y_train)


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Create a random-forest regressor with default settings and fit it on the
# training split (fit returns the estimator itself).
reg = RandomForestRegressor().fit(X_train, y_train)


In [None]:
# Per-class probability estimates for every test row.
y_pred_proba = clf.predict_proba(X_test)

# Hard class predictions for the same rows.
y_pred = clf.predict(X_test)

In [None]:
# Regression predictions for the test set. This rebinds y_pred, so any
# classifier predictions stored under that name are replaced.
y_pred = reg.predict(X=X_test)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Tabulate true versus predicted labels for the test set.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(conf_matrix)

# Text summary of the per-class metrics.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

In [None]:
from sklearn.metrics import accuracy_score

# Share of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.2f}')

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Mean squared error between the true and predicted test targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse:.2f}')

# Coefficient of determination for the same predictions.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R-squared: {r2:.2f}')

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of clf, computed on the training split only.
cv_scores = cross_val_score(
    estimator=clf,
    X=X_train,
    y=y_train,
    cv=5,
    scoring='accuracy',
)
print(f'Cross-Validation Scores: {cv_scores}')
print(f'Average CV Score: {cv_scores.mean():.2f}')

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values per hyperparameter; the search tries every combination.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
)

# Score each combination by 5-fold cross-validated accuracy on the training split.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)

# Report the best-scoring combination.
print(f'Best Parameters: {grid_search.best_params_}')

In [None]:
import joblib

# Persist the classifier to disk so it can be reloaded later.
model_path = 'random_forest_model.pkl'
joblib.dump(clf, model_path)

In [None]:
# Restore the saved model from disk (relies on joblib being imported already).
# NOTE(review): joblib files are pickle-based; only load files you trust.
model_path = 'random_forest_model.pkl'
loaded_model = joblib.load(model_path)

# Run the restored model on the test set.
y_pred_loaded = loaded_model.predict(X_test)

In [None]:
# Refit the classifier with class_weight='balanced' to handle class imbalance
# (fit returns the estimator itself).
clf = RandomForestClassifier(class_weight='balanced').fit(X_train, y_train)

In [None]:
from imblearn.over_sampling import SMOTE

# Resample the training split with SMOTE; the seed keeps the result repeatable.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_resampled, y_resampled = resampled

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# First split: 70% training, 30% held out.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Second split: divide the held-out 30% evenly into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Fit on the training split (fit returns the estimator itself).
model = RandomForestClassifier().fit(X_train, y_train)  # or RandomForestRegressor() for regression tasks

# Predictions on the validation split.
y_pred_val = model.predict(X_val)

## Metric Evaluation

In [None]:
# Import every metric used in this cell. The original import statement listed
# no names, which is a SyntaxError and prevented the cell from running.
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

# Predictions on the validation split.
y_pred_val = model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred_val)
print(f'Accuracy: {accuracy:.2f}')

# average='weighted' weights each class's score by its support, so these
# calls work for both binary and multiclass targets.
precision = precision_score(y_val, y_pred_val, average='weighted')
recall = recall_score(y_val, y_pred_val, average='weighted')
f1 = f1_score(y_val, y_pred_val, average='weighted')

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-Score: {f1:.2f}')

conf_matrix = confusion_matrix(y_val, y_pred_val)
print('Confusion Matrix:\n', conf_matrix)

# AUC-ROC from probability estimates. With two classes, roc_auc_score takes
# the positive-class column (unchanged from the original behaviour). With
# more classes it needs the full probability matrix and an explicit
# multi_class strategy; slicing [:, 1] would raise there.
class_probabilities = model.predict_proba(X_val)
if class_probabilities.shape[1] == 2:
    y_pred_proba = class_probabilities[:, 1]  # probability of the positive class
    auc_roc = roc_auc_score(y_val, y_pred_proba)
else:
    y_pred_proba = class_probabilities
    auc_roc = roc_auc_score(
        y_val,
        y_pred_proba,
        multi_class='ovr',
        average='weighted',
        labels=model.classes_,  # column order of predict_proba
    )
print(f'AUC-ROC: {auc_roc:.2f}')

print(classification_report(y_val, y_pred_val))