# Detect Overfitting or Underfitting

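In this notebook, we will explore how to detect overfitting and underfitting in machine learning models. We will train a random forest classifier on a synthetic classification dataset and compare its accuracy on the training data with its accuracy on a held-out validation set. A model that scores much higher on the training data than on the validation data is overfitting; a model that scores poorly on both is underfitting.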

In [None]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Configuration: values a reader might tune. A single seed is reused for data
# generation, splitting, and the model so a fresh Restart & Run All reproduces
# the same numbers.
SEED = 42
N_SAMPLES = 1000
VAL_FRACTION = 0.2

# Create a synthetic dataset (20 features: 2 informative, 10 redundant)
X, y = make_classification(
    n_samples=N_SAMPLES,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    random_state=SEED,
)

# Hold out a validation split that is not used for fitting
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=VAL_FRACTION, random_state=SEED
)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=SEED)
model.fit(X_train, y_train)

# Evaluate the model on both splits
train_accuracy = model.score(X_train, y_train)
val_accuracy = model.score(X_val, y_val)

# The train-validation gap is the quantity of interest: a large positive gap
# suggests overfitting, while low accuracy on both splits suggests underfitting.
accuracy_gap = train_accuracy - val_accuracy

# Report the exact numbers; bar heights alone make a small gap hard to read.
print(f"Train accuracy:      {train_accuracy:.3f}")
print(f"Validation accuracy: {val_accuracy:.3f}")
print(f"Gap (train - val):   {accuracy_gap:+.3f}")

# Visualize the results
fig, ax = plt.subplots(figsize=(10, 5))
bars = ax.bar(
    ['Train Accuracy', 'Validation Accuracy'],
    [train_accuracy, val_accuracy],
    color=['blue', 'orange'],
)

# Label each bar with its value so the figure stands alone when skimmed.
for bar in bars:
    height = bar.get_height()
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        height + 0.01,
        f"{height:.3f}",
        ha='center',
        va='bottom',
    )

# Headroom above 1.0 keeps the label of a near-perfect bar inside the axes.
ax.set_ylim(0, 1.1)
ax.set_title('Model Accuracy')
ax.set_ylabel('Accuracy')
plt.show()