### Task 1: Introduction to Isolation Forest
**Description**: Install the necessary library and load a sample dataset.

**Steps**:
1. Install scikit-learn
2. Load a sample dataset using Python

In [None]:
# write your code from here
# Install scikit-learn first if it is missing:
# !pip install scikit-learn
from sklearn.datasets import load_breast_cancer

# Fetch the bundled breast-cancer dataset and split it into features/labels
data = load_breast_cancer()
X, y = data.data, data.target

# Report the array dimensions and the column names
for label, value in (
    ("Features shape:", X.shape),
    ("Target shape:", y.shape),
    ("Feature names:", data.feature_names),
):
    print(label, value)


### Task 2: Building an Isolation Forest
**Description**: Initialize an Isolation Forest model and fit it to the Boston dataset.

**Steps**:
1. Initialize Isolation Forest
2. Fit model

In [None]:
# write your code from here
# Step 0: Install required packages if needed
# !pip install scikit-learn pandas

import numpy as np
from sklearn.ensemble import IsolationForest


def load_boston_compat():
    """Return the Boston housing data as an object with ``data``/``target``.

    ``sklearn.datasets.load_boston`` was removed in scikit-learn 1.2, and
    importing it there raises ``ImportError``. The import is attempted first
    so older installations behave exactly as before; when it fails, the raw
    file is parsed from the original StatLib source using the recipe from
    scikit-learn's removal notice (requires network access and pandas).
    """
    try:
        from sklearn.datasets import load_boston
    except ImportError:
        import pandas as pd
        from sklearn.utils import Bunch

        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        # Each record spans two physical lines: 11 values on the first,
        # 3 on the second. The last value of the second line is the target.
        features = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        return Bunch(data=features, target=raw_df.values[1::2, 2])
    return load_boston()


# Step 1: Load Boston dataset
boston = load_boston_compat()
X = boston.data

# Step 2: Initialize Isolation Forest model
# random_state is fixed so the detected anomalies are reproducible.
iso_forest = IsolationForest(n_estimators=100, contamination='auto', random_state=42)

# Step 3: Fit model
iso_forest.fit(X)

# Optional: Predict anomalies (-1 means anomaly, 1 means normal)
preds = iso_forest.predict(X)
print("Predictions (1 = normal, -1 = anomaly):")
print(preds)

# Summary of anomalies detected
num_anomalies = np.sum(preds == -1)
print(f"Number of anomalies detected: {num_anomalies}")



### Task 3: Detecting Anomalies
**Description**: Use the fitted Isolation Forest model to predict anomalies.

**Steps**:
1. Predict anomalies
2. Display anomaly counts

In [None]:
# write your code from here
import numpy as np
from sklearn.ensemble import IsolationForest


def load_boston_compat():
    """Return the Boston housing data as an object with ``data``/``target``.

    ``sklearn.datasets.load_boston`` was removed in scikit-learn 1.2, and
    importing it there raises ``ImportError``. The import is attempted first
    so older installations behave exactly as before; when it fails, the raw
    file is parsed from the original StatLib source using the recipe from
    scikit-learn's removal notice (requires network access and pandas).
    """
    try:
        from sklearn.datasets import load_boston
    except ImportError:
        import pandas as pd
        from sklearn.utils import Bunch

        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        # Each record spans two physical lines: 11 values on the first,
        # 3 on the second. The last value of the second line is the target.
        features = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        return Bunch(data=features, target=raw_df.values[1::2, 2])
    return load_boston()


# Load data
boston = load_boston_compat()
X = boston.data

# Initialize and fit Isolation Forest
iso_forest = IsolationForest(n_estimators=100, contamination='auto', random_state=42)
iso_forest.fit(X)

# Step 1: Predict anomalies (-1 means anomaly, 1 means normal)
predictions = iso_forest.predict(X)

# Step 2: Display anomaly counts
num_anomalies = np.sum(predictions == -1)
num_normals = np.sum(predictions == 1)

print(f"Total samples: {len(predictions)}")
print(f"Anomalies detected: {num_anomalies}")
print(f"Normal samples: {num_normals}")


### Task 4: Visualizing Anomalies
**Description**: Visualize the results to see which samples are considered anomalies.

**Steps**:
1. Plot a scatter plot

In [None]:
# write your code from here
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import IsolationForest


def load_boston_compat():
    """Return the Boston housing data as an object with ``data``/``target``.

    ``sklearn.datasets.load_boston`` was removed in scikit-learn 1.2, and
    importing it there raises ``ImportError``. The import is attempted first
    so older installations behave exactly as before; when it fails, the raw
    file is parsed from the original StatLib source using the recipe from
    scikit-learn's removal notice (requires network access and pandas).
    """
    try:
        from sklearn.datasets import load_boston
    except ImportError:
        import pandas as pd
        from sklearn.utils import Bunch

        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        # Each record spans two physical lines: 11 values on the first,
        # 3 on the second. The last value of the second line is the target.
        features = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        return Bunch(data=features, target=raw_df.values[1::2, 2])
    return load_boston()


# Load data
boston = load_boston_compat()
X = boston.data

# Fit Isolation Forest
iso_forest = IsolationForest(n_estimators=100, contamination='auto', random_state=42)
iso_forest.fit(X)

# Predict anomalies (-1 means anomaly, 1 means normal)
predictions = iso_forest.predict(X)

# Build each boolean mask once and reuse it for both plotted columns
normal_mask = predictions == 1
anomaly_mask = predictions == -1

# Plot
plt.figure(figsize=(10, 6))
plt.title("Isolation Forest Anomaly Detection (First two features)")

# Normal points
plt.scatter(X[normal_mask, 0], X[normal_mask, 1], c='blue', label='Normal')

# Anomalies are drawn second so they sit on top of the normal points
plt.scatter(X[anomaly_mask, 0], X[anomaly_mask, 1], c='red', label='Anomaly')

plt.xlabel('Feature 1 (CRIM)')
plt.ylabel('Feature 2 (ZN)')
plt.legend()
plt.show()


### Task 5: Interpret Contamination Parameter
**Description**: Experiment with different contamination levels.

In [None]:
# write your code from here
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest


def load_boston_compat():
    """Return the Boston housing data as an object with ``data``/``target``.

    ``sklearn.datasets.load_boston`` was removed in scikit-learn 1.2, and
    importing it there raises ``ImportError``. The import is attempted first
    so older installations behave exactly as before; when it fails, the raw
    file is parsed from the original StatLib source using the recipe from
    scikit-learn's removal notice (requires network access and pandas).
    """
    try:
        from sklearn.datasets import load_boston
    except ImportError:
        import pandas as pd
        from sklearn.utils import Bunch

        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        # Each record spans two physical lines: 11 values on the first,
        # 3 on the second. The last value of the second line is the target.
        features = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        return Bunch(data=features, target=raw_df.values[1::2, 2])
    return load_boston()


# Load data
boston = load_boston_compat()
X = boston.data

# Define contamination values to test
contamination_levels = np.linspace(0.01, 0.2, 10)  # from 1% to 20%

anomaly_counts = []

for contamination in contamination_levels:
    # A fresh model per level; the fixed seed keeps the runs comparable.
    iso_forest = IsolationForest(n_estimators=100, contamination=contamination, random_state=42)
    iso_forest.fit(X)
    preds = iso_forest.predict(X)
    # Count anomalies (-1 labels) in one vectorized pass instead of the
    # builtin sum(), which iterates the array element by element in Python.
    count_anomalies = int(np.count_nonzero(preds == -1))
    anomaly_counts.append(count_anomalies)
    print(f"Contamination: {contamination:.2f} -> Anomalies detected: {count_anomalies}")

# Plot contamination vs anomalies detected
plt.figure(figsize=(8,5))
plt.plot(contamination_levels, anomaly_counts, marker='o')
plt.title('Effect of Contamination Parameter on Anomaly Detection')
plt.xlabel('Contamination')
plt.ylabel('Number of Anomalies Detected')
plt.grid(True)
plt.show()
