In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

In [None]:

# Load the iris bunch and assemble a single DataFrame: the measurement
# columns come first (named after iris['feature_names']) and the class
# label is appended as the last column, 'Type'.
iris = load_iris()
_features_and_target = np.column_stack((iris['data'], iris['target']))
iris_data = pd.DataFrame(
    _features_and_target,
    columns=list(iris['feature_names']) + ['Type'],
)

In [None]:
# Convert numerical features to categorical for demonstration.
# Each feature is split into three equal-frequency bins (terciles).
#
# The bins are computed from the raw measurements in iris['data'] rather
# than from iris_data itself: binning iris_data[column] in place would make
# this cell fail on a second run, because the column would already hold
# 'Small'/'Medium'/'Large' categories instead of numbers.
# Column order of iris['data'] matches iris['feature_names'] (that pairing is
# what the DataFrame construction above relies on as well).
size_labels = ['Small', 'Medium', 'Large']
for position, column in enumerate(iris['feature_names']):
    iris_data[column] = pd.qcut(iris['data'][:, position], q=3, labels=size_labels)

In [None]:
# Convert target to binary (Setosa vs Others).
# Class 0 becomes 'Positive'; classes 1 and 2 become 'Negative'.
#
# The labels are derived from the original iris['target'] codes instead of
# the current contents of iris_data['Type']: mapping the column onto itself
# would turn every value into NaN if this cell were executed twice, since
# 'Positive'/'Negative' are not keys of the mapping.
type_labels = {0: 'Positive', 1: 'Negative', 2: 'Negative'}
iris_data['Type'] = pd.Series(iris['target'], index=iris_data.index).map(type_labels)

In [None]:

# Display first few rows
print("First few rows of the prepared dataset:")
print(iris_data.head())

# Basic data info
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
print("\nDataset Info:")
iris_data.info()

# Check for missing values (count of nulls per column)
print("\nMissing Values:")
print(iris_data.isnull().sum())

# Plot, for the rows labelled 'Positive' only, how often each category of
# every feature occurs. One count plot per feature, laid out on a 2x2 grid.
positive_data = iris_data.loc[iris_data['Type'] == 'Positive']

fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for idx, feature in enumerate(iris['feature_names']):
    # divmod(idx, 2) -> (row, column) of the panel for this feature
    panel = axes[divmod(idx, 2)]
    sns.countplot(data=positive_data, x=feature, ax=panel)
    panel.set_title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

In [None]:

# FIND-S Algorithm Implementation
#
# FIND-S starts from the most specific hypothesis (the attribute values of
# the first positive example) and generalizes an attribute to '?' as soon as
# any other positive example disagrees with it. Negative examples are ignored.
target_type = "Positive"  # We're looking for characteristics of Setosa (class 0)
positive_examples = iris_data[iris_data['Type'] == target_type]

if positive_examples.empty:
    print(f"No positive examples found for {target_type}.")
else:
    feature_names = iris['feature_names']

    # Iterate over the feature columns as plain tuples, one per positive
    # example, so the attribute values of a row are extracted only once
    # instead of being re-sliced for every attribute comparison.
    attribute_rows = positive_examples[feature_names].itertuples(index=False, name=None)

    # Initialize the hypothesis to the first positive example's attributes
    initial_hypothesis = list(next(attribute_rows))

    # Apply FIND-S algorithm to the remaining positive examples
    for row in attribute_rows:
        for i, observed in enumerate(row):
            if initial_hypothesis[i] != observed:
                initial_hypothesis[i] = '?'  # Use '?' to denote a generalization

    # Print the final hypothesis
    print("\nMost Specific Hypothesis for", target_type, "Iris:")
    for feature, value in zip(feature_names, initial_hypothesis):
        print(f"{feature}: {value}")