In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

In [2]:
# Q1 for RandomForest and AdaBoost: Read the dataset and do necessary preprocessing
df = pd.read_csv("IRIS.csv")

# Data imputation for missing values
# Assuming numerical features; for categorical features, strategy might be 'most_frequent'
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
df[df.select_dtypes(include=[np.number]).columns] = imputer.fit_transform(df.select_dtypes(include=[np.number]))

# Encoding categorical to numerical if necessary
# Assuming 'species' is a categorical column that needs to be encoded
if df['species'].dtype == 'object':
    encoder = LabelEncoder()
    df['species'] = encoder.fit_transform(df['species'])

In [3]:
# Q2 for RandomForest and AdaBoost: Choose independent (X) and dependent (Y) variables
X = df.drop('species', axis=1)  # Assuming 'species' is the target variable
Y = df['species']

# Splitting dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [4]:
# Q3 & Q4 & Q5 for RandomForest: Creating models with different parameters, finding best parameters, and evaluating
rf_params = [
    {'n_estimators': 100, 'criterion': 'gini', 'max_depth': None, 'max_features': 'auto'},
    {'n_estimators': 150, 'criterion': 'entropy', 'max_depth': 10, 'max_features': 'sqrt'},
    {'n_estimators': 200, 'criterion': 'gini', 'max_depth': 20, 'max_features': 'log2'},
    {'n_estimators': 100, 'criterion': 'entropy', 'max_depth': None, 'max_features': None},
]

for params in rf_params:
    clf_rf = RandomForestClassifier(**params, random_state=0)
    clf_rf.fit(X_train, Y_train)
    Y_pred = clf_rf.predict(X_test)
    print("Random Forest Parameters:", params)
    print("Confusion Matrix:\n", confusion_matrix(Y_test, Y_pred))
    print("Accuracy:", accuracy_score(Y_test, Y_pred))
    print("Precision:", precision_score(Y_test, Y_pred, average='macro'))
    print("Recall:", recall_score(Y_test, Y_pred, average='macro'))
    print("F1 Score:", f1_score(Y_test, Y_pred, average='macro'))
    print(classification_report(Y_test, Y_pred))
    print("----------\n")

  warn(


Random Forest Parameters: {'n_estimators': 100, 'criterion': 'gini', 'max_depth': None, 'max_features': 'auto'}
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

----------

Random Forest Parameters: {'n_estimators': 150, 'criterion': 'entropy', 'max_depth': 10, 'max_features': 'sqrt'}
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        

In [5]:
# Q3 & Q4 & Q5 for AdaBoost: Creating models with different parameters, finding best parameters, and evaluating
ab_params = [
    {'n_estimators': 50, 'learning_rate': 1.0, 'algorithm': 'SAMME.R'},
    {'n_estimators': 100, 'learning_rate': 0.5, 'algorithm': 'SAMME'},
    {'n_estimators': 150, 'learning_rate': 0.75, 'algorithm': 'SAMME.R'},
    {'n_estimators': 200, 'learning_rate': 1.0, 'algorithm': 'SAMME'},
]

for params in ab_params:
    clf_ab = AdaBoostClassifier(**params, random_state=0)
    clf_ab.fit(X_train, Y_train)
    Y_pred = clf_ab.predict(X_test)
    print("AdaBoost Parameters:", params)
    print("Confusion Matrix:\n", confusion_matrix(Y_test, Y_pred))
    print("Accuracy:", accuracy_score(Y_test, Y_pred))
    print("Precision:", precision_score(Y_test, Y_pred, average='macro'))
    print("Recall:", recall_score(Y_test, Y_pred, average='macro'))
    print("F1 Score:", f1_score(Y_test, Y_pred, average='macro'))
    print(classification_report(Y_test, Y_pred))
    print("----------\n")

AdaBoost Parameters: {'n_estimators': 50, 'learning_rate': 1.0, 'algorithm': 'SAMME.R'}
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

----------

AdaBoost Parameters: {'n_estimators': 100, 'learning_rate': 0.5, 'algorithm': 'SAMME'}
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00    

In [48]:
import fcmeans
help(fcmeans.main.FCM)

Help on class FCM in module fcmeans.main:

class FCM(pydantic.main.BaseModel)
 |  FCM(*, n_clusters: typing.Annotated[int, Ge(ge=1)] = 5, max_iter: typing.Annotated[int, Ge(ge=1), Le(le=1000)] = 150, m: typing.Annotated[float, Ge(ge=1.0)] = 2.0, error: typing.Annotated[float, Ge(ge=1e-09)] = 1e-05, random_state: Optional[int] = None, trained: bool = False, n_jobs: typing.Annotated[int, Ge(ge=1)] = 1, verbose: Optional[bool] = False, distance: Union[fcmeans.main.DistanceOptions, Callable, NoneType] = <DistanceOptions.euclidean: 'euclidean'>, distance_params: Optional[Dict] = {}, **extra_data: Any) -> None
 |  
 |  Fuzzy C-means Model
 |  
 |  Attributes:
 |      n_clusters (int): The number of clusters to form as well as the number
 |      of centroids to generate by the fuzzy C-means.
 |      max_iter (int): Maximum number of iterations of the fuzzy C-means
 |      algorithm for a single run.
 |      m (float): Degree of fuzziness: $m \in (1, \infty)$.
 |      error (float): Relative t