In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = '/content/Synthetic_Iris_Like_Data - missing_synthetic_iris_like_data.csv'
data = pd.read_csv(file_path)

# Split the data into features and labels
X = data.drop('class', axis=1)
y = data['class']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the classifier
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Make predictions
y_pred = classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(report)

ValueError: Input X contains NaN.
RandomForestClassifier does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = '/content/Synthetic_Iris_Like_Data - missing_synthetic_iris_like_data.csv'
data = pd.read_csv(file_path)

# Function to fill missing values with the mean value of the column for the corresponding label
def fill_missing_with_label_mean(df, label_col):
    for col in df.columns:
        if col != label_col:
            df[col] = df.groupby(label_col)[col].transform(lambda x: x.fillna(x.mean()))
    return df

# Fill missing values
data = fill_missing_with_label_mean(data, 'class')

# Split the data into features and labels
X = data.drop('class', axis=1)
y = data['class']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the classifier
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Make predictions
y_pred = classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(report)


Accuracy: 0.83
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.89      0.82        18
           1       0.85      0.77      0.81        22
           2       0.89      0.85      0.87        20

    accuracy                           0.83        60
   macro avg       0.84      0.84      0.83        60
weighted avg       0.84      0.83      0.83        60



In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = '/content/Synthetic_Iris_Like_Data - missing_synthetic_iris_like_data.csv'
data = pd.read_csv(file_path)

# Function to fill missing values with the mean value of the column for the corresponding label
def fill_missing_with_label_mean(df, label_col):
    for col in df.columns:
        if col != label_col:
            df[col] = df.groupby(label_col)[col].transform(lambda x: x.fillna(x.mean()))
    return df

# Fill missing values
data = fill_missing_with_label_mean(data, 'class')

# Split the data into features and labels
X = data.drop('class', axis=1)
y = data['class']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the Random Forest classifier
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)

# Make predictions with Random Forest
rf_y_pred = rf_classifier.predict(X_test)

# Evaluate the Random Forest model
rf_accuracy = accuracy_score(y_test, rf_y_pred)
rf_report = classification_report(y_test, rf_y_pred)

print(f'Random Forest Accuracy: {rf_accuracy:.2f}')
print('Random Forest Classification Report:')
print(rf_report)

# Initialize and train the XGBoost classifier
xgb_classifier = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_classifier.fit(X_train, y_train)

# Make predictions with XGBoost
xgb_y_pred = xgb_classifier.predict(X_test)

# Evaluate the XGBoost model
xgb_accuracy = accuracy_score(y_test, xgb_y_pred)
xgb_report = classification_report(y_test, xgb_y_pred)

print(f'XGBoost Accuracy: {xgb_accuracy:.2f}')
print('XGBoost Classification Report:')
print(xgb_report)


Random Forest Accuracy: 0.83
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.89      0.82        18
           1       0.85      0.77      0.81        22
           2       0.89      0.85      0.87        20

    accuracy                           0.83        60
   macro avg       0.84      0.84      0.83        60
weighted avg       0.84      0.83      0.83        60

XGBoost Accuracy: 0.83
XGBoost Classification Report:
              precision    recall  f1-score   support

           0       0.75      1.00      0.86        18
           1       0.89      0.73      0.80        22
           2       0.89      0.80      0.84        20

    accuracy                           0.83        60
   macro avg       0.84      0.84      0.83        60
weighted avg       0.85      0.83      0.83        60



In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Load the dataset
file_path = '/content/Synthetic_Iris_Like_Data - missing_synthetic_iris_like_data.csv'
data = pd.read_csv(file_path)

# Function to fill missing values with the mean value of the column for the corresponding label
def fill_missing_with_label_mean(df, label_col):
    for col in df.columns:
        if col != label_col:
            df[col] = df.groupby(label_col)[col].transform(lambda x: x.fillna(x.mean()))
    return df

# Fill missing values
data = fill_missing_with_label_mean(data, 'class')

# Split the data into features and labels
X = data.drop('class', axis=1)
y = data['class']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the Random Forest classifier
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)

# Make predictions with Random Forest
rf_y_pred = rf_classifier.predict(X_test)

# Evaluate the Random Forest model
rf_accuracy = accuracy_score(y_test, rf_y_pred)
rf_report = classification_report(y_test, rf_y_pred)

print(f'Random Forest Accuracy: {rf_accuracy:.2f}')
print('Random Forest Classification Report:')
print(rf_report)

# Initialize and train the XGBoost classifier
xgb_classifier = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_classifier.fit(X_train, y_train)

# Make predictions with XGBoost
xgb_y_pred = xgb_classifier.predict(X_test)

# Evaluate the XGBoost model
xgb_accuracy = accuracy_score(y_test, xgb_y_pred)
xgb_report = classification_report(y_test, xgb_y_pred)

print(f'XGBoost Accuracy: {xgb_accuracy:.2f}')
print('XGBoost Classification Report:')
print(xgb_report)

# Convert labels to categorical for neural network
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)

# Initialize and train the neural network
nn_model = Sequential()
nn_model.add(Dense(10, input_shape=(X_train.shape[1],), activation='relu'))
nn_model.add(Dense(y_train_cat.shape[1], activation='softmax'))

nn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
nn_model.fit(X_train, y_train_cat, epochs=50, batch_size=10, verbose=0)

# Make predictions with the neural network
nn_y_pred_cat = nn_model.predict(X_test)
nn_y_pred = nn_y_pred_cat.argmax(axis=1)

# Evaluate the neural network model
nn_accuracy = accuracy_score(y_test, nn_y_pred)
nn_report = classification_report(y_test, nn_y_pred)

print(f'Neural Network Accuracy: {nn_accuracy:.2f}')
print('Neural Network Classification Report:')
print(nn_report)


Random Forest Accuracy: 0.83
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.89      0.82        18
           1       0.85      0.77      0.81        22
           2       0.89      0.85      0.87        20

    accuracy                           0.83        60
   macro avg       0.84      0.84      0.83        60
weighted avg       0.84      0.83      0.83        60

XGBoost Accuracy: 0.83
XGBoost Classification Report:
              precision    recall  f1-score   support

           0       0.75      1.00      0.86        18
           1       0.89      0.73      0.80        22
           2       0.89      0.80      0.84        20

    accuracy                           0.83        60
   macro avg       0.84      0.84      0.83        60
weighted avg       0.85      0.83      0.83        60

Neural Network Accuracy: 0.77
Neural Network Classification Report:
              precision    recall  f1-score   supp