In [4]:
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Function to load and preprocess user dataset
def load_and_preprocess_user_data(dataset_path, target_column):
    # Load the dataset
    data = pd.read_csv(dataset_path)  # Assuming CSV file format
    
    # Handle missing values
    data['Age'] = data['Age'].fillna(data['Age'].mean())  # Fill missing Age with mean
    data['Fare'] = data['Fare'].fillna(data['Fare'].mean())  # Fill missing Fare with mean
    data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])  # Fill missing Embarked with the most frequent value
    data = data.dropna(subset=[target_column])  # Drop rows where the target column (Survived) is missing
    
    # Drop columns that are not needed for prediction (e.g., Name, Ticket, Cabin)
    data = data.drop(columns=['Name', 'Ticket', 'Cabin'], errors='ignore')

    # Convert categorical columns to numeric using LabelEncoder
    categorical_columns = ['Embarked']  # Only encode Embarked as it's categorical
    for column in categorical_columns:
        label_encoder = LabelEncoder()
        data[column] = label_encoder.fit_transform(data[column])

    # Separate features and target
    X = data.drop(columns=[target_column])
    y = data[target_column]
    
    # Standardize the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Encode the target variable (if it's categorical)
    y_encoded = tf.keras.utils.to_categorical(y)

    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)
    
    return (X_train, y_train), (X_test, y_test), label_encoder

# Function to create the MLP model
def create_model(input_dim):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(512, activation='relu', input_shape=(input_dim,)),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')  # Change this to match your dataset's number of classes
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Function to train the model
def train(model, x_train, y_train):
    history = model.fit(
        x_train, y_train,
        batch_size=128,
        epochs=20,
        validation_split=0.2,
        verbose=1
    )
    return history

# Function to evaluate the model
def evaluate_model_with_metrics(model, x_test, y_test, label_encoder):
    y_pred = model.predict(x_test)
    y_pred_classes = y_pred.argmax(axis=1)  
    y_true = y_test.argmax(axis=1)         
    
    # Inverse transform to get the original labels
    y_true_labels = label_encoder.inverse_transform(y_true)
    y_pred_labels = label_encoder.inverse_transform(y_pred_classes)
    
    cm = confusion_matrix(y_true_labels, y_pred_labels)
    
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix')
    plt.show()

    print("Classification Report:")
    print(classification_report(y_true_labels, y_pred_labels))

def plot_training_history(history):
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()
    plt.grid(True)

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()

def visualize_predictions(x_test, y_test, y_pred, label_encoder, num_samples=10):
    indices = random.sample(range(x_test.shape[0]), num_samples)  
    fig, axes = plt.subplots(1, num_samples, figsize=(15, 5))

    y_true_classes = y_test.argmax(axis=1)  
    y_pred_classes = y_pred.argmax(axis=1)  

    y_true_labels = label_encoder.inverse_transform(y_true_classes)
    y_pred_labels = label_encoder.inverse_transform(y_pred_classes)

    for i, ax in enumerate(axes):
        idx = indices[i]
        ax.imshow(x_test[idx].reshape(28, 28), cmap='gray')  # Reshape if your data is image data
        ax.set_title(f"True: {y_true_labels[idx]}\nPred: {y_pred_labels[idx]}")
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Main script
if __name__ == "__main__":
    # Define dataset path and target column here
    dataset_path = "titanic.csv"  # Replace with your dataset path
    target_column = "Survived"  # Target column in Titanic dataset (1 = survived, 0 = did not survive)

    # Load and preprocess data
    (x_train, y_train), (x_test, y_test), label_encoder = load_and_preprocess_user_data(dataset_path, target_column)
    
    # Create model
    model = create_model(x_train.shape[1])
    
    # Train model
    history = train(model, x_train, y_train)
    plot_training_history(history)

    # Evaluate model
    y_pred = model.predict(x_test)
    evaluate_model_with_metrics(model, x_test, y_test, label_encoder)

    # Visualize predictions
    visualize_predictions(x_test, y_test, y_pred, label_encoder)


ValueError: could not convert string to float: 'male'

In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Load and preprocess custom Iris dataset
def load_and_preprocess_iris_data(dataset_path, target_column):
    # Load the dataset (example with CSV file)
    data = pd.read_csv(dataset_path)

    # Display first few rows of the dataset to understand its structure
    print(data.head())

    # Check for missing values
    print("Missing values before filling:", data.isnull().sum())

    # Fill missing values with the mean of each column (only for numerical columns)
    data = data.fillna(data.select_dtypes(include=[np.number]).mean())

    # Check for missing values after filling
    print("Missing values after filling:", data.isnull().sum())

    # Encode the target variable (species) using LabelEncoder
    label_encoder = LabelEncoder()
    data[target_column] = label_encoder.fit_transform(data[target_column])

    # Separate features (X) and target labels (y)
    X = data.drop(columns=[target_column])  # Drop the target column
    y = data[target_column]

    # Normalize the feature data (standardization)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # One-hot encode the target variable
    y_encoded = to_categorical(y)  # Converts to one-hot encoding for multi-class classification

    # Split into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

    return (X_train, y_train), (X_test, y_test)


# Example of a generic neural network model
def create_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(512, activation='relu', input_shape=input_shape),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(3, activation='softmax')  # For Iris dataset (3 classes)
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Example of training the model
def train_model(model, X_train, y_train):
    history = model.fit(X_train, y_train, batch_size=32, epochs=50, validation_split=0.2, verbose=1)
    return history


# Example of evaluating the model
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_classes = y_pred.argmax(axis=1)
    y_true = y_test.argmax(axis=1)

    from sklearn.metrics import classification_report, confusion_matrix
    import seaborn as sns
    import matplotlib.pyplot as plt

    cm = confusion_matrix(y_true, y_pred_classes)
    
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=[0, 1, 2], yticklabels=[0, 1, 2])
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix')
    plt.show()

    print("Classification Report:")
    print(classification_report(y_true, y_pred_classes))


# Main script to execute the workflow
if __name__ == "__main__":
    # Example usage with a custom dataset (replace with your dataset's path and target column)
    dataset_path = 'iris_dataset_missing.csv'  # Replace with the path to your dataset (make sure it's in CSV format)
    target_column = 'species'  # Replace with the name of the target column (typically 'species' in Iris dataset)

    (X_train, y_train), (X_test, y_test) = load_and_preprocess_iris_data(dataset_path, target_column)
    
    model = create_model(input_shape=(X_train.shape[1],))  # Input shape is the number of features
    history = train_model(model, X_train, y_train)

    evaluate_model(model, X_test, y_test)


   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

        target  
0  Iris-setosa  
1  Iris-setosa  
2  Iris-setosa  
3  Iris-setosa  
4  Iris-setosa  
Missing values before filling: sepal length (cm)    2
sepal width (cm)     4
petal length (cm)    3
petal width (cm)     0
target               0
dtype: int64
Missing values after filling: sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
target               0
dtype: int64


KeyError: 'species'