In [None]:
import pandas as pd

# Load the dataset; the file uses a semicolon as the field separator.
data = pd.read_csv("bank-full.csv", sep=';')
print(data.head())

# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the summary.
data.info()


In [None]:
# Report, for every column, whether all of its rows hold the same value.
#
# nunique() skips missing values by default, which would report a column that
# mixes one value with NaN as constant. dropna=False counts NaN as a value of
# its own, so only truly uniform columns are flagged.

for column in data.columns:
    # Compute the count once and reuse it for both the test and the message.
    unique_count = data[column].nunique(dropna=False)
    if unique_count == 1:
        print(f"All rows in the column '{column}' have the same value: {data[column].iloc[0]}")
    else:
        print(f"The column '{column}' has {unique_count} unique values.")


In [None]:
# Remove the columns considered to be of low relevance.
columns_to_drop = ['day', 'month']
data_dropped_column = data.drop(columns_to_drop, axis=1)

# Show the frame as it looks once those columns are gone.
print("\nDataFrame after dropping columns:", data_dropped_column, sep="\n")

# Write the reduced frame to disk without the index column; a later cell
# reads this file back in.
output_file = "data_dropped_column.csv"
data_dropped_column.to_csv(output_file, index=False)
print(f"\nDataFrame has been saved to {output_file}")





In [None]:
# Check for null values per column, per row, and in total.
# The null mask is computed once and reused by all three reports.
null_mask = data_dropped_column.isnull()

print("\nNull values in each column:")
print(null_mask.sum())

# Show only the rows that actually contain at least one null. Printing the
# any(axis=1) result directly would list a True/False flag for every row
# rather than the affected rows themselves.
print("\nRows with any null values:")
print(data_dropped_column[null_mask.any(axis=1)])

# Total number of null cells across the whole DataFrame.
print("\nTotal null values in the DataFrame:")
print(null_mask.sum().sum())

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the column-reduced dataset.
data = pd.read_csv("data_dropped_column.csv")  # Replace with the correct path if necessary

# Columns to one-hot encode; only those actually present in the frame are used.
one_hot_columns = ['job', 'marital', 'contact', 'poutcome']
present_one_hot = [column for column in one_hot_columns if column in data.columns]

# Encode all present columns in a single pass instead of rebuilding the frame
# once per column. dtype=int makes the indicator columns 0/1 integers
# directly, so no True/False -> 1/0 replacement pass over the frame is needed.
if present_one_hot:
    data = pd.get_dummies(data, columns=present_one_hot, drop_first=True, dtype=int)

# Convert any other boolean columns to 1/0 with an explicit cast, rather than
# relying on replace() and its implicit dtype conversion.
bool_columns = data.select_dtypes(include='bool').columns
data = data.astype({column: int for column in bool_columns})

label_encode_columns = ['education', 'default', 'housing', 'loan', 'y']

# Label encode each listed column that exists. fit_transform refits the shared
# encoder on every column, so after the loop `label_encoder` only holds the
# class mapping of the last column it processed.
label_encoder = LabelEncoder()
for col in label_encode_columns:
    if col in data.columns:
        data[col] = label_encoder.fit_transform(data[col])

# Save the transformed dataset
output_file = "encoded_data.csv"
data.to_csv(output_file, index=False)

print(f"Transformed dataset has been saved to {output_file}")

# Preview the encoded frame (printed once).
print(data.head())




In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the encoded dataset from disk.
data = pd.read_csv("encoded_data.csv")

# Numerical columns that get standardized.
numerical_columns = ['age', 'balance', 'duration', 'campaign', 'pdays', 'previous']

# NOTE(review): the scaler is fit on the full dataset here, i.e. before the
# train/test split performed later in the notebook.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[numerical_columns])
data[numerical_columns] = scaled_values

# Preview the scaled frame.
print("\nScaled DataFrame:", data.head(), sep="\n")

# Write the scaled frame to a new CSV file, without the index column.
output_file = "scaled_data.csv"
data.to_csv(output_file, index=False)
print(f"\nScaled dataset has been saved to {output_file}")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the scaled dataset back from disk.
data = pd.read_csv('scaled_data.csv')  # Replace with the path to your CSV file

# The 'y' column is the target (label); every other column is a feature.
y = data['y']
X = data.drop(columns='y')

# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the shape of each split as a sanity check.
print(f"Training set size: {X_train.shape}")
print(f"Testing set size: {X_test.shape}")


In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialization and batch shuffling are repeatable between runs.
tf.keras.utils.set_random_seed(42)

# Scale the features for neural network input. The scaler is fit on the
# training split only and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the neural network model. An explicit Input layer declares the
# feature width instead of passing input_shape to the first Dense layer.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Use softmax if multi-class classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])  # Use categorical_crossentropy for multi-class

# Train the model with validation data.
# NOTE(review): the test split doubles as the validation data, so the
# validation metrics printed below are measured on the same rows as the
# test accuracy further down.
history = model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

# Print validation loss and accuracy after the last epoch
print(f"Final Validation Loss: {history.history['val_loss'][-1]}")
print(f"Final Validation Accuracy: {history.history['val_accuracy'][-1] * 100:.2f}%")

# Predict on the test data, threshold the sigmoid output at 0.5, and flatten
# the single-output-column prediction array so it is 1-D like y_test.
y_pred_nn = model.predict(X_test_scaled)
y_pred_nn = (y_pred_nn > 0.5).astype(int).ravel()

# Evaluate accuracy
accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(f"Test Accuracy: {accuracy_nn * 100:.2f}%")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

from sklearn.metrics import confusion_matrix, classification_report

# Standardize the features, fitting the scaler on the training split only
# (optional for Random Forest, but kept here).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Random Forest with 100 trees and a fixed random_state.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# 5-fold cross-validated accuracy on the training split, with the per-fold
# scores and their mean reported.
cv_scores = cross_val_score(rf_model, X_train_scaled, y_train, cv=5, scoring='accuracy')
print(f"Cross-Validation Accuracy Scores: {cv_scores}")
print(f"Mean Cross-Validation Accuracy: {cv_scores.mean() * 100:.2f}%")

# Fit on the full training split, then predict and score the test split.
rf_model.fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Test Accuracy: {accuracy_rf * 100:.2f}%")

# Further diagnostics: confusion matrix and classification report.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred_rf), sep="\n")
print("Classification Report:", classification_report(y_test, y_pred_rf), sep="\n")
