<a href="https://colab.research.google.com/github/3morii74/Machine-learning-project/blob/main/MLProject1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas numpy scikit-learn tensorflow imblearn gradio matplotlib seaborn

Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import SMOTE

# Load data (already in your notebook)
data = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [11]:
data = data.drop('customerID', axis=1)

In [12]:
data['TotalCharges'] = pd.to_numeric(data['TotalCharges'], errors='coerce')
data['TotalCharges'] = data['TotalCharges'].fillna(data['TotalCharges'].median())

In [13]:
binary_cols = ['gender', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', 'Churn']
label_encoder = LabelEncoder()
for col in binary_cols:
    data[col] = label_encoder.fit_transform(data[col])

In [14]:
categorical_cols = ['MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
                   'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
                   'Contract', 'PaymentMethod']
data = pd.get_dummies(data, columns=categorical_cols, drop_first=True)

In [15]:
X = data.drop('Churn', axis=1)
y = data['Churn']

In [16]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [17]:
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

In [18]:
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (7244, 30), y_train shape: (7244,)
X_val shape: (1056, 30), y_val shape: (1056,)
X_test shape: (1057, 30), y_test shape: (1057,)


In [21]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping

# Disable eager execution for better performance
tf.config.run_functions_eagerly(True)

# Define model architectures
def create_model1(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    return model

def create_model2(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    return model

def create_model3(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    return model

# Optimizers
optimizers = {
    'Adam': tf.keras.optimizers.Adam(learning_rate=0.001),
    'RMSprop': tf.keras.optimizers.RMSprop(learning_rate=0.001),
    'SGD': tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
}

# Model creators
model_creators = {'FNN': create_model1, 'DNN': create_model2, 'Regularized': create_model3}
histories = {}

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [25]:
input_dim = X_train.shape[1]
for model_name, create_model in model_creators.items():
    for opt_name in ['Adam', 'RMSprop', 'SGD']:
        print(f"Training {model_name} with {opt_name}")

        # Create a fresh model instance
        model = create_model(input_dim)

        # Create a fresh optimizer instance
        if opt_name == 'Adam':
            optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        elif opt_name == 'RMSprop':
            optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)
        else:  # SGD
            optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

        # Compile the model with the fresh optimizer
        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

        # Train the model
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=50,
            batch_size=32,
            callbacks=[early_stopping],
            verbose=1
        )

        # Store training history
        histories[f"{model_name}_{opt_name}"] = history.history

        model.save(f"/content/{model_name}_{opt_name}.h5")


Training FNN with Adam
Epoch 1/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 70ms/step - accuracy: 0.7202 - loss: 0.5453 - val_accuracy: 0.7206 - val_loss: 0.5431
Epoch 2/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 56ms/step - accuracy: 0.7837 - loss: 0.4609 - val_accuracy: 0.7377 - val_loss: 0.5206
Epoch 3/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 55ms/step - accuracy: 0.7928 - loss: 0.4392 - val_accuracy: 0.7377 - val_loss: 0.5010
Epoch 4/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 56ms/step - accuracy: 0.7881 - loss: 0.4389 - val_accuracy: 0.7434 - val_loss: 0.5131
Epoch 5/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 57ms/step - accuracy: 0.8022 - loss: 0.4218 - val_accuracy: 0.7557 - val_loss: 0.4857
Epoch 6/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 61ms/step - accuracy: 0.8039 - loss: 0.4223 - val_accuracy: 0.7670 - val_loss: 

In [11]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
import tensorflow as tf

# Clear Keras session to avoid conflicts
tf.keras.backend.clear_session()

# Define model architectures (needed for model_creators)
def create_model1(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    return model

def create_model2(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    return model

def create_model3(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    return model

# Model creators
model_creators = {'FNN': create_model1, 'DNN': create_model2, 'Regularized': create_model3}

# Evaluate each model on test set
test_results = {}
for model_name, create_model in model_creators.items():
    for opt_name in ['Adam', 'RMSprop', 'SGD']:
        print(f"Evaluating {model_name} with {opt_name}")

        # Load the saved model
        model = load_model(f"/content/{model_name}_{opt_name}.h5")

        # Predict on test set
        y_pred = (model.predict(X_test) > 0.5).astype(int)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        # Store results
        test_results[f"{model_name}_{opt_name}"] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1
        }

# Print test results
for model_opt, metrics in test_results.items():
    print(f"\n{model_opt}:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")

Evaluating FNN with Adam


OSError: Unable to synchronously open file (file signature not found)

In [12]:
import os
print(os.listdir('/content'))

['.config', 'Regularized_RMSprop.h5', 'Regularized_SGD.h5', 'DNN_RMSprop.h5', 'FNN_RMSprop.h5', 'DNN_Adam.h5', 'FNN_Adam.h5', 'DNN_SGD.h5', 'Regularized_Adam.h5', 'FNN_SGD.h5', 'WA_Fn-UseC_-Telco-Customer-Churn.csv', '.ipynb_checkpoints', 'sample_data']


In [13]:
for file in os.listdir('/content'):
    if file.endswith('.h5'):
        size = os.path.getsize(f'/content/{file}') / 1024  # Size in KB
        print(f"{file}: {size:.2f} KB")

Regularized_RMSprop.h5: 0.00 KB
Regularized_SGD.h5: 0.00 KB
DNN_RMSprop.h5: 0.00 KB
FNN_RMSprop.h5: 0.00 KB
DNN_Adam.h5: 0.00 KB
FNN_Adam.h5: 0.00 KB
DNN_SGD.h5: 0.00 KB
Regularized_Adam.h5: 0.00 KB
FNN_SGD.h5: 0.00 KB


In [14]:
import h5py
try:
    with h5py.File('/content/FNN_Adam.h5', 'r') as f:
        print("File is a valid HDF5 file")
        print("Keys:", list(f.keys()))
except Exception as e:
    print("Error opening file:", e)

Error opening file: Unable to synchronously open file (file signature not found)
