<a href="https://colab.research.google.com/github/17092003vamsi/guner/blob/main/meta_learner_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Load and preprocess data
data = pd.read_csv('project 2 sap.csv')  # Adjust file path if needed
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['Lable'].values)
X = data.drop(columns=['Lable'])

# Identify and handle non-numeric columns
# You may need to adapt this based on your specific data and desired handling
for column in X.columns:
    if X[column].dtype == object:  # Check if the column is of object type (likely string)
        try:
            # Attempt to convert to numeric, coercing errors to NaN
            X[column] = pd.to_numeric(X[column], errors='coerce')
        except ValueError:
            # If conversion fails, handle appropriately (e.g., drop, fill with a value, or encode)
            print(f"Column '{column}' contains non-numeric values. Consider dropping, filling, or encoding.")
            # Example: Drop the column
            # X = X.drop(columns=[column])

# Now convert to numpy array after handling non-numeric columns
X = X.values

# Replace NaNs with a fixed value or use imputation
X = np.nan_to_num(X)

# Ensure data is in a supported float type (e.g., float32)
X = X.astype(np.float32)  # Convert to float32

# ... (Rest of your code remains the same) ...

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

# CNN Model
def build_cnn_model(input_shape):
    model = Sequential([
        Input(shape=input_shape),  # Use Input layer here
        Conv1D(64, kernel_size=3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.25),
        Conv1D(128, kernel_size=3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Preprocess data for CNN
X_train_cnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val_cnn = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

cnn_model = build_cnn_model((X_train.shape[1], 1))
cnn_model.fit(X_train_cnn, y_train, validation_data=(X_val_cnn, y_val), epochs=20, batch_size=32,
              callbacks=[EarlyStopping(monitor='val_loss', patience=5), ReduceLROnPlateau()])

# Random Forest Model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predictions for Meta-Learner
cnn_val_preds = cnn_model.predict(X_val_cnn).flatten()
rf_val_preds = rf_model.predict_proba(X_val)[:, 1]
meta_X_val = np.vstack((cnn_val_preds, rf_val_preds)).T

cnn_test_preds = cnn_model.predict(X_test_cnn).flatten()
rf_test_preds = rf_model.predict_proba(X_test)[:, 1]
meta_X_test = np.vstack((cnn_test_preds, rf_test_preds)).T

# Meta-Learner Model
meta_model = LogisticRegression()
meta_model.fit(meta_X_val, y_val)

# Final Predictions and Evaluation
meta_test_preds = meta_model.predict(meta_X_test)
print("Meta-Learner Accuracy:", accuracy_score(y_test, meta_test_preds))
print("Classification Report:\n", classification_report(y_test, meta_test_preds))


Epoch 1/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 577ms/step - accuracy: 0.2008 - loss: -2.8707 - val_accuracy: 0.2032 - val_loss: 0.5901 - learning_rate: 0.0010
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 580ms/step - accuracy: 0.2023 - loss: -8.3082 - val_accuracy: 0.2086 - val_loss: -4.7231 - learning_rate: 0.0010
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 494ms/step - accuracy: 0.1969 - loss: -12.6303 - val_accuracy: 0.2086 - val_loss: -10.5782 - learning_rate: 0.0010
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 528ms/step - accuracy: 0.2090 - loss: -17.5004 - val_accuracy: 0.2139 - val_loss: -12.6964 - learning_rate: 0.0010
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 599ms/step - accuracy: 0.1768 - loss: -22.5296 - val_accuracy: 0.2406 - val_loss: -18.5986 - learning_rate: 0.0010
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
