<a href="https://colab.research.google.com/github/Raheelrazzaq063/finalyearproject/blob/main/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

# Load dataset
file_path = "/content/drive/MyDrive/my data scineproject/breast-cancer.csv"
df = pd.read_csv(file_path)

# Encode categorical columns using Label Encoding
label_encoders = {}
for col in df.columns:
    if df[col].dtype == 'object':
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# Splitting dataset into features (X) and target (y)
X = df.drop(columns=['class'])  # Features
y = df['class']  # Target

# Feature Scaling
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Splitting into training and test sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Apply Data Augmentation (SMOTE + Undersampling)
smote = SMOTE(random_state=42)
under_sampler = RandomUnderSampler(random_state=42)
pipeline = Pipeline([('smote', smote), ('under', under_sampler)])
X_train, y_train = pipeline.fit_resample(X_train, y_train)

# Initialize models with Expanded Hyperparameter Tuning
param_grid_rf = {
    'n_estimators': [100, 200, 300, 500],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}
param_grid_svm = {
    'C': [0.1, 1, 10, 50],
    'kernel': ['rbf', 'linear', 'poly'],
    'gamma': ['scale', 'auto']
}

rf_grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid_rf, cv=5, n_jobs=-1)
svm_grid = GridSearchCV(SVC(probability=True), param_grid_svm, cv=5, n_jobs=-1)

# Train models
rf_grid.fit(X_train, y_train)
svm_grid.fit(X_train, y_train)

# Best models
rf_best = rf_grid.best_estimator_
svm_best = svm_grid.best_estimator_

# Evaluate models
models = {"Random Forest": rf_best, "SVM": svm_best}
results = {}
for name, model in models.items():
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy
    print(f"{name} Accuracy: {accuracy:.4f}")
    print(classification_report(y_test, y_pred))

# Display best model
best_model_name = max(results, key=results.get)
print(f"Best Model: {best_model_name} with Accuracy: {results[best_model_name]:.4f}")


Random Forest Accuracy: 0.6897
              precision    recall  f1-score   support

           0       0.77      0.80      0.79        41
           1       0.47      0.41      0.44        17

    accuracy                           0.69        58
   macro avg       0.62      0.61      0.61        58
weighted avg       0.68      0.69      0.68        58

SVM Accuracy: 0.6724
              precision    recall  f1-score   support

           0       0.75      0.80      0.78        41
           1       0.43      0.35      0.39        17

    accuracy                           0.67        58
   macro avg       0.59      0.58      0.58        58
weighted avg       0.66      0.67      0.66        58

Best Model: Random Forest with Accuracy: 0.6897


In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

# Load dataset
file_path = "/content/drive/MyDrive/my data scineproject/breast-cancer.csv"
df = pd.read_csv(file_path)

# Encode categorical columns using Label Encoding
label_encoders = {}
for col in df.columns:
    if df[col].dtype == 'object':
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# Splitting dataset into features (X) and target (y)
X = df.drop(columns=['class']).values  # Convert to numpy array
y = df['class'].values

# Feature Scaling
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Splitting into training and test sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Apply Data Augmentation (SMOTE + Undersampling)
smote = SMOTE(random_state=42)
under_sampler = RandomUnderSampler(random_state=42)
pipeline = Pipeline([('smote', smote), ('under', under_sampler)])
X_train, y_train = pipeline.fit_resample(X_train, y_train)

# Reshape data for CNN (assuming features are treated as a 1D spatial structure)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, 1)

# CNN Model
model = keras.Sequential([
    layers.Conv2D(32, (3, 1), activation='relu', input_shape=(X_train.shape[1], 1, 1)),
    layers.Conv2D(64, (3, 1), activation='relu'),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # Binary classification
])

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
y_pred = (model.predict(X_test) > 0.5).astype("int32")
accuracy = accuracy_score(y_test, y_pred)
print(f"CNN Model Accuracy: {accuracy:.4f}")
print(classification_report(y_test, y_pred))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - accuracy: 0.5854 - loss: 0.6766 - val_accuracy: 0.6034 - val_loss: 0.6609
Epoch 2/20
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6921 - loss: 0.6194 - val_accuracy: 0.6379 - val_loss: 0.6027
Epoch 3/20
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7028 - loss: 0.6001 - val_accuracy: 0.7069 - val_loss: 0.5893
Epoch 4/20
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6174 - loss: 0.6156 - val_accuracy: 0.6379 - val_loss: 0.6252
Epoch 5/20
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.6580 - loss: 0.5759 - val_accuracy: 0.7069 - val_loss: 0.5886
Epoch 6/20
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7308 - loss: 0.5036 - val_accuracy: 0.6552 - val_loss: 0.6108
Epoch 7/20
[1m10/10[0m [32m━━━━