In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
col = ['id', 'diagnosis'] + [f'feature{i}' for i in range(1, 31)]
DataSet = pd.read_csv("/content/drive/MyDrive/Breast_cancer.csv", names=col)
DataSet['diagnosis'] = DataSet['diagnosis'].map({'M': 1, 'B': 0})
DataSet.head()


Unnamed: 0,id,diagnosis,feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,...,feature21,feature22,feature23,feature24,feature25,feature26,feature27,feature28,feature29,feature30
0,842302,1,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,1,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,1,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,1,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [None]:
X=DataSet.iloc[:,2:].values
Y=DataSet.iloc[:,1].values
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=0)

In [None]:
class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    @staticmethod
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    def initialize_weights(self, n_features):
        self.weights = np.zeros(n_features)
        self.bias = 0

    def compute_cost(self, X, y): #minimizing cross entropy
        m = X.shape[0]
        z = np.dot(X, self.weights) + self.bias
        predictions = self.sigmoid(z)
        cost = -(1 / m) * np.sum(y * np.log(predictions) + (1 - y) * np.log(1 - predictions))
        return cost

    def optimize_weights(self, X, y): #using gradient descent
        m = X.shape[0]
        for _ in range(self.num_iterations):
            z = np.dot(X, self.weights) + self.bias
            predictions = self.sigmoid(z)

            dw = (1 / m) * np.dot(X.T, (predictions - y))
            db = (1 / m) * np.sum(predictions - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def train(self, X_train, y_train): #training the model fitting the data
        n_features = X_train.shape[1]
        self.initialize_weights(n_features)
        self.optimize_weights(X_train, y_train)

    def predict(self, X):
        z = np.dot(X, self.weights) + self.bias
        predictions = self.sigmoid(z)
        return (predictions >= 0.5).astype(int)


In [None]:
SAmodel = LogisticRegression(learning_rate=0.01, num_iterations=1000)

SAmodel.train(X_train, Y_train)

# Make predictions
train_predictions = SAmodel.predict(X_train)
test_predictions = SAmodel.predict(X_test)

# Evaluate accuracy
train_accuracy = np.mean(train_predictions == Y_train) * 100
test_accuracy = np.mean(test_predictions == Y_test) * 100

print(f"Train Accuracy: {train_accuracy:.2f}%")
print(f"Test Accuracy: {test_accuracy:.2f}%")


  return 1 / (1 + np.exp(-z))


Train Accuracy: 90.11%
Test Accuracy: 85.96%


In [None]:
class KNearestNeighbors:
    def __init__(self, k=3):
        self.k = k
        self.X_train = None
        self.y_train = None

    @staticmethod
    def euclidean_distance(x1, x2):
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def fit(self, X_train, y_train):
        """Store the training data."""
        self.X_train = X_train
        self.y_train = y_train

    def get_neighbors(self, X_test_instance):
        distances = [
            (i, self.euclidean_distance(self.X_train[i], X_test_instance))
            for i in range(self.X_train.shape[0])
        ]
        distances.sort(key=lambda x: x[1])
        neighbors = [idx for idx, dist in distances[:self.k]]
        return neighbors

    def predict_instance(self, X_test_instance):
        neighbors = self.get_neighbors(X_test_instance)
        neighbor_labels = [self.y_train[j] for j in neighbors]
        return np.bincount(neighbor_labels).argmax()

    def predict(self, X_test):
        return np.array([self.predict_instance(X_test[i]) for i in range(X_test.shape[0])])


In [None]:
k = [i for i in range(1,36,2)]
for i in k:
  SAknn = KNearestNeighbors(k=i)
  SAknn.fit(X_train, Y_train)
  y_pred_train = SAknn.predict(X_train)
  y_pred_test = SAknn.predict(X_test)

# Evaluate accuracy
  train_accuracy = np.mean(y_pred_train == Y_train) * 100
  test_accuracy = np.mean(y_pred_test == Y_test) * 100
  print(f'For K= {i}')
  print(f"Train Accuracy: {train_accuracy:.2f}%")
  print(f"Test Accuracy: {test_accuracy:.2f}%")
SAknn = KNearestNeighbors(k=k)

#Fit the model
SAknn.fit(X_train, Y_train)

#Make predictions



For K= 1
Train Accuracy: 100.00%
Test Accuracy: 91.23%
For K= 3
Train Accuracy: 96.04%
Test Accuracy: 91.23%
For K= 5
Train Accuracy: 94.95%
Test Accuracy: 93.86%


KeyboardInterrupt: 