In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



In [17]:
# Load the raw table and drop the "Unnamed: 32" column
# (presumably an empty artifact column of the CSV export - TODO confirm).
df=pd.read_csv("Breast_cancer_dataset.csv")
df=df.drop(columns=["Unnamed: 32"])

# Column 1 holds the class label and columns 2.. hold the features.
# Column 0 is skipped (assumed to be the "id" column - verify against the CSV).
data=df.iloc[:,2:]
label=df.iloc[:,1]

# NOTE: the former bare `df.set_index("id")` was removed. set_index returns a
# new frame and its result was discarded, so the call never had any effect.

# Fixed random_state keeps the 80/20 split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2, random_state=42)






In [18]:
class GaussianNaiveBayes:
    """Two-class Gaussian Bayes classifier for the labels "M" and "B".

    Each class is modelled by a multivariate normal distribution with its own
    mean vector and its own *full* covariance matrix, so, despite the class
    name, no feature-independence ("naive") assumption is made.

    "M" is the positive class. Every training label that is not "M" is
    treated as the negative class, which is predicted as "B".

    Attributes set by ``__init__``:
        train_data, train_label: the arguments exactly as passed in.
        positive_possibility, negative_possibility: class priors.
        positive_mean, negative_mean: per-class feature means.
        positive_covariance, negative_covariance: per-class covariance matrices.
        positive_cov_inv, negative_cov_inv: inverses of the covariances.
        positive_cov_det, negative_cov_det: determinants of the covariances.
        positive_cov_logdet, negative_cov_logdet: log-determinants, used by
            ``predict`` to stay in the log domain.
    """

    def __init__(self,train_data,train_label):
        """Estimate priors, means and covariances from the training set.

        Args:
            train_data: 2-D array-like of shape (n_samples, n_features).
            train_label: 1-D array-like of n_samples labels ("M" or "B").

        Raises:
            ValueError: if either class has fewer than two samples (a
                covariance matrix cannot be estimated from less).
            numpy.linalg.LinAlgError: if a class covariance matrix is
                singular or not positive definite.
        """
        self.train_data = train_data
        self.train_label = train_label

        features = np.asarray(train_data, dtype=np.float64)
        # Element-wise Python comparison keeps the mask well-defined for any
        # label container (list, ndarray, Series.values).
        is_positive = np.array([label == "M" for label in train_label], dtype=bool)
        positive_data = features[is_positive]
        negative_data = features[~is_positive]
        if len(positive_data) < 2 or len(negative_data) < 2:
            raise ValueError(
                "each class needs at least two training samples to estimate a covariance matrix"
            )

        # Priors are derived from the same partition as the data, so they
        # always sum to one.
        self.positive_possibility = len(positive_data) / len(features)
        self.negative_possibility = len(negative_data) / len(features)

        self.positive_mean=np.mean(positive_data,axis=0,dtype=np.float64)
        self.negative_mean=np.mean(negative_data,axis=0,dtype=np.float64)
        self.positive_covariance=np.cov(positive_data,rowvar=False)
        self.negative_covariance=np.cov(negative_data,rowvar=False)
        self.positive_cov_inv=np.linalg.inv(self.positive_covariance)
        self.negative_cov_inv=np.linalg.inv(self.negative_covariance)
        self.positive_cov_det=np.linalg.det(self.positive_covariance)
        self.negative_cov_det=np.linalg.det(self.negative_covariance)
        self.positive_cov_logdet=self._checked_logdet(self.positive_covariance, "M")
        self.negative_cov_logdet=self._checked_logdet(self.negative_covariance, "B")

    @staticmethod
    def _checked_logdet(covariance, class_name):
        """Return log(det(covariance)), rejecting non-positive determinants."""
        sign, logdet = np.linalg.slogdet(covariance)
        if sign <= 0:
            raise np.linalg.LinAlgError(
                "covariance matrix of class %r is not positive definite" % class_name
            )
        return logdet

    def gaussian_density(self, x, mean, cov_inv, cov_det):
        """Evaluate the multivariate normal density at a single sample ``x``.

        Kept for backward compatibility. The value underflows to 0.0 for
        samples far from ``mean``, which is why ``predict`` works with
        log-densities instead of calling this method.

        Args:
            x: 1-D feature vector.
            mean: 1-D mean vector of the distribution.
            cov_inv: inverse of the covariance matrix.
            cov_det: determinant of the covariance matrix.

        Returns:
            The density value as a float.
        """
        d = len(x)
        exponent = -0.5 * np.dot(np.dot((x - mean), cov_inv), (x - mean).T)
        return (1 / ((2 * np.pi) ** (d / 2) * np.sqrt(cov_det))) * np.exp(exponent)

    def _log_joint(self, x, prior, mean, cov_inv, cov_logdet):
        """Return log(prior) + log N(x | mean, cov) for a single sample."""
        diff = x - mean
        mahalanobis_sq = diff @ cov_inv @ diff
        log_density = -0.5 * (len(x) * np.log(2 * np.pi) + cov_logdet + mahalanobis_sq)
        return np.log(prior) + log_density

    def predict(self, test_data):
        """Predict "M" or "B" for every row of ``test_data``.

        The class posteriors share the same normalising denominator, so
        comparing the joint scores prior * density is enough. The comparison
        is done in the log domain: multiplying raw densities underflows to
        0.0 for distant samples, which used to yield 0/0 = NaN posteriors and
        a silent fall-through to "B".

        Args:
            test_data: 2-D array-like of shape (n_samples, n_features).

        Returns:
            A list with one predicted label per row.
        """
        samples = np.asarray(test_data, dtype=np.float64)
        predictions = []
        for x in samples:
            positive_score = self._log_joint(
                x, self.positive_possibility, self.positive_mean,
                self.positive_cov_inv, self.positive_cov_logdet)
            negative_score = self._log_joint(
                x, self.negative_possibility, self.negative_mean,
                self.negative_cov_inv, self.negative_cov_logdet)
            # Ties go to "B", matching the strict ">" of the original rule.
            predictions.append("M" if positive_score > negative_score else "B")
        return predictions

    def score(self, test_data, test_label):
        """Return the accuracy of ``predict(test_data)`` against ``test_label``."""
        predictions = self.predict(test_data)
        return accuracy_score(test_label, predictions)
    
    

        

In [None]:
# Fit the classifier on the training split and report accuracy on the held-out split.
# score() already runs predict() internally, so the former standalone
# gnb.predict(...) call (whose result was discarded) is not needed.
gnb=GaussianNaiveBayes(x_train.values,y_train.values)
print("gnb准确率为:",gnb.score(x_test.values,y_test.values))



gnb准确率为: 0.956140350877193


In [22]:
# Show the held-out feature frame as the cell's rich output.
x_test

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
204,12.47,18.60,81.09,481.9,0.09965,0.10580,0.08005,0.03821,0.1925,0.06373,...,14.97,24.64,96.05,677.9,0.1426,0.2378,0.2671,0.10150,0.3014,0.08750
70,18.94,21.31,123.60,1130.0,0.09009,0.10290,0.10800,0.07951,0.1582,0.05461,...,24.86,26.58,165.90,1866.0,0.1193,0.2336,0.2687,0.17890,0.2551,0.06589
131,15.46,19.48,101.70,748.9,0.10920,0.12230,0.14660,0.08087,0.1931,0.05796,...,19.26,26.00,124.90,1156.0,0.1546,0.2394,0.3791,0.15140,0.2837,0.08019
431,12.40,17.68,81.47,467.8,0.10540,0.13160,0.07741,0.02799,0.1811,0.07102,...,12.88,22.91,89.61,515.8,0.1450,0.2629,0.2403,0.07370,0.2556,0.09359
540,11.54,14.44,74.65,402.9,0.09984,0.11200,0.06737,0.02594,0.1818,0.06782,...,12.26,19.68,78.78,457.8,0.1345,0.2118,0.1797,0.06918,0.2329,0.08134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486,14.64,16.85,94.21,666.0,0.08641,0.06698,0.05192,0.02791,0.1409,0.05355,...,16.46,25.44,106.00,831.0,0.1142,0.2070,0.2437,0.07828,0.2455,0.06596
75,16.07,19.65,104.10,817.7,0.09168,0.08424,0.09769,0.06638,0.1798,0.05391,...,19.77,24.56,128.80,1223.0,0.1500,0.2045,0.2829,0.15200,0.2650,0.06387
249,11.52,14.93,73.87,406.3,0.10130,0.07808,0.04328,0.02929,0.1883,0.06168,...,12.65,21.19,80.88,491.8,0.1389,0.1582,0.1804,0.09608,0.2664,0.07809
238,14.22,27.85,92.55,623.9,0.08223,0.10390,0.11030,0.04408,0.1342,0.06129,...,15.75,40.54,102.50,764.0,0.1081,0.2426,0.3064,0.08219,0.1890,0.07796
