In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler

In [None]:
# Column names for the data file: ten feature columns followed by the
# "class" label column.
cols = [
    "fLength", "fWidth", "fSize", "fConc", "fConc1",
    "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist",
    "class",
]
# The column names are supplied explicitly through names=.
df = pd.read_csv("magic04.data", names=cols)
df.head()

In [None]:
# Encode the label as an integer: 1 where the raw value is "g", 0 otherwise.
# The dtype guard keeps the cell safe to re-run: once the column already holds
# integers, comparing it against "g" again would silently turn every label
# into 0.
if not pd.api.types.is_integer_dtype(df["class"]):
    df["class"] = (df["class"] == "g").astype(int)

In [None]:
# Re-inspect the first rows now that "class" has been re-encoded as integers.
df.head()

In [None]:
# One figure per feature column: overlaid, density-normalised histograms of the
# rows with class == 1 (drawn in blue, "gamma") and class == 0 (red, "hadron").
class_styles = ((1, 'blue', 'gamma'), (0, 'red', 'hadron'))
for label in cols[:-1]:
    for class_value, colour, legend_name in class_styles:
        values = df.loc[df["class"] == class_value, label]
        plt.hist(values, color=colour, label=legend_name, alpha=0.7, density=True)
    plt.title(label)
    plt.ylabel("Probability")
    plt.xlabel(label)
    plt.legend()
    plt.show()

Train, validation, test datasets


In [None]:
# Shuffle the rows once, then cut at 60% and 80% of the length to obtain the
# train / validation / test splits (60% / 20% / 20%).
# Positional slicing with .iloc yields the same three pieces as
# np.split(shuffled, [train_end, valid_end]) but stays within pandas.
# NOTE(review): np.split on a DataFrame appears to rely on
# DataFrame.swapaxes, which newer pandas releases deprecate - confirm.
# A fixed random_state makes the split reproducible after a kernel restart.
shuffled = df.sample(frac=1, random_state=42)
train_end = int(0.6 * len(df))
valid_end = int(0.8 * len(df))
train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]

In [None]:
def scale_dataset(dataframe, oversample=False, scaler=None):
    """Scale the feature columns of ``dataframe`` and optionally oversample.

    The last column of ``dataframe`` is treated as the label; every other
    column is treated as a feature.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table whose final column holds the labels.
    oversample : bool, default False
        When True, the scaled features and labels are resampled with
        ``RandomOverSampler().fit_resample``.
    scaler : object with a ``transform`` method, optional
        An already-fitted scaler (for example a ``StandardScaler`` fitted on
        the training features). When given, it is applied as-is, so that
        validation and test data can be scaled with the training statistics.
        When omitted, a new ``StandardScaler`` is fitted on ``dataframe``
        itself, which is the original behaviour.

    Returns
    -------
    data : numpy.ndarray
        Scaled features with the labels appended as the last column.
    X : numpy.ndarray
        Scaled (and possibly resampled) feature matrix.
    y : numpy.ndarray
        Label vector aligned with ``X``.
    """
    X = dataframe[dataframe.columns[:-1]].values
    y = dataframe[dataframe.columns[-1]].values

    if scaler is None:
        # No scaler supplied: fit a fresh one on this data.
        X = StandardScaler().fit_transform(X)
    else:
        # Reuse the caller's fitted scaler without refitting it.
        X = scaler.transform(X)

    if oversample:
        ros = RandomOverSampler()
        X, y = ros.fit_resample(X, y)

    # y is 1-D; reshape it into a column so it can be stacked next to X.
    data = np.hstack((X, np.reshape(y, (-1, 1))))
    return data, X, y

In [None]:
# Prepare the three splits. Only the training split is oversampled; the
# validation and test splits are scaled but not resampled.
# Optional sanity check of the class counts in the training split before
# oversampling:
#print(len(train[train["class"]==1])) #gamma
#print(len(train[train["class"] == 0])) #hadron
# NOTE(review): each of train / valid / test is rebound below from a DataFrame
# to the stacked NumPy array returned by scale_dataset, so this cell cannot be
# re-run on its own without re-running the split cell first.
# NOTE(review): scale_dataset fits a new StandardScaler on every call here, so
# each split is standardised with its own statistics rather than the training
# split's.
train, X_train, y_train = scale_dataset(train,oversample=True)
valid, X_valid, y_valid = scale_dataset(valid, oversample=False)
test, X_test, y_test = scale_dataset(test, oversample=False)
# Editor practice note (translated): how to quickly select a line, duplicate a
# line, select all identical words in a line, and multi-select words on
# different lines.

KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [None]:
# k-nearest-neighbours classifier with k = 5, fitted on the training split.
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X=X_train, y=y_train)

In [None]:
# Predict on the test split and print the per-class classification report.
y_pred = knn_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Gaussian naive Bayes with default settings, fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
nb_model = GaussianNB().fit(X_train, y_train)


In [None]:
# Predict on the test split with the naive Bayes model and print the report.
y_pred = nb_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

Logistic Regression

In [None]:
# Multi-cursor selection
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with default settings, fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
lg_model = LogisticRegression().fit(X_train, y_train)

In [None]:
# Predict on the test split with the logistic regression model and print the
# report.
y_pred = lg_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# Support vector classifier with default settings, fitted on the training
# split. fit() returns the estimator itself, so the two steps are chained.
svm_model = SVC().fit(X_train, y_train)

In [None]:
# Predict on the test split with the SVM model and print the report.
y_pred = svm_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

Neural Network

In [None]:
import tensorflow as tf 

In [None]:
def plot_loss(history):
    """Plot the training and validation loss curves of a training run.

    ``history`` must expose a ``history`` mapping that contains the ``'loss'``
    and ``'val_loss'`` series; both are drawn on one figure against an axis
    labelled 'Epoch'.
    """
    for series in ('loss', 'val_loss'):
        # Each curve is labelled with the key it was read from.
        plt.plot(history.history[series], label=series)
    plt.xlabel('Epoch')
    plt.ylabel('Binary crossentropy')
    plt.legend()
    plt.grid(True)
    plt.show()

def plot_accuracy(history):
    """Plot the training and validation accuracy curves of a training run.

    ``history`` must expose a ``history`` mapping that contains the
    ``'accuracy'`` and ``'val_accuracy'`` series; both are drawn on one figure
    against an axis labelled 'Epoch'.
    """
    for series in ('accuracy', 'val_accuracy'):
        # Each curve is labelled with the key it was read from.
        plt.plot(history.history[series], label=series)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
# Small fully-connected binary classifier: two 32-unit ReLU hidden layers
# followed by a single sigmoid output unit.
nn_model = tf.keras.Sequential([
    # The input is declared with an explicit Input object (10 = number of
    # feature columns) instead of passing input_shape to the first Dense
    # layer, which newer Keras versions discourage.
    tf.keras.Input(shape=(10,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Adam optimiser with learning rate 0.001, binary cross-entropy loss, and
# accuracy tracked as the reported metric.
nn_model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train for 100 epochs in mini-batches of 32.
# Validation metrics are computed on the dedicated validation split
# (X_valid, y_valid) prepared earlier, instead of validation_split=0.2.
# Keras takes a validation_split from the last samples of the arrays it is
# given, and X_train has been oversampled, so that tail would not be a clean
# held-out set (NOTE(review): it likely consists largely of duplicated rows).
# As a consequence the model now trains on all of X_train.
history = nn_model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Show the loss curves first, then the accuracy curves, for the training run
# stored in `history`.
plot_loss(history)
plot_accuracy(history)