# In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.neighbors.kde import KernelDensity
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Load the training CSV and label its columns with the column index of
# 'Sheet1' from the Excel workbook, then discard the "['positive'" column.
# NOTE(review): read_csv consumes the first CSV row as a header before the
# labels are overwritten -- confirm the CSV really has a header row.
data = pd.read_csv('bow&dic/bow_train_over15.csv')
DF_file = pd.ExcelFile('bow&dic/df_Dic_org_over15_t.xlsx')
dfreq = DF_file.parse('Sheet1')
data.columns = dfreq.columns
data = data.drop(columns=["['positive'"])
# Disabled alternatives kept for reference:
#   data = data.drop(columns=['0.1'])  # delete positive column
#   data = data[:100]


# Ordered 80/20 split: the first four fifths of the rows train the models and
# the remaining rows are held out for testing. Rows are not shuffled.
row_count = len(data)
split_point = int(row_count * 4 / 5)
train_data = data.iloc[:split_point]
test_data = data.iloc[split_point:]

# Candidate models; each one is fitted and scored by the loop further down.
classifiers = [
    GaussianNB(),
    KNeighborsClassifier(n_neighbors=3),
    NearestCentroid(),
    RadiusNeighborsClassifier(radius=50.0),
    # Disabled:
    # LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial'),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
]

# Disabled density-estimation experiment:
#   kde = KernelDensity().fit(X_train)
#   kde.score_samples(X_train)

# The first column is the label; every column from the second onward is a
# feature.
y_train, X_train = train_data.iloc[:, 0], train_data.iloc[:, 1:]
y_test, X_test = test_data.iloc[:, 0], test_data.iloc[:, 1:]
    
# Fit each candidate on the training split, print its hold-out metrics, then
# cross-validate it on the training split.
for clf in classifiers:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    model_name = type(clf).__name__

    print('---------------------------------------')

    # Hold-out accuracy.
    accuracy = accuracy_score(y_test, y_pred)
    print(model_name + " test Accuracy Rate is: %f" % accuracy)

    # Micro- and macro-averaged F1.
    f1_micro = f1_score(y_test, y_pred, average='micro')
    print(model_name + " F1_micro is: %f" % f1_micro)
    f1_macro = f1_score(y_test, y_pred, average='macro')
    print(model_name + " F1_macro is: %f" % f1_macro)

    # NOTE(review): without `average`, f1_score falls back to its binary
    # default, which scikit-learn rejects for multiclass targets -- confirm
    # the labels are binary.
    f1_sc = f1_score(y_test, y_pred)
    print('f1_score is: %f' % f1_sc)

    # NOTE(review): RMSE is computed directly on the label values, so it
    # presumably expects numeric labels -- confirm.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print("RMSE= ", rmse)

    # cv=10 cross-validation on the training split; the spread is reported as
    # two standard deviations of the fold scores.
    scores = cross_val_score(clf, X_train, y_train, cv=10)
    print("validation Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

# Disabled experiment: the code below sits inside a bare string literal, so it
# is never executed. It fits a LogisticRegression on degree-2 polynomial
# features and prints the same set of metrics as the classifier loop.
# NOTE(review) before re-enabling:
#   - the test features are built with fit_transform(X_test) rather than
#     transform(X_test);
#   - every metric is computed from y_pred, not from y_poly_pred;
#   - cross_val_score is given clf, not model.
'''    
# linear model LogisticRegression degree =2
polynomial_features= PolynomialFeatures(degree=2)
x_poly = polynomial_features.fit_transform(X_train)
x_poly_test = polynomial_features.fit_transform(X_test)
model = LogisticRegression(random_state=0, solver='lbfgs',multi_class='multinomial')
model.fit(x_poly, y_train)
y_poly_pred = model.predict(x_poly_test)
# Calculate Accuracy Rate by using accuracy_score()
print('---------------------------------------')
print ('LogisticRegression_degree = 2'+ " test Accuracy Rate is: %f" % accuracy_score(y_test, y_pred))

print ('LogisticRegression_degree = 2' + " F1_micro is: %f" % f1_score(y_test, y_pred, average='micro'))
print ('LogisticRegression_degree = 2' + " F1_macro is: %f" % f1_score(y_test, y_pred, average='macro'))
f1_sc = f1_score(y_test, y_pred, average=None)
print('f1_score is: %f' % f1_sc.mean())
print("RMSE= " ,np.sqrt(mean_squared_error(y_test,y_pred)) )

scores = cross_val_score(clf, X_train, y_train, cv=10)
print("validation Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
'''

