In [1]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split 
from sklearn import metrics 
from tabulate import tabulate
from sklearn.metrics import confusion_matrix
import seaborn as sns
import csv
from datetime import datetime
import openpyxl as op

In [2]:
# Import data and add labels

corona_5g = pd.read_csv("~/Desktop/twitterAnalysis/FakeNews/dataset/graphs/5g_corona_conspiracy/feature_df_5g_corona_conspiracy.csv")
corona_5g['label'] = '1'
corona_5g['conspiracy'] = '1'

non_conspiracy = pd.read_csv("~/Desktop/twitterAnalysis/FakeNews/dataset/graphs/non_conspiracy/feature_df_non_conspiracy.csv")
non_conspiracy['label'] = '2'
non_conspiracy['conspiracy'] = '0'

other_conspiracy = pd.read_csv("~/Desktop/twitterAnalysis/FakeNews/dataset/graphs/other_conspiracy/feature_df_other_conspiracy.csv")
other_conspiracy['label'] = '3'
other_conspiracy['conspiracy'] = '0'

all_data = corona_5g.append(non_conspiracy)
all_data = all_data.append(other_conspiracy)

all_data = all_data.dropna()

In [3]:
# Split into train and test sets

x = all_data[all_data.columns[0:18]]
y = all_data[all_data.columns[18]]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

In [4]:
official_test = pd.read_csv("/Users/maria/Desktop/twitterAnalysis/FakeNews/dataset/graphs/test_graphs/feature_df_test.csv")
official_test_complete = official_test.dropna()

In [5]:
# Default Naive Bayes

model = GaussianNB()
model.fit(x_train, y_train)
y_pred = model.predict(x_test) 

precision, recall, fscore, support = metrics.precision_recall_fscore_support(y_test, y_pred)
labels = ['5G-conspiracy', 'non-conspiracy', 'other-conspiracy']
acc = metrics.accuracy_score(y_test, y_pred)
mcc = metrics.matthews_corrcoef(y_test, y_pred)
prec = metrics.precision_score(y_test, y_pred, average='macro')
rec = metrics.recall_score(y_test, y_pred, average='macro')
print("Overall accuracy:",metrics.accuracy_score(y_test, y_pred), '\n')
print("MCC: " + str(mcc), '\n')
print(tabulate({"Label": labels,
               "Precision": precision,
               "Recall": recall,
               "F-score": fscore,
               "Support": support}, headers="keys"))
workbook = op.load_workbook("/Users/maria/Desktop/twitterAnalysis/FakeNews/scripts/graphs/model_summary.xlsx")
worksheet = workbook.worksheets[0]
row_n = worksheet.max_row+1
worksheet.cell(row=row_n, column=1).value = 'naive_bayes'
worksheet.cell(row=row_n, column=2).value = 'Naive Bayes'
worksheet.cell(row=row_n, column=3).value = 'Structural Data'
worksheet.cell(row=row_n, column=4).value = 'Multi-class'
worksheet.cell(row=row_n, column=5).value = mcc
worksheet.cell(row=row_n, column=6).value = acc
worksheet.cell(row=row_n, column=7).value = prec
worksheet.cell(row=row_n, column=8).value = rec
worksheet.cell(row=row_n, column=9).value = datetime.now()
workbook.save('/Users/maria/Desktop/twitterAnalysis/FakeNews/scripts/graphs/model_summary.xlsx')

Overall accuracy: 0.691304347826087 

MCC: 0.17565564948432624 

Label               Precision    Recall    F-score    Support
----------------  -----------  --------  ---------  ---------
5G-conspiracy        0.272727  0.204545   0.233766         44
non-conspiracy       0.75969   0.885542   0.817803        332
other-conspiracy     0.375     0.178571   0.241935         84


In [6]:
official_test_complete['label'] = model.predict(official_test_complete)
official_test_all = pd.concat([official_test, official_test_complete], axis=1)
labels = official_test_all['label']
labels.fillna('0', inplace=True)
labels.to_csv('naive_bayes.csv', header=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


In [7]:
# Sources

# https://www.datacamp.com/community/tutorials/naive-bayes-scikit-learn