In [None]:
# Import the libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the excel file into a pandas dataframe
df = pd.read_excel("/content/merged (1).xlsx")

# Reshape the dataframe into a long format using melt
df = df.melt(var_name="Category", value_name="Sentences")

# Drop any rows with missing values
df = df.dropna()

# Print the first 5 rows of the dataframe
print(df.head())

# Split the dataframe into features (X) and labels (y)
X = df["Sentences"]
y = df["Category"]

# Convert the labels into a single column with numerical values
# 0 for Black, 1 for Red, 2 for Grey
y = y.map({"Black": 0, "Red": 1, "Grey": 2})

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=200)

# Vectorize the sentences using CountVectorizer
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Create and fit a Multinomial Naive Bayes classifier
clf = MultinomialNB()
clf.fit(X_train, y_train)

# Predict the labels for the test set
y_pred = clf.predict(X_test)

# Print the accuracy score and the confusion matrix
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

# Predict the probabilities for a new sentence
new_sentence = "第一条　为了促进科技成果转化为现实生产力，规范科技成果转化活动，加速科学技术进步，推动经济建设和社会发展，制定本法。"
new_vector = vectorizer.transform([new_sentence])
probs = clf.predict_proba(new_vector)
print("Probabilities for Black, Red and Grey:", probs)

  Category                                          Sentences
0    Black                        全国人大常委会关于修改《中外合资经营企业法》的决定\n
1    Black                     　　　　　　　　　　　 （中华人民共和国主席令第27号 \n
2    Black              　　　　　　　1990年4月4日第七届全国人民代表大会第三次会议通过）\n
3    Black  　　《全国人民代表大会关于修改〈中华人民共和国中外合资经营企业法〉的决定》已由中华人民共和国...
4    Black            　　　　　　　　　　　　　　　　　　　　　 　　中华人民共和国主席 杨尚昆\n
Accuracy: 0.7007481296758105
Confusion matrix:
 [[266   0   2]
 [ 19   2   3]
 [ 95   1  13]]
Probabilities for Black, Red and Grey: [[0.98574308 0.0049788  0.00927813]]
