-
Notifications
You must be signed in to change notification settings - Fork 0
/
svc.py
69 lines (50 loc) · 2.43 KB
/
svc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from get_data_from_files import get_dataset
def classification(max_feautures, min_df, max_df, kernel, degree):
    """Train an SVC on TF-IDF features of the reviews and report its accuracy.

    Reads the module-level globals ``list_reviews`` (the review texts) and
    ``dataset`` (whose ``"label"`` column supplies the targets); both must be
    bound before this function is called.

    Args:
        max_feautures: Forwarded to ``TfidfVectorizer(max_features=...)``.
        min_df: Forwarded to ``TfidfVectorizer(min_df=...)``.
        max_df: Forwarded to ``TfidfVectorizer(max_df=...)``.
        kernel: Forwarded to ``SVC(kernel=...)``.
        degree: Forwarded to ``SVC(degree=...)``.

    Returns:
        The accuracy score measured on the held-out 30% of the data.
    """
    tfidf = TfidfVectorizer(
        max_features=max_feautures,
        min_df=min_df,
        max_df=max_df,
    )
    features = tfidf.fit_transform(list_reviews).toarray()

    # 70/30 shuffled split; no random_state is passed, so the split (and
    # therefore the reported score) can differ from one call to the next.
    train_x, test_x, train_y, test_y = train_test_split(
        features,
        dataset["label"],
        test_size=0.3,
        shuffle=True,
    )

    model = SVC(kernel=kernel, degree=degree)
    model.fit(train_x, train_y)
    predictions = model.predict(test_x)

    # Score once and reuse the value for both the printout and the return.
    accuracy = accuracy_score(test_y, predictions)

    print("Отчёт по классификации: ")
    print(classification_report(test_y, predictions))
    print("Точность: " + str(accuracy))
    return accuracy
def test():
    """Sweep ``max_features`` and print the accuracy obtained for each value.

    Calls ``classification`` once per value in ``range(1000, 11000, 500)``
    with the remaining hyper-parameters held at the baseline used elsewhere
    in this script (``min_df=40``, ``max_df=0.82``, ``kernel="poly"``,
    ``degree=2``), then prints a two-column table of accuracy versus the
    swept value.

    Returns:
        None. The table is printed to stdout.
    """
    accuracy_list = []
    value_list = []
    for value in range(1000, 11000, 500):
        # The swept value has to reach the model: previously the call used
        # fixed arguments, so every iteration trained the same configuration
        # and the table paired unrelated numbers. The range brackets the
        # baseline of 5500, so it is applied to max_features.
        # NOTE(review): confirm max_features (not min_df) is the intended
        # sweep target.
        accuracy = classification(value, 40, 0.82, "poly", 2)
        accuracy_list.append(accuracy)
        value_list.append(value)

    # Building from a dict keeps the swept values as integers instead of
    # coercing both columns to one float array.
    df = pd.DataFrame({"Точность": accuracy_list, "max_features": value_list})
    print(df)
def get(max_feautures, min_df, max_df, kernel, degree, comment):
    """Predict the label of a single comment with an SVC trained on the reviews.

    Reads the module-level globals ``list_reviews`` (the review texts) and
    ``dataset`` (whose ``"label"`` column supplies the targets); both must be
    bound before this function is called. Neither global is modified.

    Args:
        max_feautures: Forwarded to ``TfidfVectorizer(max_features=...)``.
        min_df: Forwarded to ``TfidfVectorizer(min_df=...)``.
        max_df: Forwarded to ``TfidfVectorizer(max_df=...)``.
        kernel: Forwarded to ``SVC(kernel=...)``.
        degree: Forwarded to ``SVC(degree=...)``.
        comment: The text to classify; converted with ``str()`` before use.

    Returns:
        The label predicted for ``comment`` (one of the values found in
        ``dataset["label"]``).
    """
    vectorizer = TfidfVectorizer(max_features=max_feautures, min_df=min_df, max_df=max_df)

    # Learn the vocabulary from the labelled corpus only. The comment must not
    # replace one of the reviews: that would pair its text with another
    # review's label and lose track of its row once the data is shuffled.
    reviews_vectorized = vectorizer.fit_transform(list_reviews).toarray()

    # No hold-out split here: the goal is a prediction for a new text, not an
    # accuracy estimate, so every labelled review is used for training.
    classifier = SVC(kernel=kernel, degree=degree)
    classifier.fit(reviews_vectorized, dataset["label"])

    # Project the comment into the already-fitted feature space.
    comment_vectorized = vectorizer.transform([str(comment)]).toarray()
    return classifier.predict(comment_vectorized)[0]
if __name__ == "__main__":
    import sys

    # These names are bound at module level on purpose: the functions above
    # read `dataset` (and classification() also reads `list_reviews`) as
    # globals, so they must exist before any of the calls below.
    dataset = get_dataset("cleaned_dataset")
    reviews = dataset["review"]
    # str() normalises every cell to text before vectorisation.
    list_reviews = [str(review) for review in reviews.tolist()]

    classification(5500, 40, 0.82, "poly", 2)
    test()

    # The comment to classify comes from the command line; the name
    # `comment` was previously never defined, which raised NameError here.
    # Without arguments the single-comment step is skipped.
    if len(sys.argv) > 1:
        comment = " ".join(sys.argv[1:])
        result = get(5500, 40, 0.82, "poly", 2, comment)
        # Show whatever get() reports back; nothing is printed for None.
        if result is not None:
            print(result)