# Klasifikasi Sentimen dengan algoritma KNN
Setelah analisis sentimen pada tweet selesai, tahap selanjutnya adalah mengklasifikasikan sentimen pada dataset menggunakan algoritma K-Nearest Neighbors (KNN).

## 1. Import Dataset hasil analisis sentimen

In [1]:
import pandas as pd

# Read the results of the earlier sentiment-analysis step.
main_dataset_path = 'sentiment_analysis_results.csv'
dataset = pd.read_csv(filepath_or_buffer=main_dataset_path)

# Make sure the 'sentiment' column is present: selecting it raises KeyError
# if it is missing, otherwise the first five labels are shown.
print(dataset[['sentiment']].head(n=5))

  sentiment
0   neutral
1  positive
2  negative
3  positive
4   neutral


## 2. Import Dataset Unigram Features

In [2]:
# Read the precomputed unigram feature table from its CSV file.
feature_file_path = 'unigram_features_only.csv'
feature_df = pd.read_csv(filepath_or_buffer=feature_file_path)

# Preview the top five rows to confirm the table loaded as expected.
print(feature_df.head(n=5))

   __  abide  able  aboard  abyss  accept  accepted  according  accurate  \
0   0      0     0       0      0       0         0          0         0   
1   0      0     0       0      0       0         0          0         0   
2   0      0     0       0      0       0         0          0         0   
3   0      0     0       0      0       0         0          0         0   
4   0      0     0       0      0       0         0          0         0   

   accustomed  ...  yes  yesterday  yet  you  youd  youprincess  youre  yup  \
0           0  ...    0          0    0    1     0            0      0    0   
1           0  ...    0          0    0    0     0            0      0    0   
2           0  ...    0          0    0    0     0            0      0    0   
3           0  ...    0          0    0    1     0            0      0    0   
4           0  ...    0          0    0    1     0            0      0    0   

   zelda  zutheskunk  
0      0           0  
1      0           0  

## 3. Pisahkan label dan data

In [3]:
# Features and labels are loaded from separate CSV files, so verify that
# both tables have the same number of rows before using them together.
assert feature_df.shape[0] == dataset.shape[0], "Feature and dataset row counts do not match!"

# X: unigram feature table; y: the sentiment label column.
X, y = feature_df, dataset['sentiment']

## 4. Klasifikasi KNN dengan K = 3

In [4]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create a KNN classifier with k neighbors and fit it on the training split
# (fit() returns the fitted estimator, so the call can be chained).
k = 3
knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict the held-out rows, then print the per-class report followed by
# the overall accuracy.
y_pred = knn.predict(X_test)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Accuracy:", accuracy_score(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00        21
     neutral       0.29      1.00      0.44        20
    positive       0.94      0.48      0.63        61

    accuracy                           0.48       102
   macro avg       0.41      0.49      0.36       102
weighted avg       0.62      0.48      0.46       102

Accuracy: 0.4803921568627451
