<a href="https://colab.research.google.com/github/Exion007/Colab/blob/main/multiclass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from keras.layers import Embedding, LSTM, Dense
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
# Tokenizer (num_words / fit_on_texts / texts_to_sequences) is the Keras text
# tokenizer; scikit-learn's feature_extraction.text module does not provide one.
from keras.preprocessing.text import Tokenizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the merged CSV with the tolerant python engine (malformed lines are
# skipped) and discard every row that has a missing value in any column.
rawdf = pd.read_csv(
    'merged_ls_200923.csv',
    engine="python",
    on_bad_lines="skip",
).dropna()

# Print a summary of the cleaned frame.
rawdf.info()

In [None]:
# Label codes kept for classification; rows carrying any other label are dropped.
labels = ['AVI', 'MIS', 'OTH', 'WIN', 'WEA',
          'COC', 'PRP', 'MEC', 'ELC', 'LAG',
          'FLC', 'FFC', 'ECS', 'FLU']


def filter_by_label(df):
    """Return the rows of ``df`` whose 'label' value is one of ``labels``.

    Args:
        df: DataFrame with a 'label' column.

    Returns:
        The subset of ``df`` (original index preserved) whose label is in
        the module-level ``labels`` list. Missing or non-string labels never
        match and are therefore dropped.
    """
    # Filter the frame that was passed in, not the global rawdf, so the
    # function works on any DataFrame it is given.
    return df[df['label'].isin(labels)]

# Working dataset: the rows of rawdf whose label is one of `labels`.
df = filter_by_label(rawdf)

In [None]:
# Print a summary of the label-filtered frame to check what survived filtering.
df.info()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)

# Build the vocabulary from the training texts only, so no information from
# the held-out texts leaks into the word index.
max_vocab_size = 10000
tokenizer = Tokenizer(num_words=max_vocab_size)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Pad/truncate every sequence to a fixed length so they stack into one array.
max_seq_length = 100
X_train_padded = pad_sequences(X_train_seq, maxlen=max_seq_length)
X_test_padded = pad_sequences(X_test_seq, maxlen=max_seq_length)

# Fit the encoder on every label in df rather than on y_train alone: a class
# that the random split places only in the test set would otherwise make
# transform(y_test) raise ValueError, and num_classes would be too small for
# the output layer.
label_encoder = LabelEncoder()
label_encoder.fit(df['label'])
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

embedding_dim = 100
num_classes = len(label_encoder.classes_)

In [None]:
# Embedding -> LSTM -> softmax classifier over the integer-encoded labels.
model = Sequential()
model.add(Embedding(input_dim=max_vocab_size, output_dim=embedding_dim, input_length=max_seq_length))
model.add(LSTM(128))
model.add(Dense(num_classes, activation='softmax'))

# Sparse loss because the targets are integer class ids, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

batch_size = 32
num_epochs = 10
# NOTE(review): the test split is also used as validation data here, so the
# per-epoch validation metrics are not independent of the final evaluation.
model.fit(X_train_padded, y_train_encoded, epochs=num_epochs, batch_size=batch_size, validation_data=(X_test_padded, y_test_encoded))

# Sequential.predict_classes was removed from Keras (TensorFlow 2.6+); taking
# the argmax over the softmax output yields the same integer class ids.
y_pred_encoded = model.predict(X_test_padded).argmax(axis=-1)
# Map the integer ids back to the original string labels.
y_pred = label_encoder.inverse_transform(y_pred_encoded)

In [None]:
# Score the decoded predictions against the held-out string labels.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print(report)