# RNN Training Script for Grievance Redressal Replier

In [None]:

import pandas as pd
import numpy as np
import os

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from sklearn.model_selection import train_test_split


In [None]:

# Read the grievance/response table from disk.
# The path is relative to the working directory; adjust it if needed.
df = pd.read_csv(filepath_or_buffer='../data/grievances.csv')

# Print the column index so the names used in later cells can be verified.
print("Columns:", df.columns)


In [None]:

# Use correct column names here.
# Rows missing either text are dropped as a pair so grievances and responses
# stay aligned row-for-row. Remaining values are coerced to str because the
# tokenizer downstream expects text and fails on NaN/float entries.
pairs = df[['grievance', 'response']].dropna().astype(str)
grievances = pairs['grievance']
responses = pairs['response']


In [None]:

# Fit a vocabulary on the grievance texts, then encode every text as a
# list of integer token ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(grievances)
X = tokenizer.texts_to_sequences(grievances)

# Pad at the end ('post') so all sequences share the length of the
# longest one.
max_len = max(map(len, X))
X_padded = pad_sequences(X, padding='post', maxlen=max_len)


In [None]:

# Responses get their own tokenizer; its token ids serve as class labels.
response_tokenizer = Tokenizer()
response_tokenizer.fit_on_texts(responses)
y = response_tokenizer.texts_to_sequences(responses)

# Simplification: the label is the id of the first token of each response.
# A response that produced no tokens falls back to class 0.
y = np.array([seq[0] if seq else 0 for seq in y])


In [None]:

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_padded,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Build RNN model
# +1 because token ids start at 1; id 0 is the padding value.
vocab_size = len(tokenizer.word_index) + 1
# Labels are class indices, so the softmax needs (largest label + 1) units.
# y is a NumPy array, so cast its max to a plain Python int for the layer size.
output_classes = int(np.max(y)) + 1

model = Sequential([
    # mask_zero=True: inputs are post-padded with 0, and without a mask the
    # SimpleRNN's final state would be computed after stepping through all the
    # trailing pad positions. The mask makes the RNN skip those timesteps.
    # NOTE(review): `input_length` is reported as deprecated by newer Keras
    # releases; confirm the target version before removing it.
    Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len, mask_zero=True),
    # return_sequences=False: only the last (unmasked) state feeds the classifier.
    SimpleRNN(64, return_sequences=False),
    Dense(output_classes, activation='softmax')
])

# Sparse loss because y holds integer class ids rather than one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:

# Fit for 10 epochs, evaluating on the held-out split after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)


In [None]:

# Make sure the output directory exists (no error if it already does),
# then write the trained model there in HDF5 format.
os.makedirs(name='../models', exist_ok=True)
model.save(filepath='../models/grievance_rnn.h5')


In [None]:

# Persist both fitted tokenizers so the same vocabularies can be reloaded
# later alongside the saved model.
import pickle

for path, tok in (
    ('../models/input_tokenizer.pkl', tokenizer),
    ('../models/response_tokenizer.pkl', response_tokenizer),
):
    with open(path, 'wb') as f:
        pickle.dump(tok, f)
