### Load the Data

In [None]:
import pandas as pd

# Load the dataset.
# The source file is a .csv, so it has to be parsed with read_csv;
# read_excel only understands Excel/ODF workbooks and fails on delimited text.
df = pd.read_csv('../Data/After/2023.02.StartOfSemester.Coded.csv')
print(df.head())

### Data Cleaning   

In [None]:
import re

def clean_text(text):
    """Normalize a free-text value to lowercase letters and single spaces.

    Args:
        text: The raw cell value. Anything that is not a ``str`` (for
            example ``None`` or the float NaN pandas uses for blank cells)
            is treated as empty text.

    Returns:
        The text with every character other than ASCII letters and
        whitespace removed, lowercased, with runs of whitespace collapsed
        to one space and leading/trailing whitespace stripped. Returns
        ``''`` for non-string input.
    """
    # Blank spreadsheet cells arrive as float NaN; re.sub would raise
    # TypeError on them, so map every non-string value to empty text.
    if not isinstance(text, str):
        return ''
    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Convert to lowercase
    text = text.lower()
    # Remove extra spaces
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Run clean_text over both text columns in place, then preview the frame
for _column in ('concerns', 'anything else'):
    df[_column] = df[_column].apply(clean_text)
print(df.head())

### Data Tokenization and Stopword Removal  

In [None]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# English stopword list, held as a set so the membership test in
# preprocess_text is a constant-time lookup.
# NOTE: stopwords.words() requires the NLTK 'stopwords' corpus to be installed
# locally (e.g. via nltk.download('stopwords')); it raises LookupError otherwise.
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Tokenize *text* and drop every token found in ``stop_words``.

    Args:
        text: The string to tokenize with NLTK's ``word_tokenize``.

    Returns:
        The remaining tokens, in their original order, joined by single
        spaces.
    """
    kept_tokens = filter(
        lambda token: token not in stop_words,
        word_tokenize(text),
    )
    return ' '.join(kept_tokens)

# Tokenize and strip stopwords from both text columns in place, then preview
for _column in ('concerns', 'anything else'):
    df[_column] = df[_column].apply(preprocess_text)
print(df.head())

### Backend Development

In [None]:
from flask import Flask, request, jsonify
import joblib

# Flask application object; the @app.route decorators in this file register
# their handlers on it.
app = Flask(__name__)

# Load the trained model
# This runs once, when the cell/module is executed, not per request.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load 'model.pkl' from a trusted source.
model = joblib.load('model.pkl')

@app.route('/upload', methods=['POST'])
def upload_file():
    """Classify an uploaded workbook and return the ids of flagged rows.

    Expects a multipart POST carrying the workbook under the form field
    'file'. The sheet must provide the columns 'concerns',
    'anything else' and 'N'.

    Returns:
        A JSON object ``{'response_ids': [...]}`` holding the 'N' value of
        every row whose predicted category differs from 'NC'. If a required
        column is absent, a JSON ``{'error': ...}`` body with status 400.
    """
    # Flask itself answers with 400 Bad Request when the field is missing.
    file = request.files['file']
    data = pd.read_excel(file)

    required_columns = ('concerns', 'anything else', 'N')
    missing = [name for name in required_columns if name not in data.columns]
    if missing:
        # Report a client error rather than letting a KeyError become a 500.
        return jsonify({'error': 'missing columns: ' + ', '.join(missing)}), 400

    # Preprocess and classify the data.
    # Apply the same two steps, in the same order, that this file applies to
    # `df` (clean_text, then preprocess_text). Blank cells are read as NaN,
    # which the tokenizer cannot handle, so coerce them to empty strings first.
    for column in ('concerns', 'anything else'):
        text = data[column].fillna('').astype(str)
        data[column] = text.apply(clean_text).apply(preprocess_text)

    predictions = model.predict(data['concerns'])
    data['concerns_category'] = predictions
    response_ids = data[data['concerns_category'] != 'NC']['N'].tolist()
    return jsonify({'response_ids': response_ids})

# Start Flask's built-in development server when this file is run directly.
# NOTE(review): debug=True turns on the Werkzeug interactive debugger, which
# allows arbitrary code execution from the browser -- never expose it beyond
# a local development machine.
# NOTE(review): app.run() blocks until the server stops, and the /download
# route is declared after this point in the file; if the code runs top to
# bottom, that route is not registered while this server is serving -- confirm
# the intended ordering.
if __name__ == '__main__':
    app.run(debug=True)


### Frontend Development

### Output Generation

In [None]:
@app.route('/download', methods=['GET'])
def download_file():
    """Write the classified responses to an Excel file and send it back.

    Returns:
        A Flask response that delivers 'classified_responses.xlsx' as an
        attachment.
    """
    # Local imports: the file's flask import line does not bring in
    # send_file, so without this the handler fails with NameError.
    import os
    from flask import send_file

    # Process data and generate output file
    # NOTE(review): neither `process_and_classify` nor a module-level `data`
    # is defined in the visible part of this file (the `data` in upload_file
    # is local to that function) -- confirm both exist before relying on
    # this route.
    output_data = process_and_classify(data)

    # Use one absolute path for both writing and sending: to_excel resolves a
    # relative name against the working directory, while send_file may
    # resolve it against the application root, and the two can differ.
    output_path = os.path.abspath('classified_responses.xlsx')
    output_data.to_excel(output_path, index=False)
    return send_file(output_path, as_attachment=True)