In [14]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Load your dataset
music_data = pd.read_csv('Grievance_New.csv', encoding='latin-1')

# Check for and handle missing values (NaN) in the 'Grievance/Complaint' and 'Offense Tag(s)' columns
music_data = music_data.dropna(subset=['Grievance/Complaint', 'Offense Tag(s)'])

# Input text
new_input = "someone is using the property of the school in a wrong way."

# Initialize a TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Transform the text data into TF-IDF vectors
X = tfidf_vectorizer.fit_transform(music_data['Grievance/Complaint'])

# Initialize the Linear SVC classifier with dual=False to suppress the warning
svc_classifier = LinearSVC(dual=False)

# Fit the classifier to the data
svc_classifier.fit(X, music_data['Offense Tag(s)'])

# Transform the new input
new_input_tfidf = tfidf_vectorizer.transform([new_input])

# Predict the top 10 offense tags for the new input
predicted_tags = svc_classifier.decision_function(new_input_tfidf)
top_10_indices = predicted_tags[0].argsort()[-10:][::-1]

# Get the corresponding offense tags and their confidence scores
predicted_offense_tags = music_data.iloc[top_10_indices]['Offense Tag(s)']
confidence_scores = predicted_tags[0][top_10_indices]

# Print the top 10 predicted offense tags and their confidence scores
for i, (offense_tag, confidence) in enumerate(zip(predicted_offense_tags, confidence_scores), 1):
    print(f"Prediction {i}: Offense Tag: {offense_tag}, Confidence Score: {confidence}")


Prediction 1: Offense Tag: 12.1.1, Confidence Score: -0.00843994507232948
Prediction 2: Offense Tag: 12.1.1, Confidence Score: -0.5216424352738593
Prediction 3: Offense Tag: 12.1.1, Confidence Score: -0.6726968416610293
Prediction 4: Offense Tag: 12.1.1, Confidence Score: -0.802223482181654
Prediction 5: Offense Tag: 12.1.1, Confidence Score: -0.8092584610445701
Prediction 6: Offense Tag: 12.1.1, Confidence Score: -0.8112847389219432
Prediction 7: Offense Tag: 12.1.1, Confidence Score: -0.8120702065572845
Prediction 8: Offense Tag: 12.1.1, Confidence Score: -0.9556549699042582
Prediction 9: Offense Tag: 12.1.1, Confidence Score: -0.9754162707456019
Prediction 10: Offense Tag: 12.1.1, Confidence Score: -1.016796090753679
