In [3]:
import pandas as pd
import string
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the two NLTK resources this notebook needs: the stop-word lists and
# WordNet (used for lemmatization).
for corpus_name in ('stopwords', 'wordnet'):
    nltk.download(corpus_name)

# Shared helpers for text cleaning: the English stop-word set and the lemmatizer
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Function to clean and preprocess text
def clean_text(text):
    """Normalise a raw message into a space-separated string of lemmatized tokens.

    Each whitespace-separated token is reduced to its ASCII letters,
    lower-cased, dropped if it is a stop word, and otherwise passed through
    ``lemmatizer.lemmatize``.

    Stop words are matched in two forms: the token with only its surrounding
    punctuation trimmed, and the token reduced to letters only. The first form
    is what allows apostrophe-bearing entries of ``stop_words`` (a contraction
    such as "don't", if the list contains it) to match; once the apostrophe is
    stripped the token ("dont") no longer equals the stop-word entry and would
    slip through the filter.

    Args:
        text: The message to clean. Any value that is not a ``str`` (for
            example ``None``, or the float NaN pandas uses for a missing cell)
            is treated as an empty message instead of raising.

    Returns:
        str: The surviving tokens joined by single spaces; ``''`` when nothing
        survives cleaning.
    """
    # Missing or non-text cells have no .split(); treat them as empty messages.
    if not isinstance(text, str):
        return ''

    kept = []
    for raw in text.split():
        # Letters-only, lower-cased form: this is what ends up in the output.
        letters = re.sub(r'[^a-zA-Z]', '', raw).lower()
        if not letters:
            # Token was purely digits/punctuation/non-ASCII; nothing to keep.
            continue

        # Form used for stop-word lookup of contractions: keep inner
        # apostrophes, trim surrounding punctuation, and map the typographic
        # apostrophe (U+2019) to the plain one used in the stop-word list.
        bare = raw.replace('\u2019', "'").strip(string.punctuation).lower()

        if bare in stop_words or letters in stop_words:
            continue

        kept.append(lemmatizer.lemmatize(letters))

    return ' '.join(kept)

# Define your custom response format
# One "<LABEL> Hate Score: {}" line per score, each terminated by a newline;
# the {} placeholders are filled positionally, in this label order.
response_format = "".join(
    f"{label} Hate Score: {{}}\n"
    for label in ("CM", "AOPV", "CDACT", "TI", "TTBF")
)

# Read the raw dataset from disk
df = pd.read_csv("Llama_data.csv")

# Replace the Message column with its cleaned, preprocessed form
df = df.assign(Message=df['Message'].apply(clean_text))

# Function to format the response
def format_response(row):
    """Render one row's scores into the ``response_format`` template.

    Args:
        row: A mapping-like object (here, a DataFrame row) indexable by the
            keys 'CM', 'AOPV', 'CDACT', 'TI' and 'TTBF'.

    Returns:
        str: ``response_format`` with its placeholders filled by those five
        values, in that order.
    """
    score_columns = ('CM', 'AOPV', 'CDACT', 'TI', 'TTBF')
    scores = [row[column] for column in score_columns]
    return response_format.format(*scores)

# Build one formatted response string per row and attach it as a new column
formatted_responses = df.apply(format_response, axis=1)
df['Response'] = formatted_responses

# Write the augmented frame to a new CSV, without the row index
df.to_csv(path_or_buf="Llama_data_formatted.csv", index=False)


[nltk_data] Downloading package stopwords to /Users/aryan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/aryan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
# Preview the first ten rows of the processed frame
df.head(n=10)

Unnamed: 0,Message,CM,AOPV,CDACT,TI,TTBF,Response
0,havent fl year alot faggot low life cuban alot...,1.5,2.5,3.333333,1.5,2.833333,CM Hate Score: 1.5\nAOPV Hate Score: 2.5\nCDAC...
1,never assimilate jew stick sore thumb reason j...,1.666667,1.0,4.333333,4.666667,1.0,CM Hate Score: 1.666666667\nAOPV Hate Score: 1...
2,filth like barron belong chain office,2.0,1.0,3.0,1.0,1.166667,CM Hate Score: 2.0\nAOPV Hate Score: 1.0\nCDAC...
3,snip stop trolling prowhite fag huh thats oxym...,1.0,1.166667,3.166667,1.0,2.0,CM Hate Score: 1.0\nAOPV Hate Score: 1.1666666...
4,attack thank honesty right whole thing make si...,3.5,6.0,5.166667,5.5,1.333333,CM Hate Score: 3.5\nAOPV Hate Score: 6.0\nCDAC...
5,attack sorry man read thing roung im fag,1.0,1.0,2.666667,1.0,1.0,CM Hate Score: 1.0\nAOPV Hate Score: 1.0\nCDAC...
6,explode attack saw nig lovining highschool gir...,1.833333,1.0,3.833333,4.166667,2.666667,CM Hate Score: 1.833333333\nAOPV Hate Score: 1...
7,mudsharks intermarriage u year loving v virgin...,1.4,1.0,4.2,1.0,1.4,CM Hate Score: 1.4\nAOPV Hate Score: 1.0\nCDAC...
8,quote originally posted one disgusted white bo...,2.0,1.0,4.833333,2.833333,1.666667,CM Hate Score: 2.0\nAOPV Hate Score: 1.0\nCDAC...
9,quote originally posted sickle cell bblacks ca...,1.0,4.333333,3.0,1.0,3.166667,CM Hate Score: 1.0\nAOPV Hate Score: 4.3333333...
