In [None]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

In [None]:
# 1. Load the data
# Location of the exported snippets CSV (presumably a Colab session path —
# adjust when running elsewhere).
DATA_PATH = '/content/gmail_subhi.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# 2. Preprocess the text
def preprocess_text(text):
    """Normalise a raw email snippet into lowercase alphanumeric text.

    Steps, applied in order:
      1. Remove URL-like tokens: any run of non-space characters starting
         with ``http``, matched case-insensitively so ``HTTP://...`` and
         ``Https://...`` links are removed as well.
      2. Remove email-address-like tokens: non-space runs containing ``@``.
      3. Remove every character that is not an ASCII letter, a digit or
         whitespace.
      4. Lowercase the result.

    Whitespace is never collapsed, so a removed token can leave a run of
    consecutive spaces behind.

    Args:
        text: The snippet to clean. Non-string values are converted with
            ``str()`` first (the same coercion the notebook applies to the
            ``Snippet`` column via ``astype(str)``) instead of raising
            ``TypeError``.

    Returns:
        str: The cleaned, lowercased text.
    """
    if not isinstance(text, str):
        text = str(text)
    # URLs and addresses must go before punctuation is stripped; otherwise
    # their letters would be glued together into meaningless tokens.
    text = re.sub(r'http\S+', '', text, flags=re.IGNORECASE)  # Remove URLs
    text = re.sub(r'\S+@\S+', '', text)  # Remove email addresses
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Keep only letters, digits, whitespace
    return text.lower()

# Coerce the snippet column to plain strings. Missing values are replaced
# with '' first: astype(str) alone would turn them into the literal text
# 'nan', which would then be learned as a vocabulary token.
data['Snippet'] = data['Snippet'].fillna('').astype(str)
# Cleaned text used as the model's input feature.
data['Processed_Snippet'] = data['Snippet'].apply(preprocess_text)

In [None]:
# 3. Label and split the data
# NOTE(review): labels are assigned purely by row position (rows before the
# midpoint are 'positive', the rest 'negative'), not read from any column of
# the data — confirm this placeholder labelling is intended.
midpoint = len(data) / 2
data['Sentiment'] = [
    'positive' if row < midpoint else 'negative'
    for row in range(len(data))
]
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['Processed_Snippet'],
    data['Sentiment'],
    test_size=0.2,
    random_state=42,
)


In [None]:
# 4. Create and train the model
# TF-IDF features feed a logistic-regression classifier; both use their
# library defaults and are chained so raw text can be passed to the model.
vectorizer = TfidfVectorizer()
classifier = LogisticRegression()
model = make_pipeline(vectorizer, classifier)
model.fit(X_train, y_train)


In [None]:
# 5. Test the model
# Predict labels for the held-out snippets and score them against the
# labels produced by the split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)


In [None]:
# Functions to predict sentiment and generate response
def predict_sentiment(email_snippet):
    """Predict the sentiment label for a single email snippet.

    The snippet is cleaned with ``preprocess_text`` and then passed to the
    notebook-level ``model`` as a one-element batch.

    Args:
        email_snippet: Raw snippet text to classify.

    Returns:
        The first (and only) label returned by ``model.predict``.
    """
    cleaned = preprocess_text(email_snippet)
    # The model expects an iterable of documents, so wrap the single snippet.
    predictions = model.predict([cleaned])
    return predictions[0]

def generate_response(sentiment):
    """Return the canned reply for a predicted sentiment.

    Args:
        sentiment: Predicted label; only the exact value ``"positive"``
            selects the thank-you reply.

    Returns:
        str: The thank-you message for ``"positive"``, otherwise the
        apology message (any other value is treated as not positive).
    """
    thanks = "Thank you for your positive feedback!"
    apology = "We apologize for any inconvenience. We'll do our best to address this."
    return thanks if sentiment == "positive" else apology

In [None]:
# Test the functions
# Smoke test: classify one hand-written snippet and show the label together
# with the canned reply chosen for it.
sample_snippet = "I really love you code"
predicted_sentiment = predict_sentiment(sample_snippet)
response = generate_response(predicted_sentiment)

print(f"{predicted_sentiment} {response}")

positive Thank you for your positive feedback!


In [None]:
# Report the held-out accuracy computed in the evaluation step.
print(f"{accuracy}")

0.7763157894736842
