# AI Phishing Email Detector
### Using Natural Language Processing (NLP) and Machine Learning

This project demonstrates how to use AI to detect phishing emails: the email text is converted into TF-IDF word-vector features, which are then classified with a logistic regression model.

In [ ]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Sample dataset: five hand-written emails; label 1 marks the phishing
# examples and 0 the legitimate ones. This is only large enough to
# illustrate the workflow, not to evaluate a model meaningfully.
data = {
    'email_text': [
        'Your account has been suspended, click here to verify',
        'Meeting tomorrow at 10am, please confirm attendance',
        'Update your payment method immediately to avoid service interruption',
        'Lunch at 12 with the new interns',
        'You have won a free iPhone! Click to claim now',
    ],
    'label': [1, 0, 1, 0, 1],
}

df = pd.DataFrame(data)
y = df['label']

# Train/test split on the RAW text, before any vectorization. Fitting the
# TF-IDF vocabulary and IDF weights on the full corpus would let
# information from the held-out emails leak into the training features.
# NOTE: with 5 rows and test_size=0.2 the test set is a single email, so
# the reported accuracy can only be 0% or 100%.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    df['email_text'], y, test_size=0.2, random_state=42
)

# Vectorize text: learn the vocabulary/IDF from the training emails only,
# then project the test emails into that same feature space.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Keep `X` as a feature matrix for the whole corpus (same name as before),
# now expressed in the train-fitted feature space.
X = vectorizer.transform(df['email_text'])

# Model training
model = LogisticRegression()
model.fit(X_train, y_train)

# Prediction and accuracy on the held-out email(s)
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f'Model accuracy: {accuracy * 100:.2f}%')