<a href="https://colab.research.google.com/github/Sreyagavara/Machine-learning/blob/main/LLB4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Expanded dataset for better evaluation
data = {
    'Email Length': [150, 200, 120, 250, 180, 300, 350, 220, 190, 310],
    'Keyword Frequency': [5, 2, 8, 3, 7, 4, 6, 1, 9, 5],
    'Contains Link': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No'],
    'Sender': ['A', 'B', 'A', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Spam': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]  # 1: Spam, 0: Not Spam
}

# Creating DataFrame
df = pd.DataFrame(data)

# Encoding categorical variables
df['Contains Link'] = df['Contains Link'].map({'Yes': 1, 'No': 0})
df = pd.get_dummies(df, columns=['Sender'], drop_first=True)

# Features and target variable
X = df.drop('Spam', axis=1)
y = df['Spam']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Logistic Regression Model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation Metrics
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Cross-validation for better evaluation
cv = StratifiedKFold(n_splits=5)
cv_scores = cross_val_score(model, X, y, cv=cv, scoring='accuracy')

# Display the results
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Classification Report:\n{classification_rep}')
print(f'Cross-Validation Accuracy Scores: {cv_scores}')
print(f'Mean Cross-Validation Accuracy: {cv_scores.mean() * 100:.2f}%')


Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Cross-Validation Accuracy Scores: [1.  1.  1.  0.5 1. ]
Mean Cross-Validation Accuracy: 90.00%
