Title: Train a Classification Model (Logistic Regression)

Task 1: Email Spam Detection<br>
Dataset: Use a dataset containing labeled emails as spam or not-spam, with features such as word frequency.

In [3]:
# Write your code here
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

np.random.seed(0)
emails = pd.DataFrame({
    'Free_Word_Count': np.random.poisson(3, 100),
    'Link_Count': np.random.poisson(2, 100),
    'Uppercase_Words': np.random.poisson(5, 100),
})
emails['Spam'] = (emails['Free_Word_Count'] + emails['Link_Count'] > 6).astype(int)

X = emails[['Free_Word_Count', 'Link_Count', 'Uppercase_Words']]
y = emails['Spam']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

spam_model = LogisticRegression()
spam_model.fit(X_train, y_train)
y_pred = spam_model.predict(X_test)
print("Spam Detection Report:\n", classification_report(y_test, y_pred))


Spam Detection Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00         5

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



Task 2: Predicting Tumor Malignancy<br>
Dataset: Use a dataset that contains features from tumor datasets such as size and shape indicators.

In [None]:
# Write your code here
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

tumor_model = LogisticRegression(max_iter=10000)
tumor_model.fit(X_train, y_train)

y_pred = tumor_model.predict(X_test)
print("Tumor Malignancy Report:\n", classification_report(y_test, y_pred))


Tumor Malignancy Report:
               precision    recall  f1-score   support

           0       1.00      0.88      0.94        42
           1       0.94      1.00      0.97        72

    accuracy                           0.96       114
   macro avg       0.97      0.94      0.95       114
weighted avg       0.96      0.96      0.96       114



Task 3: Wine Quality Classification<br>
Dataset: Use a dataset that contains chemical properties of wine along with a quality rating.
Treat quality as a binary classification (high vs. low).

In [None]:
# Write your code here
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pandas as pd

wine_data = load_wine()
df = pd.DataFrame(wine_data.data, columns=wine_data.feature_names)
df['quality'] = (wine_data.target > 0).astype(int)  

X = df.drop('quality', axis=1)
y = df['quality']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Wine Quality Classification Report:\n")
print(classification_report(y_test, y_pred))



Wine Quality Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        22

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

