### **IMPORTING REQUIRED PACKAGES**:

In [37]:
#importing packages
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

### LOADING THE DATASET:

In [None]:
# Read the raw spam/ham CSV from the Colab workspace into a DataFrame.
csv_path = "/content/Spam Email Detection - spam.csv"
df = pd.read_csv(csv_path)
df  # last expression of the cell -> rendered as its output

### SLICING THE DATASET:

In [None]:
# Keep only the first two columns (i.e. drop the last 3 columns).
# `.copy()` makes the result an independent frame: later cells assign into
# `df`, and writing into a bare `.iloc` slice of the original frame is what
# pandas reports as SettingWithCopyWarning.
df = df.iloc[:, :2].copy()
df

### GETTING THE INFO OF THE DATASET:

In [None]:
# Print a concise summary of the frame: columns, non-null counts and dtypes.
df.info()

### DISPLAYING THE FIRST 5 ROWS:

In [None]:
# Preview the first five rows of the frame.
df.head(5)

### CHECKING THE NUMBER OF ROWS AND COLUMNS IN THE DATAFRAME:

In [None]:
# (number of rows, number of columns) of the frame.
df.shape

### LABEL ENCODING:

In [44]:
# Encode the text labels in column 'v1' as integers: spam -> 0, ham -> 1.
#
# One mapping pass replaces the two sequential `.loc` writes. Those writes put
# integers into a text column, leaving a mixed int/str object column (and the
# write itself is rejected by pandas' dedicated string dtype). Mapping builds a
# fresh column instead, which is integer-typed when every label is spam/ham.
#
# Any value that is not 'spam'/'ham' -- including an already encoded 0/1 when
# this cell is re-run -- is passed through unchanged, so the cell stays
# idempotent just like the original.
label_codes = {'spam': 0, 'ham': 1}
df['v1'] = df['v1'].map(lambda label: label_codes.get(label, label))

### SEPARATING THE DATA AS TEXTS AND LABELS:

In [45]:
# x: message text (column 'v2'); y: label (column 'v1').
x, y = df['v2'], df['v1']

In [None]:
# Display the texts and the labels together as a (texts, labels) tuple.
(x, y)

### TRAIN-TEST SPLIT:

In [46]:
# Hold out data for evaluation: 80 % of the rows go to training, the rest to
# testing. A fixed random_state makes the shuffle (and so the split) repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=0,
)


In [None]:
# Shape of the training texts produced by the split.
x_train.shape

In [None]:
# Shape of the held-out test texts produced by the split.
x_test.shape

### TRANSFORMING TEXT DATA TO FEATURE VECTOR:

In [49]:
# TF-IDF featuriser: lower-cases the text, drops English stop words, and
# (min_df=1) keeps every term that occurs in at least one document.
f_ext = TfidfVectorizer(
    lowercase=True,
    stop_words='english',
    min_df=1,
)

# Learn the vocabulary and IDF weights from the training texts only, then
# apply that same fitted transform to the test texts.
x_train_features = f_ext.fit_transform(x_train)
x_test_features = f_ext.transform(x_test)

### CONVERTING y_train AND y_test VALUES TO INTEGERS:

In [50]:
# Cast both label series to an integer dtype before they reach the classifier.
y_train, y_test = (labels.astype('int') for labels in (y_train, y_test))

### MODEL TRAINING:

In [51]:
# Logistic-regression classifier with scikit-learn's default hyper-parameters.
model = LogisticRegression()

In [None]:
# Fit the classifier on the TF-IDF training matrix and the training labels.
model.fit(X=x_train_features, y=y_train)

### EVALUATING TRAINED MODEL:

In [None]:
# Accuracy on the data the model was fitted on (an optimistic reference point).
pred_train = model.predict(x_train_features)
accuracy_train = accuracy_score(y_true=y_train, y_pred=pred_train)
print("Accuracy on training data :", accuracy_train)

In [None]:
# Accuracy on the held-out test split, which the model did not see while fitting.
pred_test = model.predict(x_test_features)
accuracy_test = accuracy_score(y_true=y_test, y_pred=pred_test)
print("Accuracy on testing data :", accuracy_test)

### BUILDING EMAIL DETECTION SYSTEM:

In [71]:
# One raw message to classify, wrapped in a list because the vectorizer
# expects an iterable of documents. The currency sign was stored as the
# Unicode replacement character (mojibake); it is restored to the pound sign.
sample_mail = [
    "URGENT! You have won a 1 week FREE membership in our £100,000 Prize Jackpot! Txt the word: CLAIM to No: 81010 T&C www.dbuk.net LCCLTD POBOX 4403LDNW1A7RW18"
]

# Convert to a feature vector with the already fitted TF-IDF vectorizer
# (transform only -- the vocabulary must stay the one learned from training).
sample_mail_features = f_ext.transform(sample_mail)

### DETECTING SPAM OR NOT (SPAM : 0 ; HAM : 1):

In [None]:
# Classify the sample message; predict returns one label per input row.
pred = model.predict(sample_mail_features)

# Label 1 is reported as ham; any other label (i.e. 0) is reported as spam.
verdict = "Ham mail" if pred[0] == 1 else "Spam mail"
print(verdict)
