## Importing the required libraries

In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from pickle import dump

### Reading the training dataset

In [4]:
# Load the training data from 'train.csv' (path is relative to the working directory).
df = pd.read_csv('train.csv')

# Map the numeric labels to readable class names: 0 -> 'Real', 1 -> 'Fake'.
conversion_dict = {0: 'Real', 1: 'Fake'}
df['label'] = df['label'].replace(conversion_dict)

# Show how many rows fall into each class.
df['label'].value_counts()

Fake    10413
Real    10387
Name: label, dtype: int64

In [5]:
# Splitting the data: hold out 25% of the rows for evaluation.
# The fixed random_state keeps the shuffled split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.25,
    random_state=7,
    shuffle=True,
)

# TF-IDF vectorizer that removes English stop words and ignores terms
# appearing in more than 75% of the documents.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.75)

In [6]:
# Turn the raw text into TF-IDF feature vectors.
# The vocabulary and IDF weights are learned from the training text only;
# the held-out text is then projected with that same fitted vectorizer.
# astype('U') casts every entry to a unicode string, so any missing value
# (NaN) becomes the literal text 'nan' instead of raising an error.
vec_train = tfidf_vectorizer.fit_transform(
    x_train.values.astype('U')
)
vec_test = tfidf_vectorizer.transform(
    x_test.values.astype('U')
)

In [7]:
# Passive Aggressive Classifier
# The estimator shuffles the training data between epochs, so without a seed
# each run produces a slightly different model and accuracy figure.
# random_state reuses the seed of the train/test split (7) so that the fit,
# the pickled model and the reported score are repeatable.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(vec_train, y_train)

PassiveAggressiveClassifier(max_iter=50)

In [8]:
# Creating the pickle files: persist the fitted vectorizer and the trained
# classifier so they can be reloaded later without retraining.
# The `with` blocks guarantee each file handle is flushed and closed, even if
# pickling raises; a bare open(...) passed to dump() is never closed explicitly.
with open('transformation.pkl', 'wb') as vectorizer_file:
    dump(tfidf_vectorizer, vectorizer_file)

with open('model.pkl', 'wb') as model_file:
    dump(pac, model_file)

In [7]:
# Evaluate the trained classifier on the held-out split and report the
# share of correctly labelled rows as a percentage with two decimals.
y_pred = pac.predict(vec_test)
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'PAC Accuracy: {round(score*100,2)}%')

PAC Accuracy: 96.31%
