In [120]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle

In [39]:
DATA_PATH="data/article_level_data.csv"

In [40]:
df=pd.read_csv(DATA_PATH)

In [41]:
df.head()

Unnamed: 0.1,Unnamed: 0,article,class
0,0,NLP is a multidisciplinary field that draws fr...,0
1,1,There are a variety of emerging applications f...,0
2,2,As each new means of communication and social ...,0
3,3,"These suggestions include:, Learn about the pu...",0
4,4,In recent years there has been growing concern...,0


In [42]:
text_columns=['article','class']

In [43]:
df=df[text_columns]

In [44]:
df.isna().sum()

article    0
class      0
dtype: int64

In [45]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1018 entries, 0 to 1017
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   article  1018 non-null   object
 1   class    1018 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 16.0+ KB


In [46]:
stemmer=PorterStemmer()

In [47]:
stemmer.stem('i can write this article')

'i can write this articl'

In [48]:
def word_cleaning(article):
    article=re.sub('[^a-zA-Z]',' ',article)
    article=article.lower()
    article=article.split()
    article=[stemmer.stem(sentences)for sentences in article if not sentences in stopwords.words('english')]
    article=" ".join(article)
    return article

In [49]:
print(word_cleaning('i can write this article'))

write articl


In [50]:
df['article']=df['article'].apply(word_cleaning)

In [51]:
df

Unnamed: 0,article,class
0,nlp multidisciplinari field draw linguist comp...,0
1,varieti emerg applic nlp includ follow voic co...,0
2,new mean commun social interact introduc socia...,0
3,suggest includ learn purpos newsgroup post gro...,0
4,recent year grow concern internet user may eve...,0
...,...,...
1013,palett refer differ thing depend context gener...,1
1014,probabl measur likelihood specif event occur m...,1
1015,compil softwar program translat sourc code wri...,1
1016,compil process process compil translat sourc c...,1


In [154]:
X=df['article']
y=df['class']

In [155]:
y.shape

(1018,)

In [156]:
from sklearn.model_selection import train_test_split


In [157]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42)

In [158]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((661,), (357,), (661,), (357,))

In [159]:
vect=TfidfVectorizer()

In [160]:
X_train = vect.fit_transform(X_train)
X_test = vect.transform(X_test)

In [161]:
X_train.shape, X_test.shape

((661, 5503), (357, 5503))

In [162]:
from sklearn.tree import DecisionTreeClassifier

In [163]:
model = DecisionTreeClassifier()

In [164]:
model.fit(X_train , y_train)

DecisionTreeClassifier()

In [165]:
predction = model.predict(X_test)
predction

array([1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0,

In [166]:
model.score(X_test, y_test)

0.7030812324929971

In [167]:
pickle.dump(vect,open("vector.pkl","wb"))
pickle.dump(model,open("model.pkl","wb"))

In [168]:
vector_form=pickle.load(open('vector.pkl', 'rb'))
model = pickle.load(open("model.pkl","rb"))

In [173]:
def response(article):
    article = word_cleaning(article)
    input_data = [article]
    vector_form1= vector_form.transform(input_data)
    prediction = model.predict(vector_form1)
    return prediction

In [177]:
val=response("""In these trying times, Jackie Mason is the Voice of Reason. [In this week’s exclusive clip for Breitbart News, Jackie discusses the looming threat of North Korea, and explains how President Donald Trump could win the support of the Hollywood left if the U. S. needs to strike first.  “If he decides to bomb them, the whole country will be behind him, because everybody will realize he had no choice and that was the only thing to do,” Jackie says. “Except the Hollywood left. They’ll get nauseous. ” “[Trump] could win the left over, they’ll fall in love with him in a minute. If he bombed them for a better reason,” Jackie explains. “Like if they have no transgender toilets. ” Jackie also says it’s no surprise that Hollywood celebrities didn’t support Trump’s strike on a Syrian airfield this month. “They were infuriated,” he says. “Because it might only save lives. That doesn’t mean anything to them. If it only saved the environment, or climate change! They’d be the happiest people in the world. ” Still, Jackie says he’s got nothing against Hollywood celebs. They’ve got a tough life in this country. Watch Jackie’s latest clip above.   Follow Daniel Nussbaum on Twitter: @dznussbaum """)
print(val)
if val==[0]:
    print('Human Generated')
else:
    print('AI Generated')

[0]
Human Generated


In [179]:
val=response(""" It appears you've instantiated a TfidfVectorizer object. The TfidfVectorizer is a commonly used tool in natural language processing and text analysis for converting a collection of raw documents into a matrix of TF-IDF features. TF-IDF stands for Term Frequency-Inverse Document Frequency and is used to represent the importance of a word in a document relative to a collection of documents.

If you have specific tasks or questions related to the use of TfidfVectorizer or need assistance with using it, please provide more details, and I'd be happy to help.""")
print(val)
if val==[0]:
    print('Human Generated')
else:
    print('AI Generated')

[1]
AI Generated


In [180]:
val=response("""Certainly, I can help you generate a project name. Here's a project name you can consider:

Project Name: "AIDetect"

You can use this project name to check if responses are generated by AI or not. If the response includes the project name "AIDetect," it's likely generated by AI. This project name is a combination of "AI" (Artificial Intelligence) and "Detect," which suggests the project's focus on detecting AI-generated content.""")
print(val)
if val==[0]:
    print('Human Generated')
else:
    print('AI Generated')

[1]
AI Generated


In [185]:
res=input()
val=response(res)
print(val)
if val==[0]:
    print('Human Generated')
else:
    print('AI Generated')

Building a prompt engineering NLP (Natural Language Processing) system using Python involves several technical steps. Prompt engineering is the process of designing and optimizing prompts or queries for language models like GPT-3. Here are the steps you should take:  1. **Set Up Your Environment:**    Ensure you have Python installed and set up your development environment. You can use popular Python environments like Anaconda or virtualenv.  2. **Install Required Libraries:**    You'll need various Python libraries, including but not limited to:    - OpenAI's GPT-3 library (e.g., `openai`)    - Natural Language Toolkit (NLTK) or spaCy for text processing    - Flask or FastAPI for building a web interface    - Other data manipulation and visualization libraries as per your requirements  3. **Get API Access:**    To use GPT-3, sign up for OpenAI's API and obtain your API key. Keep this key secure.  4. **Data Collection and Preprocessing:**    Collect and preprocess your data. This might

In [187]:
res=input()
val=response(res)
print(val)
if val==[0]:
    print('Human Generated')
else:
    print('AI Generated')

Exploring the quiet charm of a small coastal town is a delightful experience. The gentle sound of waves lapping against the shore, the salty breeze in the air, and the quaint, colorful cottages that line the streets create an idyllic setting for a relaxing getaway. As you stroll through the town, you can't help but notice the friendly smiles of the locals, who take pride in their tight-knit community.  The town's seafood market is a must-visit, where you can savor the freshest catches of the day. Whether it's succulent shrimp, buttery lobster, or flaky cod, you'll find a variety of options to satisfy your taste buds. Don't forget to pair your meal with a chilled glass of white wine from a local vineyard for the perfect coastal dining experience.  One of the town's hidden gems is a historic lighthouse that stands proudly at the edge of the rocky cliffs. Climbing to the top offers breathtaking panoramic views of the ocean, with seagulls soaring in the distance. It's a spot where you can 

In [188]:
res=input()
val=response(res)
print(val)
if val==[0]:
    print('Human Generated')
else:
    print('AI Generated')

Hello, Mohamed Ashraf! It's a pleasure to have the opportunity to connect with you. Your name carries with it a sense of uniqueness and individuality, and I'm here to assist and engage with you in any way you need. Whether you have questions, seek information, or just want a friendly chat, feel free to reach out. Your name is your identity, and I'm here to make your experience enjoyable and informative. Don't hesitate to share your thoughts, questions, or interests, and I'll do my best to assist you. Welcome!
[1]
AI Generated


In [190]:
res=input()
val=response(res)
print(val)
if val==[0]:
    print('Human Generated')
else:
    print('AI Generated')

Hello, Mohamed Ashraf! It's wonderful to meet you. Your name has a special meaning, and it's a pleasure to have this moment to connect with you. How can I assist you today? Whether it's answering questions, sharing stories, or just having a friendly conversation, I'm here for you. Feel free to open up and share your thoughts, and let's make our interaction enjoyable and meaningful. Welcome, and let's get started
[0]
Human Generated
