In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
import re
import string
import joblib

# Load the fake and the true news datasets (paths as used in the notebook).
data_fake = pd.read_csv("/content/fake.csv", encoding="utf-8")
data_true = pd.read_csv("/content/true.csv", encoding="utf-8")

# Label every row: 0 = fake news, 1 = true news.
data_fake["class"] = 0
data_true["class"] = 1

# Hold out the last 10 rows of each dataset for manual testing. The copies
# are taken after labelling, so they already carry the correct 'class' value
# and do not need to be labelled again.
data_fake_manual_testing = data_fake.iloc[-10:].copy()
data_true_manual_testing = data_true.iloc[-10:].copy()

# Remove the held-out rows from the data used for training/testing.
data_fake.drop(data_fake.tail(10).index, inplace=True)
data_true.drop(data_true.tail(10).index, inplace=True)

# Display the shapes of the DataFrames
print(data_fake.shape, data_true.shape)

# Display the held-out rows of the manual testing DataFrames
print(data_fake_manual_testing.head(10))
print(data_true_manual_testing.head(10))

# Concatenate the remaining fake and true rows into one DataFrame
data_merge = pd.concat([data_fake, data_true], axis=0)

# Display the first 10 rows of the concatenated DataFrame
print(data_merge.head(10))

# Keep only the article text and its label
data = data_merge.drop(['title', 'subject', 'date'], axis=1)

# Check for null values
print(data.isnull().sum())

# Shuffle the rows and rebuild a clean 0..n-1 index in one step. The shuffle
# is seeded: without a fixed random_state here the row order differs on every
# run, which makes the seeded train/test split below non-reproducible anyway.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Display the first 5 rows of the DataFrame
print(data.head())

# Split the data into training (75%) and testing (25%) sets
x_train, x_test, y_train, y_test = train_test_split(
    data['text'], data['class'], test_size=0.25, random_state=42
)

# Patterns used by clean_text. They are written as raw strings (so that
# sequences such as \S and \s are not treated as invalid string escapes) and
# compiled once, because clean_text is applied to every document.
_URL_RE = re.compile(r'http\S+')
_DOT_COM_RE = re.compile(r'\S+\.com\S+')
_EMAIL_RE = re.compile(r'\S*@\S*\s?')
_WHITESPACE_RE = re.compile(r'\s+')
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Return a normalised copy of *text* for vectorisation.

    Steps, in order:
      1. remove tokens starting at ``http`` (URLs);
      2. remove tokens containing ``.com`` followed by at least one more
         non-space character (e.g. ``site.com/page``);
      3. remove tokens containing ``@`` (e-mail addresses), together with
         one trailing whitespace character;
      4. remove every character in ``string.punctuation``;
      5. collapse each run of whitespace into a single space;
      6. convert to lowercase.

    Whitespace is collapsed after punctuation removal so that stripping a
    stand-alone punctuation mark (``"a - b"``) cannot leave a double space
    behind. Leading/trailing whitespace is not stripped.

    Args:
        text: the string to clean.

    Returns:
        The cleaned string.
    """
    text = _URL_RE.sub('', text)
    text = _DOT_COM_RE.sub('', text)
    text = _EMAIL_RE.sub('', text)
    text = text.translate(_PUNCTUATION_TABLE)
    text = _WHITESPACE_RE.sub(' ', text)
    return text.lower()

# Apply clean_text to every document of a pandas Series. A named function is
# used instead of a lambda because a FunctionTransformer wrapping a lambda
# cannot be pickled, which would make it impossible to save the fitted
# pipeline with joblib.
def _clean_text_series(texts):
    """Return *texts* with clean_text applied to each element (via ``.apply``)."""
    return texts.apply(clean_text)


# Define the pipeline: text cleaning followed by TF-IDF vectorisation
text_pipeline = Pipeline([
    ('clean_text', FunctionTransformer(func=_clean_text_series)),
    ('vectorizer', TfidfVectorizer()),
])

# Fit the pipeline on the training data only, then apply the fitted pipeline
# to the testing data.
x_train_processed = text_pipeline.fit_transform(x_train)
x_test_processed = text_pipeline.transform(x_test)

# Initialize classifiers
LR = LogisticRegression(random_state=0)
DT = DecisionTreeClassifier(random_state=0)
RF = RandomForestClassifier(random_state=0)
GB = GradientBoostingClassifier(random_state=0)

# Train classifiers
LR.fit(x_train_processed, y_train)
DT.fit(x_train_processed, y_train)
RF.fit(x_train_processed, y_train)
GB.fit(x_train_processed, y_train)

# Save the fitted preprocessing pipeline together with the models: the
# classifiers were trained on this pipeline's TF-IDF features, so they cannot
# be used on new text without it. The pipeline is pickled with a reference to
# _clean_text_series, so that function (and clean_text) must be defined
# before the file is loaded in another session.
joblib.dump(text_pipeline, 'text_pipeline.joblib')

# Save the models to files
joblib.dump(LR, 'LR_model.joblib')
joblib.dump(DT, 'DT_model.joblib')
joblib.dump(RF, 'RF_model.joblib')
joblib.dump(GB, 'GB_model.joblib')


# Load the models from files
LR = joblib.load('LR_model.joblib')
DT = joblib.load('DT_model.joblib')
RF = joblib.load('RF_model.joblib')
GB = joblib.load('GB_model.joblib')

# Classify a single piece of news text with all four models and print the results
def manual_testing(news_input, vectorization, LR, DT, GB, RF):
    """Print each classifier's verdict ("Fake" or "True") for *news_input*.

    Args:
        news_input: the news text to classify; it is wrapped in a pandas
            Series before being transformed.
        vectorization: fitted object whose ``transform`` turns the Series
            into the features the models expect.
        LR, DT, GB, RF: fitted Logistic Regression, Decision Tree, Gradient
            Boosting and Random Forest models exposing ``predict``.

    Returns:
        None. The verdicts are printed; a prediction of 0 is reported as
        "Fake", anything else as "True".
    """
    features = vectorization.transform(pd.Series(news_input))

    # Report order: Logistic Regression, Decision Tree, Random Forest,
    # Gradient Boosting.
    labelled_models = (
        ("Logistic Regression Prediction:", LR),
        ("Decision Tree Prediction:", DT),
        ("Random Forest Prediction:", RF),
        ("Gradient Boosting Prediction:", GB),
    )

    # Run every model first, then print, so nothing is printed unless all
    # predictions succeed.
    outcomes = [(label, model.predict(features)) for label, model in labelled_models]

    for label, predicted in outcomes:
        if predicted[0] == 0:
            verdict = "Fake"
        else:
            verdict = "True"
        print(label, verdict)

# Use the whole fitted preprocessing pipeline (text cleaning + TF-IDF), not
# just its vectorizer step, so the manual input is transformed exactly like
# the training data. Passing only the vectorizer would skip clean_text, and
# tokens such as "U.S." or "Boeing's" would then be tokenised differently
# from how the models saw them during training.
vectorization = text_pipeline

# Define the news_input variable
news_input = '''The Pentagon is considering a Boeing proposal to supply Ukraine with cheap, small precision bombs fitted onto abundantly available rockets, allowing Kyiv to strike far behind Russian lines as the West struggles to meet the demand for more arms.
U.S. and allied military inventories are shrinking, and Ukraine faces an increasing need for more sophisticated weapons as the war drags on. Boeing's proposed system, dubbed Ground-Launched Small Diameter Bomb (GLSDB), is one of about a half-dozen plans for getting new munitions into production for Ukraine and America's Eastern European allies, industry sources said.
Although the United States has rebuffed requests for the 185-mile (297km) range ATACMS missile, the GLSDB's 94-mile (150km) range would allow Ukraine to hit valuable military targets that have been out of reach and help it continue pressing its counterattacks by disrupting Russian rear areas.
GLSDB could be delivered as early as spring 2023, according to a document reviewed by Reuters and three people familiar with the plan. It combines the GBU-39 Small Diameter Bomb (SDB) with the M26 rocket motor, both of which are common in U.S. inventories.
Doug Bush, the U.S. Army's chief weapons buyer, told reporters at the Pentagon last week the Army was also looking at accelerating production of 155-millimeter artillery shells - currently only manufactured at government facilities - by allowing defense contractors to build them.
The invasion of Ukraine drove up demand for American-made weapons and ammunition, while U.S. allies in Eastern Europe are "putting a lot of orders," in for a range of arms as they supply Ukraine, Bush added.
"It's about getting quantity at a cheap cost," said Tom Karako, a weapons and security expert at the Center for Strategic and International Studies. He said falling U.S. inventories help explain the rush to get more arms now, saying stockpiles are "getting low relative to the levels we like to keep on hand and certainly to the levels we're going to need to deter a China conflict."
Karako also noted that the U.S. exit from Afghanistan left lots of air-dropped bombs available. They cannot be easily used with Ukrainian aircraft, but "in today's context we should be looking for innovative ways to convert them to standoff capability."
Although a handful of GLSDB units have already been made, there are many logistical obstacles to formal procurement. The Boeing plan requires a price discovery waiver, exempting the contractor from an in-depth review that ensures the Pentagon is getting the best deal possible. Any arrangement would also require at least six suppliers to expedite shipments of their parts and services to produce the weapon quickly.
A Boeing spokesperson declined to comment. Pentagon spokesman Lt. Cmdr. Tim Gorman declined to comment on providing any "specific capability" to Ukraine but said the U.S. and its allies "identify and consider the most appropriate systems" that would help Kyiv.
GLSDB is made jointly by SAAB AB (SAABb.ST), which opens a new tab, and Boeing Co (BA.N), which opens a new tab and has been in development since 2019, well before the invasion, which Russia calls a "special operation". In October, SAAB chief executive Micael Johansson said of the GLSDB: "We are imminently shortly expecting contracts on that."
According to the document - a Boeing proposal to U.S. European Command (EUCOM), which is overseeing weapons headed to Ukraine - the main components of the GLSDB would come from current U.S. stores.
The M26 rocket motor is relatively abundant, and the GBU-39 costs about $40,000 each, making the completed GLSDB inexpensive and its main components readily available. Although arms manufacturers are struggling with demand, those factors make it possible to yield weapons by early 2023, albeit at a low rate of production.
GLSDB is GPS-guided, can defeat some electronic jamming, is usable in all weather conditions, and can be used against armored vehicles, according to SAAB's website. The GBU-39 - which would function as the GLSDB's warhead - has small, folding wings that allow it to glide more than 100km if dropped from an aircraft and targets as small as 3 feet in diameter.'''

# Call the manual_testing function
manual_testing(news_input, vectorization, LR, DT, GB, RF)


(23471, 5) (21407, 5)
                                                   title  \
23471  Seven Iranians freed in the prisoner swap have...   
23472                      #Hashtag Hell & The Fake Left   
23473  Astroturfing: Journalist Reveals Brainwashing ...   
23474          The New American Century: An Era of Fraud   
23475  Hillary Clinton: ‘Israel First’ (and no peace ...   
23476  McPain: John McCain Furious That Iran Treated ...   
23477  JUSTICE? Yahoo Settles E-mail Privacy Class-ac...   
23478  Sunnistan: US and Allied ‘Safe Zone’ Plan to T...   
23479  How to Blow $700 Million: Al Jazeera America F...   
23480  10 U.S. Navy Sailors Held by Iranian Military ...   

                                                    text      subject  \
23471  21st Century Wire says This week, the historic...  Middle-east   
23472   By Dady Chery and Gilbert MercierAll writers ...  Middle-east   
23473  Vic Bishop Waking TimesOur reality is carefull...  Middle-east   
23474  Paul Craig Roberts