Import Libraries

In [1]:
# Install libraries
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt


In [2]:
# Load data
fake_news = pd.read_csv('Fake.csv')
true_news = pd.read_csv('True.csv')

fake_news.head(10)

# Add label column (1 for fake, 0 for true)
fake_news['label'] = 1
true_news['label'] = 0

# Combine the datasets
news_data = pd.concat([fake_news[['title', 'text', 'label']], true_news[['title', 'text', 'label']]])

# Shuffle the data
news_data = news_data.sample(frac=1).reset_index(drop=True)

In [3]:
# Drop duplicates
news_data = news_data.drop_duplicates()
print(news_data)

                                                   title  \
0      Senate intelligence panel to probe Russia hacking   
1      U.S. appeals injunction of Trump revised trave...   
2      Pilgrims return to Mecca as haj winds down wit...   
3      American Workers Finally Fighting Back Against...   
4                           Trump’s ‘Wag the Dog’ Moment   
...                                                  ...   
44891  Tension rises as Australia starts to dismantle...   
44892  Republican U.S. Senators Rubio, Lee want boost...   
44894  Challenge to Trump travel ban moves forward in...   
44895  California governor signs drug pricing transpa...   
44897  Gingrich, Christie lead Trump list of vice pre...   

                                                    text  label  
0      WASHINGTON (Reuters) - The leaders of the Sena...      0  
1      SAN FRANCISCO (Reuters) - The U.S. Justice Dep...      0  
2      MECCA (Reuters) - More than two million pilgri...      0  
3      THE H-1B

In [4]:
# Create a TF-IDF vectorizer for the 'text' column
tfidf_text = TfidfVectorizer(max_features=5000, stop_words='english')
text_vectors = tfidf_text.fit_transform(news_data['text'])

# Create a TF-IDF vectorizer for the 'title' column
tfidf_title = TfidfVectorizer(max_features=2000, stop_words='english')
title_vectors = tfidf_title.fit_transform(news_data['title'])

In [5]:
import numpy as np
# Concatenate the vectors
combined_vectors = np.concatenate((title_vectors.toarray(), text_vectors.toarray()), axis=1)

# Calculate the correlation matrix
correlation_matrix = np.corrcoef(combined_vectors, rowvar=False)

# Print or further process the correlation matrix as needed
print(correlation_matrix)


[[ 1.          0.06133443  0.08516908 ... -0.00106194 -0.00103078
  -0.00290588]
 [ 0.06133443  1.         -0.0028     ... -0.00354851 -0.00304935
  -0.0025984 ]
 [ 0.08516908 -0.0028      1.         ...  0.0016023  -0.00241373
  -0.00205678]
 ...
 [-0.00106194 -0.00354851  0.0016023  ...  1.          0.01423036
  -0.00260661]
 [-0.00103078 -0.00304935 -0.00241373 ...  0.01423036  1.
  -0.00223994]
 [-0.00290588 -0.0025984  -0.00205678 ... -0.00260661 -0.00223994
   1.        ]]


In [6]:
import numpy as np
# Combine the text and title vectors
combined_vectors = np.hstack((text_vectors.toarray(), title_vectors.toarray()))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(combined_vectors, news_data['label'], test_size=0.2, random_state=42)

# Train a Logistic Regression model
model = LogisticRegression(max_iter=1000)  # Increased max_iter
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.9910497378851809
[[4118   21]
 [  49 3633]]
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4139
           1       0.99      0.99      0.99      3682

    accuracy                           0.99      7821
   macro avg       0.99      0.99      0.99      7821
weighted avg       0.99      0.99      0.99      7821



In [7]:
import pickle

# Save the model
filename_model = 'finalized_model.pkl'
pickle.dump(model, open(filename_model, 'wb'))

# Save the vectorizers
filename_vectorizer_text = 'tfidf_text_vectorizer.pkl'
pickle.dump(tfidf_text, open(filename_vectorizer_text, 'wb'))

filename_vectorizer_title = 'tfidf_title_vectorizer.pkl'
pickle.dump(tfidf_title, open(filename_vectorizer_title, 'wb'))


In [8]:
import pickle
import numpy as np

# Load the saved model and vectorizers
filename_model = 'finalized_model.pkl'
filename_vectorizer_text = 'tfidf_text_vectorizer.pkl'
filename_vectorizer_title = 'tfidf_title_vectorizer.pkl'

model = pickle.load(open(filename_model, 'rb'))
tfidf_text = pickle.load(open(filename_vectorizer_text, 'rb'))
tfidf_title = pickle.load(open(filename_vectorizer_title, 'rb'))

# Create mock prediction data
mock_data = {
    'title': [
        'Watch: Is This Proof Trump Is Unfit for Service?',
        'Sean Hannity Gets Wrecked For Yelling At Time Magazine For Calling Out Trumpâ€™s Lie',
        'U.S. appeals court rejects challenge to Trump voter fraud panel',
        'South Africas Dlamini-Zuma says business endorsement not a priority in ANC race',
        'Saudi king appoints 30 judges, promotes 26 amid anti-graft purge'],
    'text': [
        'New questions are being asked about President Donald Trump s ability to lead after he slurred his words during a speech about his Jerusalem decision. Possible reasons for this include: fatigue, a dry mouth (the White House explanation), the use of drugs or alcohol, a problem with his dentures or more troubling issues dealing with his mental or physical health. Morning Joe reported this morning that, unlike other presidents, Trump has opted not to get his physicals at the Walter Reed Army Medical Center.Questions about Trump s mental stability have been growing over the last few months. While he has never been viewed as a stable person in the traditional sense, his tweets and comments have gotten more erratic. He was widely criticized recently when he retweeted several anti-Muslim videos that were posted by radicals in the United Kingdom.One psychiatrist talk to MSNBC s Lawrence O Donnel about his impressions of Trump s state of mind.Many think that any degradation in Trump s mental state may be due to the increased pressure he is feeling from Robert Mueller s investigations into collusion between his campaign and the Russian government. This has increased since former National Security Advisor Michael Flynn pleaded guilty to lying to the FBI.All of this talk is leading to more people to ask if Trump should be removed from office, citing the 25th Amendment to the U.S. Constitution. Rep. Jamie D. Raskin (D-MD) has circulated a  dear colleague  letter suggesting just that. As published in the Washington Post, it says: Please join a rapidly growing group of colleagues in cosponsoring H.R. 1987, the Oversight Commission on Presidential Capacity Act. It sets up and defines the Congressionally-appointed  body  called for by the 25th Amendment. Under Section 4 of the 25th Amendment, the Vice-President and a majority of the Cabinet or the Vice-President and a majority of  such other body as Congress may by law provide  can determine that the President is for reasons of physical or mental incapacity unable to discharge the powers and duties of his office. The 25th Amendment was added to the Constitution in 1967, but in the last 50 years Congress never created the  body  that its language contemplated. Perhaps it never occurred to prior Congresses that setting up this body was necessary. For obvious reasons, it is indeed necessary, and now is the time for us to do it. While the Republicans in the Cabinet and Congress may not yet be ready to take this step, it is out there.Featured image via Andrew Burton/Getty Images',
        'Fox News host Sean Hannity is having a Twitter meltdown after Time called out Donald Trump for claiming that the magazine called him regarding the Person of the Year award and that he declined the offer.Time magazine corrected Trump in a tweet, writing,  The President is incorrect about how we choose Person of the Year. TIME does not comment on our choice until publication, which is December 6. The magazine calling out Trump was too much to bear for Trump cult member Sean Hannity who called it  bullshit  just hours after Time denied the former reality show star s bizarre claim. I call total Bullshit on Time. Answer the question; did you or did you not call the WH and say @realDonaldTrump @POTUS was being considered for person of the year and ask for an interview?  Hannity tweeted Friday.I call total Bullshit on Time. Answer the question; did you or did you not call the WH and say @realDonaldTrump @POTUS was being considered for person of the year and ask for an interview? https://t.co/DaqfDBsr0V  Sean Hannity (@seanhannity) November 25, 2017And again.So @Time is worse than Fake News @CNN. The coming ice age,  The Big Cool  ha. https://t.co/mppoFtFsvc  Sean Hannity (@seanhannity) November 25, 2017Sean really wants the magazine s attention.Waiting  https://t.co/90GSYMVVKU  Sean Hannity (@seanhannity) November 25, 2017Twitter users, of course, are calling out Liddle Sean.You re questioning Time? Why aren t you questioning the man who hung fake Time covers of himself in his resorts? It s sad you haven t caught on to his lies yet, Sean.  Mrs. SMH (@MaraLaGoFuckYou) November 25, 2017pic.twitter.com/UJtVSaKgOF  What the Fork (@WhattheForkLLC) November 25, 2017Only the finest make it on @TIME @POTUS44 @MichelleObama pic.twitter.com/fGLX2Rzgwo  NAT (@N_A_T_39) November 25, 2017LMAO! Let s look at this logically. @Time magazine has been in business 93 years. It s never gone bankrupt. It s a respected institution.@realdonaldtrump has gone bankrupt 4 times, has been caught in scores of lies including a fake Time cover mounted on his golf club walls. pic.twitter.com/ywzr96arh1  Annalise   Brown (@SWFLib) November 25, 2017Donnie can t stand the idea of Obama being Time s Man of the Year in the first year of his presidency. He knows he ll never be half the man or the President @BarackObama was, and it s eating him up inside. pic.twitter.com/ROMxC3oc7C  Annalise   Brown (@SWFLib) November 25, 2017As this woman notes, Trump has always been obsessed with Time magazine.Honey, just get the man warm milk and one for yourself Also. pic.twitter.com/LMV9lwg9Lx  kastherine (@Mercedes8_S) November 25, 2017Find a comfy chair. Drink some warm milk. And wait for the voices to stop.  Charles P. Pierce (@CharlesPPierce) November 25, 2017With Trump s history, a man who literally hangs photoshopped Time magazine covers featuring his big stupid face, on the walls of at least 4 of his golf courses, we re pretty it s not Time magazine who is lying.Photo by Rob Kim/Getty Images.',
        '(Reuters) - A U.S. appeals court in Washington on Tuesday upheld a lower courtâ€™s decision to allow President Donald Trumpâ€™s commission investigating voter fraud to request data on voter rolls from U.S. states. The U.S. Court of Appeals for the District of Columbia Circuit said the Electronic Privacy Information Center (EPIC) watchdog group, which filed the lawsuit, did not have legal standing to seek to force the presidential commission to review privacy concerns before collecting individualsâ€™ voter data. EPIC had argued that under federal law, the commission was required to conduct a privacy-impact assessment before gathering personal data. But the three-judge appeals court panel ruled unanimously that the privacy law at issue was intended to protect individuals, not groups like EPIC. â€œEPIC is not a voter,â€ Judge Karen Henderson wrote in the ruling.  Washington-based U.S. District Judge Colleen Kollar-Kotelly first denied EPICâ€™s injunction request in July, in part because the collection of data by the commission was not technically an action by a government agency so was not bound by laws that govern what such entities can do.  Kollar-Kotelly noted that the commission, headed by Vice President Mike Pence, was an advisory body that lacks legal authority to compel states to hand over the data. Most state officials who oversee elections and election law experts say that voter fraud is rare in the United States. Trump, a Republican, set up the commission in May after charging, without evidence, that millions of people voted unlawfully in the 2016 presidential election in which he defeated Democratic opponent Hillary Clinton despite losing the popular vote.  The commissionâ€™s vice chair, Kris Kobach, the Republican secretary of state for Kansas and an advocate of tougher laws on immigration and voter identification, asked states in June to turn over voter information. The data requested by Kobach included names, the last four digits of Social Security numbers, addresses, birth dates, political affiliation, felony convictions and voting histories.  More than 20 states refused outright and others said they needed to study whether they could provide the data. Civil rights groups and Democratic lawmakers have said the commissionâ€™s eventual findings could lead to new ID requirements and other measures making it harder for groups that tend to favor Democratic candidates to cast ballots. EPIC executive director Marc Rotenberg could not immediately be reached for comment. ',
        'JOHANNESBURG (Reuters) - South African politician Nkosazana Dlamini-Zuma said on Thursday it was fine if the country s white business community declined to endorse her bid to succeed President Jacob Zuma as leader of the African National Congress (ANC). Her priority was to transfer wealth from the white minority to the black majority, who are generally much poorer. Those who opposed the policy were mainly white people or members of the black elite who want to preserve the status quo, she said.  If we have to choose between our people having a better life and investment, that s not a choice,  she said, when asked about whether her policies could scare away businesses.  I m not afraid. I m not afraid of them. But I m not surprised white minority capital is not endorsing me,  she said on ANN7 television in a rare interview. ANC delegates will vote for a new party president next month, with Dlamini-Zuma expected to face Deputy President Cyril Ramaphosa, a unionist-turned-millionaire businessman who is more popular with foreign investors.  From where I sit, it s looking good. The campaign is going well,  said Dlamini-Zuma, who was married to the president.  The winner of the party vote will be favorite to become the next president of South Africa, either at an election in 2019, or before if Zuma stands down or is forced out by the new ANC leadership next year. Apartheid in South Africa ended in 1994 but much of the country s wealth resides with the white minority. Successive ANC governments have said they want to empower the majority, though many black people have seen only modest economic gains. Dlamini-Zuma, who has held several cabinet posts and was most recently chair of the African Union, has pledged to tackle poverty and close the gaping racial inequality gap. Some investors are concerned about Dlamini-Zuma s proposed plan of  radical economic transformation , which critics have said is a populist term that isn t backed up by solid policies. ',
        'DUBAI (Reuters) - King Salman has appointed or promoted 56 judges, Saudi Arabia s state news agency SPA reported on Thursday, a step coinciding with an anti-corruption crackdown in the kingdom.  The king issued a royal order that promoted 26 judges and appointed 30 others at different levels of the judiciary, SPA said.  Saudi authorities have arrested dozens of royal family members, officials and businessmen in the anti-graft purge that began on Saturday. They face allegations of money laundering, bribery, extortion and exploiting public office for personal gain. It was unclear if the judges will have any role in the widening crackdown. Regulators in the United Arab Emirates have asked banks for information about citizens detained in the investigation, a possible prelude to freezing their accounts. Saudi Arabia has announced it will confiscate assets improperly acquired, and while Riyadh has set no timetable for any such confiscations, banking sources say more than 1,700 domestic bank accounts have already been frozen at the request of the central bank. Huge amounts of money may be at stake. Corruption has over the years siphoned off $800 billion from Saudi state revenues and bankers believe much of it is held abroad, in countries including Switzerland and Britain. ']
}

# Transform the mock data using the loaded vectorizers
text_vectors = tfidf_text.transform(mock_data['text'])
title_vectors = tfidf_title.transform(mock_data['title'])

# Combine the vectors
combined_vectors = np.hstack((text_vectors.toarray(), title_vectors.toarray()))


# Make predictions
predictions = model.predict(combined_vectors)

# Map predictions to "Real" or "Fake"
prediction_labels = ['Real' if pred == 0 else 'Fake' for pred in predictions]

print(prediction_labels)


['Fake', 'Fake', 'Real', 'Real', 'Real']


In [None]:
#ADD# Function to predict and print results

In [None]:
#add part2

In [None]:
oengen

In [None]:
branch