### import packages

In [1]:
import csv
import random
from sklearn.model_selection import train_test_split
import os
import numpy as np
import pandas as pd
import math

### load Data and create training- and testdata


In [2]:
def load_data(file_name):

    if os.path.exists(file_name): #Kontrolle, ob die Daten vorhanden sind
            with open(file_name, 'r', encoding="utf8") as csvfile: #Öffnen und auslesen der Datei 
                csv_reader_object = csv.reader(csvfile, delimiter=',')
                counter = 0
                csv_list = []
                for row in csv_reader_object: #Jede Zeile in den Daten wird ausgelesen und in einer Liste gespeichert
                    #print(row)
                    if counter == 0:
                        pass
                    else:
                        csv_list.append(row+[file_name[7:11]])
                    counter += 1
            
            #print(counter-1,"Einträge aus", file_name[7:], "geladen")
            return csv_list        
    else:
        print("Datei", file_name ,"nicht gefunden") 
        
def get_data():
    file_folder = "./data/"
    files = ["Fake.csv","True.csv"]

    main_data = []

    for element in files: #Jede Datei aus der Liste wird ausgelesen
        file_name = file_folder+element
        main_data += (load_data(file_name))

    #print("Es gibt insgesamt", len(main_data), "Einträge")

    #random.shuffle(main_data) #Randomizieren aller Daten
    main_data,unused_data = train_test_split(main_data,test_size=0.95) #Reduzierung des Datensatz auf 30%
    train_data,test_data = train_test_split(main_data,test_size=0.2) #Unterteilung in Training- und Testdaten

    print("Länge train_data:", len(train_data)," und Länge test_data:", len(test_data))
    #print(train_data[0][3])

    return train_data,test_data

# Vorverarbeitung

In [3]:
def clear_data(data):

    qwe_list = "!\"#$%&()*+-./:;<=>?@[\]^_`{|}~\n1234567890“”,'‘’…¿0\\"
    forbidden_words = ["pictwitter","http","reuters","\\xa0","\\u200"]
    data_clear = []
    
    for news in data: #Für jeden Datensatz werden Zahlen und Sonderzeichen herausgefilter und in einzelne Wörter unterteilt
        try:        
            record = news[0]+news[1]
        except:
            record = news[0]
        for element in qwe_list:
            record = record.replace(element," ")
        record = record.split(" ")

        tmp_list = []
        for element in record: #Für jedes Wort im Datensatz wird kontrolliert, ob es erlaubt ist und in Kleinbuchstaben gesetzt
            element = element.replace(" ","").lower()
            skip = 0
            for fbw in forbidden_words:
                if fbw in element:
                    skip = 1
                    break                    
            
            if len(element)>1 and skip == 0 : #jedes Wort muss eine minimal Länge von 2 Zeichen besitzen
                tmp_list.append(element)
        try:    
            news = [tmp_list,news[4]]
        except:
            news = [tmp_list]
        data_clear.append(news)
    return data_clear

In [4]:
#Laden und Aufbreiten der Training- und Testdaten
train_data,test_data = get_data()
train_data_clear = clear_data(train_data)
test_data_clear = clear_data(test_data)

Länge train_data: 1795  und Länge test_data: 449


## Balancetest

In [None]:
train_true = 0
train_false = 0
test_true = 0
test_false = 0

for x in range(len(train_data_clear)):
    if train_data_clear[x][1] == "Fake":
        train_false+= 1
    if train_data_clear[x][1] == "True":
        train_true+= 1
        
for x in range(len(test_data_clear)):
    if test_data_clear[x][1] == "Fake":
        test_false+= 1
    if test_data_clear[x][1] == "True":
        test_true+= 1
    
print(f"Balance (True/False): 90% - 110% is good\t >100% => more True\nTraining: {round(train_true/train_false*100,4)}%\nTest: {round(test_true/test_false,4)*100}%")

# TF-IDF

In [6]:
def get_tf_idf_train(train_data): #Umwandlung der Wörter im Trainingdatensatz in Wahrscheinlichkeiten
    all_word_count = {} #Dict für alle Wörter in allen Texten
    news_count = len(train_data) #Anzahl der Nachrichtentexte
    news_tf = {} #Dict für tf-Werte pro Wort je Nachrichtentext {0:{word:tf-Wert,...}...}
    unique_words = [] #Liste aller vorkommenden Wörter
    index = 0 #Index für den news_tf
    
    for news in train_data: 
        len_news = len(news[0]) #Länge des aktuellen Nachrichtentext
        tmp_word_count = {} #Dict für alle in Wörter in einem Nachrichtentext
        news_tf.update({index:{}}) #Hinzufügen eines neues Dict für einen Nachrichtentext
                      
        for word in news[0]: #Für jedes Wort im Nachrichtentext
            if word not in unique_words: #Überprüfen, ob das Wort noch nicht in unique_words vorkommt, falls ja hinzufügen dieses
                unique_words.append(word)
            
            if word in tmp_word_count.keys():
                tmp_word_count[word] += 1
            else:
                tmp_word_count.update({word:1})
                
        for word in tmp_word_count.keys(): #Temporäres Dict für jeden Nachrichtentext mit Anzahl der vorkommenden Wörter
            
            news_tf[index].update({word:tmp_word_count[word]/len_news}) #{word:tf}
            
            if word in all_word_count.keys():
                all_word_count[word] += 1
            else:
                all_word_count.update({word:1})
                
        index += 1

    idf_dict = {} #{word:idf}
    for word in all_word_count.keys(): 
        df = all_word_count[word]/news_count #df
        idf = math.log(news_count/(df+1),10)
        idf_dict.update({word:idf})
        #tf_idf = tf * idf

    unique_words.sort() #Alphanumerisches Sortieren des Datensatz
    unique_word_count = len(unique_words) #Länge der Liste mit einmaligen Wörtern
    
    #Shape der Matrix = news_count*unique_word_count+1 | das +1 ist für den class_value(True/Fake)(1/0)
    tf_idf_matrix = np.zeros((news_count, unique_word_count+1), dtype=float) 
    # Erstellen einer Nullmatrix mit Anzahl Texte*Anzahl Wörter
    
    for news_index in range(news_count):
        if train_data[news_index][-1] == "True":
            tf_idf_matrix[news_index,unique_word_count] = 1            
        for word_index in range(unique_word_count):
            #print([news_index,word_index])
            if unique_words[word_index] in news_tf[news_index].keys():
                tf_idf_matrix[news_index,word_index] = news_tf[news_index][unique_words[word_index]] * idf_dict[unique_words[word_index]]

    return tf_idf_matrix,all_word_count,news_count

In [7]:
def get_tf_idf_test(test_data,all_word_count,total_rows):#Umwandlung der Wörter im Testdatensatz in Wahrscheinlichkeiten
    news_count = len(test_data) 
    news_tf = {}
    index = 0
    unique_words = list(all_word_count.keys())
    unique_word_count = len(unique_words)
    
    for news in test_data: 
        #Für jeden Datensatz der Testdaten wird ein temporäres Dict erstellt 
        #in welchem die Anzahl der vorkommenden Wörter gespeichert wird.
        len_news = len(news[0])
        tmp_word_count = {}
        news_tf.update({index:{}}) 
        
        for word in news[0]:
            
            if word not in unique_words: #Wörter, welche nicht im Trainingsdatensatz vorkommen werden aussortiert
                continue

            if word in tmp_word_count.keys():
                tmp_word_count[word] += 1
            else:
                tmp_word_count.update({word:1})

        for word in tmp_word_count.keys(): #Temporäres Dict für jeden Nachrichtentext mit Anzahl der vorkommenden Wörter
            
            news_tf[index].update({word:tmp_word_count[word]/len_news}) #{word:tf}
            
            if word in all_word_count.keys():
                all_word_count[word] += 1

        index += 1
    
    idf_dict = {} #{word:idf}
    total_rows += news_count
    
    for word in all_word_count.keys():
        df = all_word_count[word]/(total_rows) #df
        idf = math.log((total_rows)/(df+1),10)
        idf_dict.update({word:idf})   
        
    tf_idf_matrix = np.zeros((news_count, unique_word_count), dtype=float)
    for news_index in range(news_count):
        for word_index in range(unique_word_count):
            if unique_words[word_index] in news_tf[news_index].keys():
                tf_idf_matrix[news_index,word_index] = news_tf[news_index][unique_words[word_index]] * idf_dict[unique_words[word_index]]
    return tf_idf_matrix

In [8]:
#Umwandeln des Training- und Testdatensatz ins TF-IDF-Format
tf_idf_train, all_word_count,total_rows = get_tf_idf_train(train_data_clear)
tf_idf_test = get_tf_idf_test(test_data_clear,all_word_count,total_rows)

In [9]:
counter = 0
counter_words = 0
gesamt = 0
for element in tf_idf_test[0]:
    if element>0:
        print(f"{counter}: {element}")
        counter_words += 1
        gesamt+= element
    counter += 1
print(f"Gesamtwörter im Text: {counter_words}")
print("Gesamt: ",gesamt)

7: 0.02392780061920321
11: 0.07174377299554488
22: 0.04773063375223251
38: 0.07168346101418677
58: 0.07183256913326919
67: 0.025870811145451655
71: 0.02548871183420753
82: 0.04860907965846482
86: 0.09573925929696597
100: 0.024383755699161156
107: 0.025515954449205867
116: 0.0721278338915953
132: 0.025425176620374697
149: 0.04941943915428312
150: 0.02549611997173544
172: 0.02423138577684466
181: 0.07326679886636671
189: 0.05168937029035016
202: 0.0249619920282557
203: 0.02414231426304589
211: 0.02493779056428135
229: 0.025869431065855705
295: 0.05050579582213202
326: 0.050226299139247955
328: 0.04936670670168309
351: 0.024532108920738017
382: 0.02548871183420753
389: 0.025859786194597924
487: 0.025973104076500896
507: 0.05140764792559134
664: 0.04992609638377506
691: 0.025533405244088918
723: 0.02559647259223169
727: 0.05130850646638284
754: 0.10304321888420391
802: 0.025186814664591637
804: 0.0505219224496365
994: 0.02579707867268038
1038: 0.025408272329550577
1149: 0.05192064936090398

## Speichern des TF-IDF

In [10]:
# f = open(filename, "w")
# json.dump(dic, f)
# f = open(filename, "r")
#     dic = json.loads(f.read())
    
# np.save('word_vec_dict.npy', word_vec_dict)
# np.load('word_vec_dict.npy', allow_pickle=True)[()]

# Naives Bayes


In [23]:
def summarize_dataset(dataset): #Berechnung des Durschnittswert, Standardbweichung und Anzahl im Datensatz
    print(dataset)
    stats = [(np.mean(row),np.std(row),len(row)) for row in zip(*dataset) if (np.mean(row)!=0)]
    #print("stats: ",stats)
    #print(stats)
    return stats

def naives_bayes(tf_idf_matrix): #Anwendung des Naives Bayes Algorithmus 
    
    separated = {0:[],1:[]}
    summaries = {}
    
    for element in range(len(tf_idf_matrix)): #Aufteilen des Datensatz in True und Fake
        vector = tf_idf_matrix[element]
        #print("vector: ", vector)
        class_value = vector[-1]
        separated[class_value].append(vector[:-1:])
    
    for class_value, rows in separated.items(): #Speichern 
        summaries[class_value] = summarize_dataset(rows)

    return summaries

## Probabilities

In [24]:
def pdf(x, mean, std): #Berechnung der (Gaussian) probability distribution function
    stdx = 2*std**2
    #print("std: ",std)
    #print("stdx: ",stdx)
    exponent = math.exp(-((x-mean)**2 / stdx))
    pdf = (1 / (math.sqrt(2 * math.pi) * std)) * exponent
    #print("pdf: ",pdf) 
    return pdf
    

In [25]:
def probs(summaries,new_news):
    total_rows = sum([summaries[label][0][2] for label in summaries])

    probabilities = {}
    for class_value, class_summaries in summaries.items():
        probabilities[class_value] = summaries[class_value][0][2]/total_rows
        #print(class_value,probabilities[class_value],summaries[class_value])
        for element in range(len(class_summaries)):
            mean, std, count = class_summaries[element]
            #print(new_news[element])
            probabilities[class_value] *= pdf(new_news[element], mean, std)
    print(probabilities)
    return probabilities

In [26]:
# Vorhersage der class_value des Testdatensatz
def predict(summaries, new_news):
    
    probabilities = probs(summaries, new_news)
    best_label, best_prob = None, -1
    
    for class_value, probability in probabilities.items():
        #print(probability," : ",class_value)
        if best_label is None or probability > best_prob:
            best_prob = probability
            best_label = class_value
    return best_label

## Create Model

In [27]:
def create_model(train_data):
    model = naives_bayes(train_data) #Trainieren des Algorithmus mit Testdaten
    return model

## Test


In [28]:
def test(train_data, test_data): #Lernen des Algorithmus und Testen des Algorithmus mit Testdaten
    
    model = create_model(train_data)
    
    predictions = []
    for new_news in test_data: #Testen des Algortihmus mit Testdaten
        predictions.append(predict(model,new_news))
    return predictions,model


In [29]:
def get_class_values(data): #Herausfiltern der class_values aus dem Testdatensatz
    test_class_values = []
    for element in data:
        test_class_values.append(element[1])
    return test_class_values

In [30]:
test_class_values = get_class_values(test_data_clear)
#print(test_class_values)
predictions,model = test(tf_idf_train,tf_idf_test)

[array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 

[array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 0., 0., ..., 0., 0., 0.]), array([0., 

{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: nan, 1: 0.0}


  probabilities[class_value] *= pdf(new_news[element], mean, std)
  probabilities[class_value] *= pdf(new_news[element], mean, std)


{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.0}
{0: 0.0, 1: 0.

In [31]:
#1. Wahrscheinlichkeiten der Worte pro Klasse aggregieren/Durschnitt 1 Vektor pro Klasse 0,5>x True
#{0:{word1:0.54,word2:0.4}, (0.54+0.4)/2 
#1:{word1:0.46,word2:0.6}}

# Evaluation

In [20]:
def evaluation(test_class_values, predictions): # Evaluieren des Algorithmus
    fp,tp,fn,tn = 0,0,0,0

    for class_value in test_class_values:
        for pred in predictions:

            if class_value == "True" and pred == 1:
                tp+= 1
                break
            elif class_value == "True" and pred == 0:
                fn+= 1
                break
            elif class_value == "Fake" and pred == 0:
                tn+= 1
                break
            elif class_value == "Fake" and pred == 1:
                fp+= 1
                break
    
    print("Gesamt Nachrichtentexte:", fp+tp+fn+tn)
    print(f"FP: {fp}: TP : {tp} FN: {fn} TN: {tn}")
    
    if fp+tp+fn+tn > 0:
        accuracy = round((tp+tn)/(fp+tp+fn+tn),3)
    else:
        accuracy = 0
        
    if tp+fp > 0:
        precision = (tp)/(tp+fp)
    else:
        precision = 0
    
    if tp+fn > 0:
        recall = (tp)/(tp+fn)
    else:
        recall = 0
        
    if precision+recall > 0:
        f1score = 2*(precision*recall)/(precision+recall)
    else:
        f1score = 0
        
    print(f"Genauigkeit: {round(accuracy*100,3)}%\nPräzision: {round(precision,3)}\nRecall: {round(recall,3)}\nF1-Score: {round(f1score,3)}")
    

In [21]:
evaluation(test_class_values, predictions)

Gesamt Nachrichtentexte: 449
FP: 0: TP : 0 FN: 221 TN: 228
Genauigkeit: 50.8%
Präzision: 0
Recall: 0.0
F1-Score: 0


# Sklearn

In [22]:
# # Get training data.
# from sklearn.datasets import fetch_20newsgroups
# news_groups_train = fetch_20newsgroups(subset="train")

# # Create dictionary and transform to feature vectors.
# from sklearn.feature_extraction.text import CountVectorizer
# count_vector = CountVectorizer()
# X_train_counts = count_vector.fit_transform(news_groups_train.data)

# # TF-IDF vectorize.
# from sklearn.feature_extraction.text import TfidfTransformer
# tfidf_transformer = TfidfTransformer()
# X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

# Create model(naive bayes) and training. 
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X_train_tfidf, news_groups_train.target)

# Create test documents and vectorize.
docs_new = ['God is love', 'OpenGL on the GPU is fast', "United states goes to Iraq"]
X_new_counts = count_vector.transform(docs_new)
X_new_tfidf = tfidf_transformer.transform(X_new_counts)

NameError: name 'X_train_tfidf' is not defined

In [None]:
from sklearn.naive_bayes import MultinomialNB

naive_bayes_classifier = MultinomialNB()
naive_bayes_classifier.fit(X_train_tf, train_y)
y_pred = naive_bayes_classifier.predict(test_data_clear)

# Anwendungsfall

In [None]:
example_data = ['RAND PAUL Picks Fight With Wrong Senator: Says Cruz Is “Pretty Much Done” In Senate Because He Won’t Get Along [VIDEO] Here s a newsflash Rand We re not looking for a guy who can win a popularity contest in Washington to be our next President. We re actually looking for a candidate who is willing to stand up to politicians who ve forgotten who they came to Washington DC to represent We really do love much of what Rand stands for, but his comments about Senator Cruz couldn t have been more off base.Sen. Rand Paul on Tuesday said fellow Republican presidential candidate Ted. Cruz is  done for  in the Senate. Ted has chosen to make this really personal and chosen to call people dishonest in leadership and call them names, which really goes against the decorum and also against the rules of the Senate, and as a consequence, he can t get anything done legislatively,  Paul told Fox News Radio.  He is pretty much done for and stifled and it s really because of personal relationships, or lack of personal relationships, and it is a problem. Don t get us wrong We happen to agree with Senator Rand Paul on a lot of things. Picking on one of the most courageous men in Washington DC however, was not a very good idea especially when there is video footage like this that can be easily accessed:Paul, a Kentucky Republican who has had the backing of his home-state senior senator, Majority Leader Mitch McConnell, despite some tough policy differences, had been asked about Cruz s inability to even muster the support of 11 senators to secure a roll call vote on a procedural motion designed to amend the continuing resolution to keep the government running.The Senate s set to vote on passage of that measure at 10 a.m. on Wednesday, the last day of the federal government s 2015 fiscal year. I approach things a little different, I am still just as hardcore in saying what we are doing, I just chose not to call people liars on the Senate floor and it s just a matter of different perspectives on how best to get to the end result,  Paul said in the interview.Paul backed McConnell s 2014 re-election bid. Cruz had accused McConnell of lying to him about the way forward for the revival of the Export-Import Bank in the Senate.Like Cruz, Paul opposes the CR advanced by McConnell to avoid a shutdown later in the week. But Paul has focused his criticism on the use of the stopgap spending vehicles. Paul would have rather seen more pressure put on Democrats to advance the dozen individual appropriation bills. I would defund not only Planned Parenthood but hundreds and hundreds of regulations, hundreds and hundreds of wasteful programs. I would take them all out, put them on the table and say  You know what Democrats, it doesn t take 60 votes to defund something, it s actually going to take 60 votes to fund any of these programs,  vote on them one at a time and we will see how many of these crazy programs get 60 votes. My guess would be very few, but that would take the courage to let the spending expire and start anew and let new programs all require 60 votes to pass,  Paul said.McConnell signaled Tuesday he intends to call additional votes on the regular fiscal 2016 spending bills, though there was no evidence Democrats accede to that proposal without a big budget agreement between the two parties. Via: Roll CallIf you want to see why Ted Cruz isn t the most popular guy in Washington DC, watch this video that was taken only two days ago on the Senate floor. Watch this, and you ll know why conservatives who are sick of politicians who leave their spines in their home states with their campaign promises love this guy:']
#Fake

example_data_clear = clear_data(example_data)

print(example_data_clear)

tf_idf_example = get_tf_idf_test(example_data_clear,all_word_count,total_rows)
pred = predict(model,tf_idf_example)

print("Dieser Artikel ist", pred)