# Trump Bot: Topic Clustering mit GloVe Embeddings

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import time
import re
import operator
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [3]:
# Einlesen des Datensatzes, "tweets_test" ist nur eine Test Datei.
train = pd.read_csv("trump_tweets.csv", engine="python", encoding="cp1252")

In [4]:
train.head()

Unnamed: 0,source,text,created_at,favorite_count,id_str
0,Twitter for iPhone,It is a stunner by any stretch of the imaginat...,06-05-2020 13:01:38,38831.0,1.268891e+18
1,Twitter for iPhone,It’s a stupendous number. It’s joyous let’s ca...,06-05-2020 12:59:17,35164.0,1.26889e+18
2,Twitter for iPhone,Oh no the Dems are worried again. The only one...,06-05-2020 12:54:18,59423.0,1.268889e+18
3,Twitter for iPhone,Congratulations to wonderful Charles Payne on ...,06-05-2020 12:51:00,45021.0,1.268888e+18
4,Twitter for iPhone,I will be doing a News Conference at 10:00 A.M...,06-05-2020 12:48:41,59473.0,1.268887e+18


In [5]:
train.shape

(49355, 5)

In [6]:
# Hier wird die Tweet Variable deklariert
tweets = train.text
tweets.head

<bound method NDFrame.head of 0        It is a stunner by any stretch of the imaginat...
1        It’s a stupendous number. It’s joyous let’s ca...
2        Oh no the Dems are worried again. The only one...
3        Congratulations to wonderful Charles Payne on ...
4        I will be doing a News Conference at 10:00 A.M...
                               ...                        
49350    My persona will never be that of a wallflower ...
49351    New Blog Post: Celebrity Apprentice Finale and...
49352    Donald Trump reads Top Ten Financial Tips on L...
49353    Donald Trump will be appearing on The View tom...
49354    Be sure to tune in and watch Donald Trump on L...
Name: text, Length: 49355, dtype: object>

# Tweets "säubern" mit regulären Ausdrücken

In [7]:
def clean_tweet(tweet):
    if (type(tweet) == str):
        if (not re.search(r"^RT.*$", tweet) and not re.search(r"^http.*$", tweet)):
            tweet = str(tweet).lower()
            tweet = re.sub(r'https?:\/\/.*[\r\n]*', '', tweet, flags=re.MULTILINE)
            tweet = re.sub(r'[_"\-;%()|.,+&=*%]', '', tweet)
            tweet = re.sub(r'\.', ' . ', tweet)
            tweet = re.sub(r'\!', ' !', tweet)
            tweet = re.sub(r'\?', ' ?', tweet)
            tweet = re.sub(r'\,', ' ,', tweet)
            tweet = re.sub(r':', ' : ', tweet)
            tweet = re.sub(r'#', ' # ', tweet)
            tweet = re.sub(r'@', ' @ ', tweet)
            tweet = re.sub(r'd .c .', 'd.c.', tweet)
            tweet = re.sub(r'u .s .', 'd.c.', tweet)
            tweet = re.sub(r' amp ', ' and ', tweet)
            tweet = re.sub(r'pm', ' pm ', tweet)
            tweet = re.sub(r'news', ' news ', tweet)
            tweet = re.sub(r' . . . ', ' ', tweet)
            tweet = re.sub(r' .  .  . ', ' ', tweet)
            tweet = re.sub(r' ! ! ', ' ! ', tweet)
            tweet = re.sub(r'&amp', 'and', tweet)
            return tweet
        else:
            return None
    else:
        return None

In [8]:
# Tweets säubern und alle Tweets in clean_tweets packen, dessen Länge > 0 ist.
clean_tweets = []
for tweet in tweets:
    tweet = clean_tweet(tweet)
    if tweet != "none":
        if tweet != None:
            if len(tweet) > 0:
                clean_tweets.append(tweet)
        
len(clean_tweets)

38098

In [9]:
# Prüfung, ob die Tweets gesäubert wurden:
x = 0
for i in range(x, x+5):
    print("Tweet" + str(i))
    print(clean_tweets[i])
    print()

Tweet0
it is a stunner by any stretch of the imagination !  @ cnbc

Tweet1
it’s a stupendous number it’s joyous let’s call it like it is the market was right it’s stunning !  @ jimcramer   @ cnbc

Tweet2
oh no the dems are worried again the only one that can kill this comeback is sleepy joe biden !

Tweet3
congratulations to wonderful charles payne on having been so optimistic and therefore correct market up big ! !

Tweet4
i will be doing a  news  conference at 10 : 00 am on the jobs numbers ! white house



In [10]:
# Zählen, wie oft jedes Wort vorkommt:
word_counts = {}
for tweet in clean_tweets:
    for word in tweet.split():
        if word not in word_counts:
            word_counts[word] = 1
        else:
            word_counts[word] += 1

print("Anzahl der Wörter: ", len(word_counts.keys()))
print("So oft kommt das Wort 'beautiful' vor:", word_counts["beautiful"], "-mal.")

Anzahl der Wörter:  42578
So oft kommt das Wort 'beautiful' vor: 352 -mal.


In [12]:
# GloVe Embeddings laden:
embeddings_index = {}
with open('/Users/maxiw/Desktop/glove.twitter.27B/glove.twitter.27B.200d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split(' ')
        word = values[0]
        embedding = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = embedding

print('Word embeddings:', len(embeddings_index))

Word embeddings: 1193515


In [13]:
# um sich die Embeddings besser vorstellen zu können (# entfernen, um Befehl auszuführen):
print(dict(list(embeddings_index.items())[21:22]))

{'me': array([ 4.3249e-01,  2.0712e-01,  2.5209e-02, -3.4764e-01, -1.2378e-01,
        3.8708e-01,  3.4036e-01,  2.6751e-01,  2.8146e-01, -7.3594e-01,
        1.4706e-01, -1.8731e-01, -2.6153e-01, -3.1550e-01, -1.2383e-01,
       -3.6069e-01, -4.5851e-01,  2.0375e-01, -1.2857e-01,  1.5682e-01,
        4.8246e-01,  1.5320e-01,  3.4228e-01,  2.0119e-01,  6.0353e-02,
       -2.2986e+00,  1.8441e-01,  3.9956e-02,  9.2102e-02,  2.1067e-01,
       -2.8653e-01,  3.2233e-02, -1.5641e-02,  1.9779e-01, -9.2383e-01,
        2.4144e-01,  1.3394e-01, -3.7819e-01, -1.9981e-01,  1.0724e-01,
       -5.4530e-01,  1.3458e-01,  6.9629e-02, -5.2659e-01,  6.8730e-01,
       -3.9534e-01,  1.9915e-01,  1.6586e-02,  3.4376e-01,  3.9721e-01,
        1.2018e-01,  1.7831e-01,  1.9156e-01, -1.6315e-01,  3.4866e-02,
       -5.7168e-01, -2.4449e-01,  5.2592e-01,  2.7688e-01, -1.3461e-01,
        3.8613e-01, -8.5495e-01,  2.2794e-01, -3.1425e-01,  1.5339e+00,
        3.7696e-01, -8.3184e-02, -2.5739e-01, -4.3951e-01

- Start mit einem leeren Embeddings (alle 200 Werte/Dimensionen sind Null)
- Falls ein Wort von einem Tweet im Index ist, wird sein Embedding zu der leeren Liste hinzugefügt
- Falls ein Wort von einem Tweet nicht im Index ist, wird nichts zur leeren Liste hinzugefügt.


In [14]:
embedding_dim = 200 # 200, da die GloVe Embeddings auch 200 Dimensionen haben

embed_tweets = [] # Enthält das durchschnittliche Embedding für jeden Tweet

total_embeds = 0 
null_embeds = 0 

for tweet in clean_tweets:
    avg_embed = np.zeros(embedding_dim) 
    for word in tweet.split():
        total_embeds += 1
        embed = embeddings_index.get(word)
        if embed is not None:
            avg_embed += embed 
        else:
            null_embeds += 1
    embed_tweets.append(avg_embed/len(tweet.split()))

print("Anzahl an Embeddings:", total_embeds)
print("Anzahl von Null Embeddings:", null_embeds)
print("Prozentsatz von Null Embeddings: {}%".format(round(null_embeds/total_embeds,4)*100))

Anzahl an Embeddings: 777853
Anzahl von Null Embeddings: 67873
Prozentsatz von Null Embeddings: 8.73%


In [15]:
print(embed_tweets[0])

[ 8.27227219e-02 -1.25836954e-02  1.42558822e-01 -2.93737650e-02
 -1.71334692e-01  2.07635919e-01  1.92286847e-01 -1.35803923e-01
 -1.64672001e-01 -1.97241523e-02 -6.03387715e-02  1.82045386e-01
 -6.21922233e-01  3.58067467e-02 -5.34036959e-02 -1.64617724e-01
  2.08782381e-01 -6.76391539e-02 -1.63777677e-02  6.07279204e-02
  9.48146125e-02 -2.41376988e-02  3.19568490e-02 -4.65059243e-02
  9.65872361e-02  6.08897700e-01  1.35341766e-01  2.58786611e-01
  2.51609079e-01  2.00170381e-01 -2.53237651e-02 -7.17926783e-02
 -1.42555845e-01 -1.59361138e-01  1.47166994e-01  1.16333174e-01
  3.46482336e-02 -4.80318452e-02 -1.17006151e-02 -5.59257661e-02
  1.42039766e-01  5.83793833e-02  1.90013253e-02 -5.84308465e-02
  2.02483488e-01 -1.77830261e-01  6.12882307e-02  2.34673149e-01
 -1.80303090e-02 -2.37473077e-02  2.91548496e-02 -1.77656463e-01
  6.10000253e-03  7.79467725e-02  2.00889321e-02  4.98897665e-02
  1.11415207e-01 -7.20239991e-02  7.89283073e-02  1.33491542e-01
  9.55112465e-03  7.60802

In [16]:
# Mit der Hauptkomponentenanalyse (PCA) reduzieren wir die Dimension eines jeden Tweets auf 1.
pca = PCA(n_components=1, random_state = 2)
pca_tweets = pca.fit_transform(embed_tweets)

In [17]:
# Wir sehen, dass wir immer noch fast die gleiche Anzahl an Tweets haben. 
# Jeden Tweet stellt jetzt allerdings nur noch eine Zahl dar:
print("Anzahl: ", len(pca_tweets))
print(pca_tweets)

Anzahl:  38098
[[-0.32031705]
 [ 0.62261823]
 [-0.93506205]
 ...
 [ 0.43380925]
 [-0.59173517]
 [-0.5441486 ]]


In [18]:
# Ähnliche Tweets gruppieren mit der K-Means-Methode.
# n_clusters gibt an, wieviele unterschiedliche Gruppen wir wollen
kmeans = KMeans(n_clusters=4, max_iter = 1000, n_init = 20, random_state=2).fit(pca_tweets)
labels = kmeans.labels_

In [19]:
# Wie viele Tweets sind in jeder Gruppe enthalten:
pd.DataFrame(labels)[0].value_counts()

0    13492
3    13036
1     8308
2     3262
Name: 0, dtype: int64

In [20]:
# Stopwords aufzählen, damit diese nicht bei den meist vorkommenden Wörter aufgelistet werden:
stop_words = ['be','on','!','at','.',':','...','@',',','#','will','.m','in','a','the','with','to','by','and','my','is',
              'of','for','new','via','are','that','has','have','all','as','it','so','they','do','he','just','this',
              'was','who','your','from','his','about','get','but','am','up','if','can','would','than','should','dont',
              'had','or','were','did','there','got','even','its','an','i', 'not', 'our', 'we','you', '?','no','their', 'us','rt','great',
             'realdonaldtrump', 'trump', 'very', 'thank', 'thanks', 'president', 'donald', 'what', 'news', 'me', 'never', 'out', 'now', 'good',
             'when', 'like', 'one', 'more', 'run', 'time', 'best', 'going', 'much', 'want', 'big', 'make', 'again', 'many', 'been', 'today', 'him', 'pm', 'true',
             'mr', 'them', 'only', 'back', 'yes', 'need', 'why' , 'tonight', 'over', 'really', 'how', 'other', 'being', 'see', 'show', 'doing', 'think', 'must',
            'trump2016', 'makeamericagreatagain', 'apprenticenbc', 'fox', 'foxandfriends', 'can\'t', 'don\'t', '00', 'i\'m', 'know', 'celebapprentice', 'love', 'vote',
             'america', 'her', '7', 'watch', 'please', '2016', 'it\'s', 'tomorrow', 'she', 'country', 'people', 'go', 'first', 'soon', 'nice', 'years', 'hope', 'needs',
             'you\'re', 'work', 'keep', 'day', 'job', 'better', 'working', 'man', 'could', 'ever', 'done', 'say', 'amazing', 'support', 'bad', 'happy', 'believe',
             'right', 'well', 'always', 'last', 'amazing', 'win', 'which', 'because', 'way', 'real', 'u', '–', 'next', 'you\'ve', 'agree', 'running', 'wait', 'total',
             'said', '“', '“donald', '10']

In [21]:
# Für jede Gruppe die am meisten vorkommenden Wörter finden:
def most_common_words(group, n_words):
    vocab = {} # das Vokabular für jede Gruppe
    for i in range(len(clean_tweets)):
        if labels[i] == group:
            for word in clean_tweets[i].split():
                if word not in vocab:
                    vocab[word] = 1
                else:
                    vocab[word] += 1
      
    # Sortiere die am häufigst vorkommenden Wörter
    sorted_vocab = sorted(vocab.items(), key=operator.itemgetter(1), reverse=True)
    top_n_words = []
    for word, value in sorted_vocab:
        if word not in stop_words:
            top_n_words.append(word)
        if len(top_n_words) == n_words:
            break
    print(top_n_words)

In [22]:
# die am meisten vorkommenden Worte in jeder Gruppe:
groups = len(np.unique(labels))
for i in range(groups):
    print("Gruppe ", i+1,": ")
    most_common_words(i, 10)

Gruppe  1 : 
['obama', 'democrats', 'fake', 'china', 'deal', 'nothing', 'states', 'media', 'united', 'border']
Gruppe  2 : 
['interview', 'enjoy', 'course', 'golf', 'join', 'interviewed', 'poll', 'barackobama', 'obama', 'gop']
Gruppe  3 : 
['birthday', 'ivankatrump', '“trump', 'poll', 'gop', 'cnn', 'interview', 'nbc', 'golf', 'billmaher']
Gruppe  4 : 
['obama', 'barackobama', 'hillary', 'american', 'jobs', 'night', 'world', 'apprentice', 'house', 'china']


Durch diese Gruppen können übergreifende Topics deklariert werden. Zum Beispiel wäre ein mögliches Topic für Gruppe 1 "hillary".

In [74]:
def print_tweet_group(group, n_tweets):
    count = 1
    for i in range(len(clean_tweets)):
        if labels[i] == group:
            print("#{}: {}".format(count, clean_tweets[i]))
            count += 1
            if count == n_tweets+1:
                break

In [24]:
# Die ersten paar Tweets in jeder Gruppe:
n_tweets = 5
for i in range(groups):
    print("Gruppe #",i+1)
    print_tweet_group(i,n_tweets)
    print()

Gruppe # 1
#1: oh no the dems are worried again the only one that can kill this comeback is sleepy joe biden !
#2: no contest steve blows him away so important for montana i’ll be there to help steve win big 
#3: thanks for your invaluable help in getting a great man passed to run voice of america trying for 25 years and you got it done jim idaho is proud of you ! 
#4: so great to have michael home just arrived very exciting thank you to iran don’t wait until after us election to make the big deal i’m going to win you’ll make a better deal now ! 
#5: few people know where they’ll be in two years from now but i do in the great state of alaska which i love campaigning against senator lisa murkowski she voted against healthcare justice kavanaugh and much else

Gruppe # 2
#1: it’s a stupendous number it’s joyous let’s call it like it is the market was right it’s stunning !  @ jimcramer   @ cnbc
#2: these numbers are incredible !  @ mariabartiromo
#3: usa ! 
#4: true ! 
#5: interesting ! 



## Gruppen speichern

In [34]:
def save_tweet_group(group, n_tweets):
    for i in range(len(clean_tweets)):
        if labels[i] == group:
            print("{}".format(clean_tweets[i]))
        

In [51]:
gruppe1 = str(save_tweet_group(0, pd.DataFrame(labels)[0].value_counts()[0]))

oh no the dems are worried again the only one that can kill this comeback is sleepy joe biden !
no contest steve blows him away so important for montana i’ll be there to help steve win big 
thanks for your invaluable help in getting a great man passed to run voice of america trying for 25 years and you got it done jim idaho is proud of you ! 
so great to have michael home just arrived very exciting thank you to iran don’t wait until after us election to make the big deal i’m going to win you’ll make a better deal now ! 
few people know where they’ll be in two years from now but i do in the great state of alaska which i love campaigning against senator lisa murkowski she voted against healthcare justice kavanaugh and much else
great to be with our wonderful men and women of the  @ secretservice what a job they are doing ! 
the problem with asking for someone to give you a letter of resignation which you do as a courtesy to help them save face is that it is then harder to say you fired t

is about american freedom redirect the supply chain there is no reason to buy everything from china !
peter morici economist :  tariffs will not impact american consumers that much because the chinese currency has gone down which gives our importers a discount importers can find suppliers outside of china absolutely worth it we don’t want to be servants to the chinese ! this
enjoy the low gas prices over the labor day weekend ! my energy policies have made america energy independent while keeping prices low just like a tax cut the democrats “green” policies will raise your price of gas ! 
but now we are moving forward like never before we are winning again and we are respected again !
just watched congresswoman debbie dingell and many other democrats wanting to give up on our very successful trade battle with china which has had its worst economic year in memory and getting worse we are taking in $billions will be big for farmers and all !
has anyone noticed that the top shows on  @ fo

no collusion no obstruction  but that doesn’t matter because the 13 angry democrats who are only after republicans and totally protecting democrats want this witch hunt to drag out to the november election republicans better get smart fast and expose what they are doing !
the nfl national anthem debate is alive and well again  can’t believe it ! isn’t it in contract that players must stand at attention hand on heart ? the $40000000 commissioner must now make a stand first time kneeling out for game second time kneeling out for season/no pay !
so important should have been done years ago ! 
china the european union and others have been manipulating their currencies and interest rates lower while the us is raising rates while the dollars gets stronger and stronger with each passing day  taking away our big competitive edge as usual not a level playing field
my deepest sympathies to the families and friends of those involved in the terrible boat accident which just took place in missouri 

while all agree the u s president has the complete power to pardon why think of that when only crime so far is leaks against usfake  news 
this morning i will be going to the commissioning ceremony for the largest aircraft carrier in the world the gerald r ford  norfolk va
sean spicer is a wonderful person who took tremendous abuse from the fake  news  media  but his future is bright !
today it was my privilege to welcome survivors of the  # ussarizona to the wh remarks :  
i am asking all citizens to believe in yourselves believe in your future and believe once more in america  # americafirst ? 
melania and i send our thoughts and prayers to senator mccain cindy and their entire family get well soon 
i will be having lunch at the white house today with republican senators concerning healthcare they must keep their promise to america !
the fake  news  is becoming more and more dishonest ! even a dinner arranged for top 20 leaders in germany is made to look sinister !
as i have always s

now that the mexican drug lord escaped from prison everyone is saying that most of the cocaine etc coming into the us comes over border !
i hear that sleepy eyes  @ chucktodd will be fired like a dog from ratings starved meet the press ? i can't imagine what is taking so long !
phoenix convention center officials did not want to have thousands of people standing outside in the heat so they let them in a great day !
i will not be able to attend the miss usa pageant tomorrow night because i am campaigning in phoenix wishing all well !
getting ready to go to las vegas freedom fest  great crowd then on to amazing phoenix  that will be a total happening ! love america
has anyone seen the financials of  @ univision they are doing really badly too much debt and not enough viewers need money fast funny !
i love that thousands of people are boycotting  @ macys and cutting up credit cards no guts no glory this really backfired  love it !
our biggest problems are solved by growth we need a presid

have to go now to sign a great and job producing deal ! good night
 @ johntisdall realdonaldtrump you owe it to this country go for it you can fix it  i definitely can !
 @ generaltso316 realdonaldtrump i have two of your ties and they are better quality than ties i own that cost $125 at macy's great
snowden is a spy who has caused great damage to the us a spy in the old days when our country was respected and strong would be executed
 @ rwill9584 realdonaldtrump buying the bills would be the best thing for not only the team but for the city of buffalo !  @ nflcommish
i give secretary of state john kerry credit for working and trying hard but he has zero negotiating ability !
remember russia still has snowden when are we going to bring that piece of human garbage back home to stand trial ? he caused great damage !
i will be working late into the evening closing a big real estate deal—soon to be announced happy easter and/or holiday to all
 @ mjohnstonxoxo   my favorite part of school i

lots of people are asking whether or not i should have run for president—stay tuned for the answer
us small businesses are truly worried about rising healthcare costs and taxes 
the iranians are having 'difficulties' with their nuclear program 
the freezing cold weather across the country is brutal  must be all that global warming
“one of the keys to thinking big is total focus” – the art of the deal
it all comes down to one simple question :  how much money can you stand to lose ? that’s how much risk you should assume
the republicans better be careful  obama is out to destroy them !
the real estate market is slowly improving  still a great time to buy  you will thank me in 5 years
terrible for the economy and a job killer china is laughing at us !
wind farms are now being paid to shut down 
 @ securitylecture  that is not the reason
you have to love what you do or you are never going to be successful no matter what you do in life”  think big
i still can’t believe we left iraq without

wake up america  china is eating our lunch
all those politicians in washington and not one good negotiator
why i would not have approved the deal 
no taxes the only good thing about dc debt deal
the more you learn about the debt deal the worse it gets
republicans and democrats have both created our economic problems
why should we have any defense cuts in any deal ? america must remain strong
this is the best deal the republicans could get ?
disappointed in gop and demsgiving obama power to raise the debt limit next year is  a mistake
we need economic growth and jobs not blue ribbon panels to study the problem
if obama has to refight this fight next year he loses   watch the fine details in every deal  the art of the deal
blue ribbon commission to find and agree to future spending cuts ? bad idea
never quit and always hit back  the art of the comeback
i appreciate the kind words of mike huckabee a fine american 
 @ johnboehner  the worst thing you can possibly do in a deal is seem despe

In [60]:
gruppe2 = str(save_tweet_group(1, pd.DataFrame(labels)[0].value_counts()[1]))

it’s a stupendous number it’s joyous let’s call it like it is the market was right it’s stunning !  @ jimcramer   @ cnbc
these numbers are incredible !  @ mariabartiromo
usa ! 
true ! 
interesting ! 
unfair ! 
nasty ! 
caught ! 
yes ! 
wow very impressive josh ! 
silent majority !
yes ! 
100 correct thank you tom ! 
fake  news  !
strength ! 
hopefully a great successful and safe rocket launch lifting off soon ?   @ fox news    @ oann
time for a change !  # 2020 
china !
i’m proud to commit $409m in  @ usdot funding to milwaukee’s eastwest bus rapid transit project bringing modern transit to the region’s most critical corridor and spur millions in economic develo pm ent love wisconsin !  @ ridemcts
ogden utah i am committing $645m to build bus service between weber state university and mckaydee hospital—very important services for utahans !  @ rideuta
i’m excited to commit $100m to  @ miamidadecounty fl in  @ usdot funding to connect fastgrowing communities through stateoftheart transit

 @ shoneep realdonaldtrump trump for president ! bernie is a joke knows nothing  and hillary is yesterday's and today's nightmare
 @ loudobbs :  hillary just handed  @ realdonaldtrump a huge gift :  promising to put bubba in charge of the economy !  # makeamericagreatagain !
 @ sandrajeanne48 :  no way i believe trump at 70 disapproval with women went to 3 rallies at least 1/2 women msm lies  @ thefive
 @ michaelfavreau jis3  @ realdonaldtrump he will destroy hillary but why give cnn record breaking ratings cnn can go to hell trump 2016
 @ thydanielflores megynkelly  @ realdonaldtrump best interview that i have ever seen
 @ svhlevi diamondandsilk  @ realjeffreylord  @ realdonaldtrump  @ cnn and we love you diamond and silk  i do also !
 @ markgruber1960 megynkelly  @ realdonaldtrump that's why he is so successful he is driven to succeed  true !
 @ johnkirtley :    @ megynkelly  @ realdonaldtrump  # makeamericagreatagain thank you for this discourse wounds have been healed great job !
 

 @ yankzpat :  hey ! i got my photos autographed and shook  @ realdonaldtrump 's hand in mason city  # june16th 
 @ bobzilla305 realdonaldtrump  @ krauthammer is a progressive making money off of the conservative viewership a total loser !
 @ foxandfriends :  donald trump :  i would be the jobs president 
 @ eatsleepdan :  i want to see  @ realdonaldtrump run for president
 @ closethedealtv :  go to work be smart think positively and win !  @ realdonaldtrump  # quote
 @ raeraeluv i'm on the fence but if  @ realdonaldtrump runs then i'm all good
trump nat'l golf club philadelphia is a 360 acre beauty and an award winning tom fazio designed coursefantastic ! 
 @ vilmawolfe03  @ realdonaldtrump  @ davevin73  # you get my  # vote for president of the unites states of america !  ? ?
 @ jollygoodman181 :  vote trump for president 2016 americans last hope donald trump let's take our country back vote for the don 2016
 @ jm9145 realdonaldtrump please run usa needs a leader !
 @ davidkaifaith :

 @ shadesoflilac :  rt “ @ realdonaldtrump :  we should not be importing the disease to our homeland” completely agree mr trump
located in south ayrshire scotland  @ trumpturnberry offers diverse dining options suitable for any occasion 
the ultimate vacation destination  @ trumppanama’s sleek design evokes a majestic sail fully deployed in the wind 
 @ illusiverealm :  just bought a  @ realdonaldtrump tie from  @ macys it's beautiful great style great they are top quality at reasonable price
 @ coximus2 :  oh donald don't ever change  @ realdonaldtrump i will try not to !
 @ leannelovesart realdonaldtrump new york city hospital testing patient for possible ebola infection 
mayor bill vescio of briarcliff  manor westchester is doing a terrible job horrible roads high taxes housing down  @ westchestergov
 @ krisinal realdonaldtrump  @ frankluntz explain ? he's a pollster ?  yes but a really bad and boring one !
 @ nicolenrouse :  looking forward to my work week in  # chicago and my favo

 @ mkerob realdonaldtrump great appearance last night on letterman first stern now dave two awesome interviews thanks !
 @ golf4oregod realdonaldtrump great job on letterman thanks !
 @ michaelscarbrou realdonaldtrump good job donald i'll call nbc a see if they'll giv you leno's spotlol
 @ sarahesmith9 realdonaldtrump did awesome on letterman ! i just love him ! thanks sarah !
 @ jrii realdonaldtrump you were awesome ! honest personable intelligent ! thanks !
 @ lizzyoconnor realdonaldtrump is just amazing made my night watching him on letterman  !  # trumpnationaldoral  !  thanks lizzy !
 @ greener17wing realdonaldtrump ya hit it out of the park  # preztrump from a canadian  # yourhired  thanks !
 @ carminemig21 realdonaldtrump u were great   thanks carmine !
 @ craigashwood realdonaldtrump nice job mrtrump ! smooth relaxed and in good humour thanks !
 @ scottinapac realdonaldtrump paired very well with letterman tonight thanks !
 @ higgie0 realdonaldtrump  @ lateshow watched you lett

In [61]:
gruppe3 = str(save_tweet_group(2, pd.DataFrame(labels)[0].value_counts()[2]))

disgraceful ! 
section 230 ! 
november 3rd
revoke 230 !
obamagate !
patriots ! 
100 correct ! 
obamagate !
“scandal has defined the obama administration”  @ lisamarieboothe  @ ffweekend
obamagate !
shifty schiff exposed ! 
coldcasejoe ! 
interestingly ! 
obamagate ! 
obamagate ! 
obamagate !
lyin’ brian williams of msdnc ! 
thank you  @ blueangels and  @ afthunderbirds !  # americastrong 
congratulations  @ afacademy ! 
liberate michigan !
liberate minnesota !
 # thanksfordelivering  @ ups ! 
disgraceful ! 
thank you  @ deptofdefense  @ usacehq  @ usnationalguard  @ fema and  @ cdcgov ! 
thank you  @ jennpellegrino  @ oann 
covid19 update 
 # prayfornashville 
go admiral ronny ! 
thank you  @ cpac !  # cpac2020 
spelled  @ giannocaldwell !
thank you !  # maga  # kag2020 
thank you !  # maga  # kag2020 
thank you !  # maga  # kag2020 
 # daytona500 
prosecutorial misconduct ? 
disgraceful ! 
jobs jobs jobs ! # promisesmadepromiseskept ? 
“what really happened”
 # sotu2020 
 @ lindseygra

 # trumpvlog south african justice 
 @ bvlphonso trump2016  @ realdonaldtrump
 @ bkofleader :  
via  @ dmregister by  @ sharynjackson :  “trump stevekingia has 'the right views' 
 @ realsquezz realdonaldtrump  # presidenttrump2016 for sure !
 @ dreamhousex :  donald trump's palm beach mansion  ? 
 @ beliywulf realdonaldtrump  @ ronmeier123 save america trump
 @ yusiddiqui  @ piersmorgan  @ rustyrockets  i got much better—no contest—i got melania !
 @ usprogress realdonaldtrump for president !
 @ careytim6 :  hope  @ realdonaldtrump runs for president in 2016  # trump2016
 @ einsteincassidy :  donald trump's scottish hotel awarded fivestar rating  
 @ hotelchatter :  it's five stars for donald trump's scottish hotel :  
trump int'l golf links scotland awarded 5 star status by scottish tourism chiefs via mailonline 
 @ coxdanimal realdonaldtrump trump for president  # 2016
 @ foxandfriends in 15 minutes !
 @ sinnon7 :  see inside  @ realdonaldtrump's five star scottish hotel 
 @ leonapel

In [62]:
gruppe4 = str(save_tweet_group(3, pd.DataFrame(labels)[0].value_counts()[3]))

it is a stunner by any stretch of the imagination !  @ cnbc
congratulations to wonderful charles payne on having been so optimistic and therefore correct market up big ! !
i will be doing a  news  conference at 10 : 00 am on the jobs numbers ! white house
this is an amazing jobs report ! edward lawrence  @ fox news 
i am so stunned i’ve never seen numbers like this and i’ve been doing this for 30 years ! steve m  @ mariabartiromo
really big jobs report great going president trump kidding but true !
true steve is a great senator need him badly in washington complete and total endorsement ! 
great going mike ! 
great going brad ! 
we just landed wisconsin a massive navy shipbuilding contract beautiful designs ! 
a must watch ! 
unrelated i gave alaska anwr major highways and more get any candidate ready good or bad i don’t care i’m endorsing if you have a pulse i’m with you !
“police groups break with biden”  @ politico his handlers want him to “defund the police” i want more money for l

congressman neal dunn  @ dunncampaign of florida has done an outstanding job at everything having to do with  # maga now working hard on hurricane relief and rebuild strong on crime strong on borders loves our military and our vets neal has my highest endorsement !
“president donald j trump is following through on his promise to cut burdensome red tape and unleash the american economy”read more :  
“network  news  gave zero coverage to the big day the stock market had yesterday”  @ foxandfriends
stock market up 548 points today also great jobs numbers !
“conflict between glen simpson’s testimony to another house panel about his contact with justice department official bruce ohr ohr was used by simpson and steele as a back channel to get fake dossier to fbi simpson pleading fifth” catherine herridge where is jeff sessions ?
“federal judge throws out stormy danials lawsuit versus trump trump is entitled to full legal fees”  @ fox news  great now i can go after horseface and her 3rd rate 

what an evening in las vegas nevada ! thank you for your continued support  # trump2016 
can't believe major league baseball just rejected  @ peterose14 for the hall of fame he's paid the price so ridiculous  let him in !
 @ scottienhughes  you were fantastic on cnn thank you for the nice words see you at the  # gopdebate
record of health :  
why doesn't  @ fox news  quote the new iowa  @ cnn poll where i have a 33 to 20 lead over ted cruz and all others think about it !
 @ ajodom60 fox news  and as far as that lowinfo voter base goes i have an iq of 132 so much for that theory  # makeamericagreatagain
 @ lesgartcpa jaketapper  @ realdonaldtrump problem is i never watched you until trump started giving you interviews and ratings on cnn
 @ j pm organ2016 :  trump needs his own poll a real poll in every state these polls are so ridiculousthe media will only get worse from here
 @ 1rdgreenberg :    @ fox news  that's why after your interview i couldn't watch the panel discussion which is 

 @ thestangshow realdonaldtrump you the man ! you can never be too young to be a fan of you !  # donaldtrumprocks thanks
 @ jfoxy babcocksflyhair  @ realdonaldtrump do not listen don you are great and should be presidant thanks
joan rivers on the apprentice tonight at 8 : 00 i will be live tweeting joan was great !
the apprentice will be very exciting and interesting tonight at 8 : 00 joan rivers puts on a great show !
i look forward to being in south carolina tomorrow a total sellout crowd !
 @ sritacolombia :  do you think that  # missuniverse is like a world cup ? totally agree  @ realdonaldtrump
 @ notteridax :  hey  @ realdonaldtrump you'd be the best president ever if you ran right now you're hotter than ever !
 @ pjleger realdonaldtrump in my opinion this is the best season of the celebrity apprentice ! !
monday night at 8 : 00 will be must see television our wonderful joan rivers plays a major role as my advisor on the apprentice amazing !
 @ bjkizer74 : donaldtrump why do we k

 @ txshaun realdonaldtrump happy birthday donald ! thanks for never being afraid to tell it like it is
 @ keegster51 :  happy birthday to my idol  @ realdonaldtrump ! i hope one day to be as successful as you !  # trump2016  work hard !
 @ felberjosh :  happy  # birthday to an awesome business icon ! read my first trump book at 14 which jump started my business success thanks
 @ doughutchins77 realdonaldtrump the world just won't be safe until the name trump is emblazoned on the white house
 @ dbigrock34 realdonaldtrump like i said trump for president usa needs a positive change
 @ commishjoe :  happy birthday to successful businessman and true american success story he certainly tells it like it is ! thank you
 @ glock34girl realdonaldtrump please run and get america back on its feet  stay tuned !
turnberry in scotland is a far superior golf course to pinehurst  and it isn't even close ! likewise the blue monster at doral
 @ ruvie18 :  happy birthday  @ realdonaldtrum pm y best author

 @ joecodling realdonaldtrump your book's doing well in the uk mate   thanks
 @ jmkjmkjmkjmkjmk realdonaldtrump just went on a spree at macy's four of your fantastic ties and a pinstripe shirt great stuff love it !
 @ trandusa realdonaldtrump we expect you to have some more investments in turkeyistanbul mr trump  love istanbul !
 @ saskatweet realdonaldtrump how on do you expect pilots to be after crashing a plane ?  who cares they caused the crash !
 @ realraylong realdonaldtrump :  you know so much about so many things  a true renaissance man  # trumpin2016  so true !    thanks
 @ articmink realdonaldtrump just what i needed to hear love to be a follower of  @ realdonaldtrump you're amazing mr trump !  thanks
 @ sgosper realdonaldtrump  he's a foreign pilot we couldn't drug test him even if we wanted to !  you must be kidding !
 @ newnonny realdonaldtrump um how the hell do you know  because i'm very smart dummy !
 @ ccsaunders realdonaldtrump my dad played the new scotland course to

the independent watchdog who exonerated  @ barackobama for the failed green energy loans just donated $52500 to obama's campaign
the opening of  # trumpscotland an exciting day on perhaps the world's best golf course watch the video 
 @ arsenioofficial you have already made plenty because of me how about a 50/50 cutjust joking see you soon !
amazing @ vanityfair survived one more day without folding the clock is ticking
statesman of the year in sarasota fl on sunday nightwill be terrifica total sellout
wind turbines are a scourge to communities and wildlife they are environmental disasters
it is truly an honor that his eminence archbishop of new york  @ cardinaldolan will be delivering the benediction at the  @ rnc convention
it's thursday i wonder how much money  @ barackobama drained from medicare today to finance obamacare
today we just passed 14 million twitter followers
one of the best moves i made early in my career was buying the air rights from tiffany's flagship trump tower gl

In [73]:
# test ob alle tweets aus der jeweiligen Gruppe ausgegeben wurden:
with open('gruppe4.txt', encoding="utf8") as f:
    print(sum(1 for _ in f))

13036
