# Table of Contents
- [Load Datasets](#load_data)
- [Prepare Data](#prepare_data)
    - [Unify & Concat Dataframes](#concat_data)
    - [Trainingdata & Testdata Split](#split_data)
    - [Tokenize Strings](#tokenize_strings)
- [Train Data](#train_data)
- [Deploy & Test Data](#deploy_data)
    - [Export Model](#export_model)
    - [Have some fun with Model](#fun)

This project is based on TensorFlow's tutorial on basic text classification:
https://www.tensorflow.org/tutorials/keras/text_classification

<a id="load_data"></a>
# Load Datasets

In [None]:
import pandas as pd

In [None]:
# Read the Trump tweet export into a dataframe, then preview its first rows.
trump_df = pd.read_csv(filepath_or_buffer="./data/RealDonaldTrumpTweets.csv")
trump_df.head(n=5)

In [None]:
# Read the Biden tweet export into a dataframe, then preview its first rows.
biden_df = pd.read_csv(filepath_or_buffer="./data/JoeBidenTweets.csv")
biden_df.head(n=5)

In [None]:
# Read the Musk tweet export into a dataframe, then preview its first rows.
musk_df = pd.read_csv(filepath_or_buffer="./data/ElonMuskTweets.csv")
musk_df.head(n=5)

In [None]:
# Read the Kardashian tweet export into a dataframe, then preview its first rows.
kardashian_df = pd.read_csv(filepath_or_buffer="./data/KimKardashianTweets.csv")
kardashian_df.head(n=5)

<a id="prepare_data"></a>
# Prepare Data

<a id="concat_data"></a>
## Unify & Concat Dataframes

In [None]:
# Bring the Trump frame onto the shared schema: the text column is renamed
# from "content" to "tweet" and every row is labelled with author id 0.
trump_unified_df = (
    trump_df
    .rename(columns={"content": "tweet"})
    .assign(author=0)
)

In [None]:
# Label every Biden row with author id 1.
# `assign` returns a new dataframe, so the freshly loaded `biden_df` stays
# untouched; a plain `biden_unified_df = biden_df` only aliases the frame and
# would add the "author" column to both names.
# NOTE(review): no column is renamed here, so the Biden CSV is assumed to
# already expose a "tweet" column - confirm against the file.
biden_unified_df = biden_df.assign(author=1)

In [None]:
# Bring the Musk frame onto the shared schema: the text column is renamed
# from "Tweet" to "tweet" and every row is labelled with author id 2.
musk_unified_df = (
    musk_df
    .rename(columns={"Tweet": "tweet"})
    .assign(author=2)
)

In [None]:
# Start the unified Kardashian frame from a re-indexed copy of the loaded data.
# drop=False (the pandas default) keeps the previous index as a regular column.
kardashian_unified_df = kardashian_df.reset_index(drop=False)

In [None]:
# Bring the Kardashian frame onto the shared schema: the text column is
# renamed from "text" to "tweet" and every row is labelled with author id 3.
kardashian_unified_df = (
    kardashian_unified_df
    .rename(columns={"text": "tweet"})
    .assign(author=3)
)

In [None]:
# Columns shared by all four per-author frames: the raw tweet text and the
# integer author label (0-3). Used to select the columns that get concatenated.
features = ["tweet", "author"]

In [None]:
# Stack the four per-author frames (restricted to the shared columns) into
# one dataset.
# ignore_index=True gives the result a fresh 0..n-1 index; without it every
# source frame contributes its own labels starting at 0, so label-based
# lookups such as df.loc[0] would return several rows.
df = pd.concat(
    [
        trump_unified_df[features],
        biden_unified_df[features],
        musk_unified_df[features],
        kardashian_unified_df[features],
    ],
    ignore_index=True,
)

In [None]:
# Quick visual check of the combined dataset (first five rows).
df.head(n=5)

<a id="split_data"></a>
## Trainingdata & Testdata Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 15% of the tweets for evaluation.
# - random_state pins the shuffle so the split (and therefore the reported
#   metrics) is reproducible across notebook runs.
# - stratify keeps the per-author class proportions identical in both parts,
#   which matters when the four source files differ in size.
df_train, df_test = train_test_split(
    df,
    test_size=0.15,
    shuffle=True,
    random_state=42,
    stratify=df["author"],
)

<a id="tokenize_strings"></a>
## Tokenize Strings

In [None]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

In [None]:
# Vocabulary cap: passed as `max_tokens` to the TextVectorization layer and
# (plus one) as the input dimension of the embedding layer.
max_features = 10000
# Fixed length of every tokenized tweet: passed as `output_sequence_length`
# to the TextVectorization layer.
sequence_length = 200

In [None]:
# Text -> integer-token layer. It emits one integer id per token ("int"
# mode), is limited to `max_features` vocabulary entries, and produces
# sequences of exactly `sequence_length` ids.
vectorize_layer = TextVectorization(
    output_mode="int",
    max_tokens=max_features,
    output_sequence_length=sequence_length,
)

In [None]:
# Fit the layer's vocabulary on the training tweets only, so no vocabulary
# information is taken from the held-out split.
vectorize_layer.adapt(df_train["tweet"].values)

In [None]:
# Tokenize all training tweets with a single batched call.
# The layer accepts a 1-D string tensor and returns one row of
# `sequence_length` token ids per tweet, so this yields the same
# (n_tweets, sequence_length) integer array as calling the layer once per
# tweet, without the Python-level loop that made this cell take minutes.
tokenized_training_strings = vectorize_layer(
    tf.constant(df_train["tweet"].values)
).numpy()

In [None]:
# Tokenize all held-out tweets with a single batched call.
# The layer accepts a 1-D string tensor and returns one row of
# `sequence_length` token ids per tweet, so this yields the same
# (n_tweets, sequence_length) integer array as calling the layer once per
# tweet, without the slow Python-level loop.
tokenized_testing_strings = vectorize_layer(
    tf.constant(df_test["tweet"].values)
).numpy()

<a id="train_data"></a>
# Train Data

In [None]:
from tensorflow.keras import layers
from tensorflow.keras import losses

In [None]:
# Size of each learned token vector; passed as the output dimension of the
# embedding layer.
embedding_dim = 16

In [None]:
# Classifier assembled layer by layer.
model = tf.keras.Sequential()
# Look up a trainable `embedding_dim`-sized vector for every token id.
model.add(layers.Embedding(input_dim=max_features + 1, output_dim=embedding_dim))
model.add(layers.Dropout(rate=0.2))
# Average the token vectors over the sequence axis -> one vector per tweet.
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dropout(rate=0.2))
# One raw score (logit) per author label 0-3; no activation is applied here.
model.add(layers.Dense(units=4))

model.summary()

In [None]:
# Configure training. Labels are plain integers (0-3) and the final Dense
# layer emits raw logits, hence the sparse categorical loss with
# from_logits=True.
model.compile(
    optimizer="adam",
    metrics=["accuracy"],
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
)

In [None]:
# Number of full passes over the training data; passed to model.fit.
epochs = 10

In [None]:
# Train on the tokenized training tweets and keep the per-epoch metrics in
# `history`. The held-out split doubles as validation data, so the reported
# validation metrics are measured on df_test.
history = model.fit(
    tokenized_training_strings,
    df_train["author"],
    epochs=epochs,
    validation_data=(tokenized_testing_strings, df_test["author"]),
)

<a id="deploy_data"></a>
# Deploy & Test Data

<a id="export_model"></a>
## Export Model

In [None]:
# End-to-end model for inference on raw strings: tokenization, the trained
# classifier, and a final activation that turns the four logits into scores.
#
# The classifier was trained as a single-label, 4-class problem
# (SparseCategoricalCrossentropy with from_logits=True), so the matching
# output activation is softmax: it yields one probability per author that
# sums to 1 across the four classes. A sigmoid would squash each logit
# independently and would not produce the probability distribution that the
# from_logits=False loss below expects.
export_model = tf.keras.Sequential([
    vectorize_layer,
    model,
    layers.Activation('softmax')
])

# The outputs are now probabilities rather than logits, hence from_logits=False.
export_model.compile(
    loss=losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer="adam",
    metrics=['accuracy']
)

<a id="fun"></a>
## Have some fun with Model

In [None]:
# Sample text for the Biden class (label 1). The surrounding newlines and
# indentation are part of the string that is passed to the model.
biden_tweet = """
    We're on the right track. The American Rescue Plan laid a strong foundation for a new economy that brings everybody along, but it's just the first step. We must pass the American Jobs Plan to build an economy that works for everyone.
"""

In [None]:
# Sample text for the Musk class (label 2). The surrounding newlines and
# indentation are part of the string that is passed to the model.
musk_tweet = """
    0 to 60mph in under 2 secs. Quickest production car ever made of any kind. Has to be felt to be believed.
"""

In [None]:
# Sample text for the Trump class (label 0). The surrounding newlines and
# indentation are part of the string that is passed to the model.
trump_tweet = """
    96% Approval Rating in the Republican Party. Thank you!
"""

In [None]:
# Sample text for the Kardashian class (label 3). The surrounding newlines
# and indentation are part of the string that is passed to the model.
kardashian_tweet = """
    I’m so embarrassing about wanting to post that pic!!!! Should I post it on National Peach Day!!!????
"""

Remember:
- 0: Trump
- 1: Biden
- 2: Musk
- 3: Kardashian

In [None]:
# Score the four sample tweets with the end-to-end model.
# The result has one row per input, in the order given below, and one column
# per author label (0: Trump, 1: Biden, 2: Musk, 3: Kardashian); the column
# with the highest score is the predicted author.
export_model.predict([
    biden_tweet,
    musk_tweet,
    trump_tweet,
    kardashian_tweet
])