# Import Essential Libraries

In [2]:
import pandas as pd
import markovify

# Read data

In [15]:
# Load the stock-market tweet CSV into a DataFrame, then preview its first five rows.
csv_path = "data/stock_market_tweets.csv"
dataset = pd.read_csv(csv_path)
dataset.head(5)

Unnamed: 0.1,Unnamed: 0,tweet_id,writer,post_date,body,comment_num,retweet_num,like_num,ticker_symbol
0,0,550441672312512512,KeralaGuy77,2015-01-01,Insanity of today weirdo massive selling. $aap...,0,0,0,AAPL
1,1,550452877466935296,TheTrendIsUp,2015-01-01,My biggest winner in 2014: Inverse Volatility ...,1,0,0,AAPL
2,2,550456665607122944,t_nathan95,2015-01-01,Had a down day of -.66%. Worst performer was $...,0,0,0,AAPL
3,3,550459042787651584,petergo99037185,2015-01-01,"YR %, /-, $TSLA 47.85%, $FB 42.77%, $TWTR -...",0,0,0,AAPL
4,4,550461555423584257,t_nathan95,2015-01-01,Prediction: $TWTR $GRPN $YELP are acquired as ...,0,0,1,GOOG


In [4]:
# Collect the 'body' column as a plain list, keeping only non-empty strings
# (missing values and empty text are dropped), then peek at the first five.
tweets = [
    body
    for body in dataset['body'].tolist()
    if isinstance(body, str) and body != ""
]
tweets[0:5]

['Insanity of today weirdo massive selling. $aapl bid up 45 cents after hours after non stop selling in trading hours',
 'My biggest winner in 2014: Inverse Volatility ETF $XIVMy biggest loser in 2014: Apple $AAPL',
 'Had a down day of -.66%. Worst performer was $AAPL down -1.9% and best was $SBUX up _.32%. #Performance #Transparency',
 'YR %,  /-, $TSLA  47.85%, $FB  42.77%, $TWTR -43.64%, $AAPL  37.72%, $GLD -2.19%, $SLV -19.51%, Wti Crude $OIL -41.11%, @NG.1 NatGas -33.12%',
 'Prediction: $TWTR $GRPN $YELP are acquired as big tech ( $BABA $GOOGL $GOOG $YHOO) look to increase their shares in social media']

# Create Model

In [5]:
class MarkovChainTextGenerator:
    """Generate tweet-like text from a Markov chain trained on example tweets.

    Parameters
    ----------
    tweets : iterable
        Training tweets. Entries that are not strings (for example ``None`` or
        a float ``NaN``) and strings that are blank after stripping whitespace
        are skipped.
    state_size : int, default 1
        Forwarded to ``markovify.Text`` as its ``state_size`` argument.

    Attributes
    ----------
    model
        The object returned by ``markovify.Text`` for the prepared corpus.
    """

    # Message returned by ``generate_tweet`` when no sentence could be produced.
    FALLBACK_MESSAGE = "Model failed to generate a tweet. Try training with more data."

    # A tweet that already ends with one of these is not given an extra period.
    _SENTENCE_ENDINGS = ('.', '!', '?')

    def __init__(self, tweets, state_size=1):
        self.model = self.build_model(tweets, state_size)

    def build_model(self, tweets, state_size):
        """Build a ``markovify.Text`` model from ``tweets``.

        Each usable tweet is stripped of surrounding whitespace and, unless it
        already ends with '.', '!' or '?', terminated with a period so that
        consecutive tweets stay separate sentences once joined with spaces.

        Parameters
        ----------
        tweets : iterable
            Candidate training tweets; non-string and blank entries are ignored.
        state_size : int
            Forwarded to ``markovify.Text``.

        Returns
        -------
        The model object returned by ``markovify.Text``.
        """
        sentences = []
        for tweet in tweets:
            # Only real strings support .endswith(); a missing value read by
            # pandas arrives as a float NaN, which is neither None nor a str.
            if not isinstance(tweet, str):
                continue
            # Strip first so trailing whitespace cannot hide existing
            # punctuation (which would otherwise yield a doubled terminator).
            tweet = tweet.strip()
            if not tweet:
                # A blank tweet would contribute only a stray "." to the corpus.
                continue
            if not tweet.endswith(self._SENTENCE_ENDINGS):
                tweet += '.'
            sentences.append(tweet)

        text = " ".join(sentences)

        # Build the Markovify model with the specified state size
        return markovify.Text(text, state_size=state_size)

    def generate_tweet(self, max_length=140):
        """Return one generated sentence of at most ``max_length`` characters.

        Parameters
        ----------
        max_length : int, default 140
            Forwarded to the model's ``make_short_sentence``.

        Returns
        -------
        str
            The generated sentence, or ``FALLBACK_MESSAGE`` when the model
            returns ``None`` or raises ``KeyError``.
        """
        try:
            tweet = self.model.make_short_sentence(max_length)
        except KeyError:
            # Kept from the original implementation, which guarded against a
            # KeyError during generation.
            return self.FALLBACK_MESSAGE

        # markovify reports "no acceptable sentence found" by returning None
        # rather than raising, so that case needs the fallback as well.
        if tweet is None:
            return self.FALLBACK_MESSAGE
        return tweet

In [6]:
# Train a generator on the filtered tweets, using a Markov state size of 3
tweet_generator = MarkovChainTextGenerator(tweets=tweets, state_size=3)

# Inference

In [7]:
# Draw five samples from the trained generator and print them one per line
print(*(tweet_generator.generate_tweet() for _ in range(5)), sep="\n")

$TSLA i always said this is going to explode tomorrow.
@FMossotto tracks the ships and trucks delivering oil and gas business!Dude is TRASHED!Hates Elon and is LOUD!
I love watching the $tsla / $tslaq Q1 flash gets published at...
That's Norway to you and an asshole to return it?
These stock experts have been wrong before $BTC $BCH $ETH #crypto #bitcoin #WarrenBuffett.
