# (1) Twitter Data
## (1.1) Getting Twitter data 2021 from API

In [1]:
import pandas as pd
from path import Path
from twarc import Twarc2, expansions
import json
import datetime
import os

In [2]:
from config import bearer_token

In [3]:
client = Twarc2(bearer_token=bearer_token)

In [4]:
user = 'elonmusk'
posts_dict = {
    'date':[],
    'text':[],
    'like_count':[],
    'reply_count':[],
    'retweet_count':[]
}

In [5]:
# pull posts from Twitter and create a dictionary
user_timeline = client.timeline(user=user, exclude_replies=True, start_time=datetime.datetime(2021,1,1, 0, 0, 0) )
for page in user_timeline:
    result = expansions.flatten(page)
    for tweet in result:
        posts_dict['date'].append(tweet['created_at'])
        posts_dict['text'].append(tweet['text'])
        posts_dict['like_count'].append(tweet['public_metrics']['like_count'])
        posts_dict['reply_count'].append(tweet['public_metrics']['reply_count'])
        posts_dict['retweet_count'].append(tweet['public_metrics']['retweet_count'])

In [6]:
# convert dictionary of posts to dataframe
twitter_2021 = pd.DataFrame.from_dict(posts_dict)
twitter_2021.head()

Unnamed: 0,date,text,like_count,reply_count,retweet_count
0,2021-09-18T23:13:01.000Z,Congratulations @Inspiration4x!!!,111742,6227,7878
1,2021-09-18T23:12:22.000Z,RT @SpaceX: Splashdown! Welcome back to planet...,0,0,15445
2,2021-09-18T21:41:06.000Z,RT @SpaceX: Dragon has entered its last orbit ...,0,0,2076
3,2021-09-18T19:01:40.000Z,RT @SpaceX: Orbital moonrise https://t.co/vrx8...,0,0,4585
4,2021-09-18T04:54:38.000Z,"Moving at ~23 times speed of sound, circling E...",99941,4379,9596


In [7]:
# convert date to datetime datatype
twitter_2021['date'] = pd.to_datetime(twitter_2021['date']).dt.date.astype('datetime64')
twitter_2021.tail()

Unnamed: 0,date,text,like_count,reply_count,retweet_count
542,2021-01-07,This is called the domino effect https://t.co/...,363374,4442,36994
543,2021-01-04,"Because of the large footprint, it may seem fl...",57381,1368,1055
544,2021-01-04,Snow falling on Giga Berlin https://t.co/eTXMt...,147180,3609,6790
545,2021-01-02,So proud of the Tesla team for achieving this ...,108925,4104,6157
546,2021-01-02,"RT @Tesla: In 2020, we produced and delivered ...",0,0,6175


## (1.2) Getting Twitter data 2011 - 2020 from archive

In [8]:
# load twitter data from csv file
file_to_load = os.path.join('Data', 'elon_musk_tweets_2011-2021.csv')
twitter_archive = pd.read_csv(file_to_load)
twitter_archive.head()

Unnamed: 0.1,Unnamed: 0,id,conversation_id,created_at,date,timezone,place,tweet,language,hashtags,...,geo,source,user_rt_id,user_rt,retweet_id,reply_to,retweet_date,translate,trans_src,trans_dest
0,0,1343644462036086785,1343320495127633920,1609185000000.0,2020-12-28 19:46:18,0,,Entertainment will be critical when cars drive...,en,[],...,,,,,,[],,,,
1,1,1343619610617077760,1343386617294295040,1609179000000.0,2020-12-28 18:07:33,0,,@kimpaquette Just meeting with Larry Ellison t...,en,[],...,,,,,,"[{'screen_name': 'kimpaquette', 'name': 'Kim P...",,,,
2,2,1343608616960491521,1343576442722893825,1609176000000.0,2020-12-28 17:23:51,0,,@richierichhhhh_ Absolutely,en,[],...,,,,,,"[{'screen_name': 'richierichhhhh_', 'name': 'R...",,,,
3,3,1343608530998153222,1343320495127633920,1609176000000.0,2020-12-28 17:23:31,0,,What should Tesla do with in-car gaming in an ...,en,[],...,,,,,,[],,,,
4,4,1343431408052662273,1343043963096326147,1609134000000.0,2020-12-28 05:39:42,0,,@PPathole @WSJ Absolutely,en,[],...,,,,,,"[{'screen_name': 'PPathole', 'name': 'Pranay P...",,,,


In [9]:
# select and rename columns
twitter_archive_clean = twitter_archive[['date', 'tweet', 'nlikes', 'nreplies', 'nretweets']]\
                            .loc[(twitter_archive['reply_to'] == '[]') & (twitter_archive['retweet'] == False)]
twitter_archive_clean.columns=['date', 'text', 'like_count', 'reply_count', 'retweet_count']

# convert date to datetime datatype
twitter_archive_clean['date'] = pd.to_datetime(twitter_archive_clean['date']).dt.date.astype('datetime64')

# drop last row with 1 tweet in 2011
twitter_archive_clean.drop(twitter_archive_clean.tail(1).index,inplace=True)

twitter_archive_clean.head()

Unnamed: 0,date,text,like_count,reply_count,retweet_count
0,2020-12-28,Entertainment will be critical when cars drive...,55085,2922,2611
3,2020-12-28,What should Tesla do with in-car gaming in an ...,33830,6932,884
6,2020-12-27,Try playing Polytopia in your Tesla! Great gam...,148037,5355,4186
34,2020-12-25,"Change your horn sound to 🐐, 🐍🎷, 💨 or holiday ...",187368,5373,6983
35,2020-12-25,Merry Christmas &amp; happy holidays! 🎁 https...,236833,7496,13288


## (1.3) Clean the twitter data

In [10]:
# concatenate 2 datasets to get tweets from 2011 to 2021
twitter_df_merged = pd.concat([twitter_2021, twitter_archive_clean])
twitter_df_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4732 entries, 0 to 11715
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           4732 non-null   datetime64[ns]
 1   text           4732 non-null   object        
 2   like_count     4732 non-null   int64         
 3   reply_count    4732 non-null   int64         
 4   retweet_count  4732 non-null   int64         
dtypes: datetime64[ns](1), int64(3), object(1)
memory usage: 221.8+ KB


In [11]:
# Drop the NaNs
twitter_df_merged.dropna()

Unnamed: 0,date,text,like_count,reply_count,retweet_count
0,2021-09-18,Congratulations @Inspiration4x!!!,111742,6227,7878
1,2021-09-18,RT @SpaceX: Splashdown! Welcome back to planet...,0,0,15445
2,2021-09-18,RT @SpaceX: Dragon has entered its last orbit ...,0,0,2076
3,2021-09-18,RT @SpaceX: Orbital moonrise https://t.co/vrx8...,0,0,4585
4,2021-09-18,"Moving at ~23 times speed of sound, circling E...",99941,4379,9596
...,...,...,...,...,...
11711,2011-12-04,Am reading a great biography of Ben Franklin b...,65,17,9
11712,2011-12-03,That was a total non sequitur btw,53,31,6
11713,2011-12-03,"Great Voltaire quote, arguably better than Twa...",29,7,25
11714,2011-12-01,I made the volume on the Model S http://t.co/...,78,31,9


In [12]:
# export all tweets for analysis in Tableau
twitter_df_merged.to_csv('Data/tweets_data_2011_2021_ungrouped.csv', index=False)

## (1.4) Preprocessing the Twitter data

**Preprocess the data by making it all lowercase. Remove a reasonable set of stopwords from the dataset and tokenize. Then, report the 10 most common words and their count. We need to iterate this process, adding some stop words as we understand the structure of the data. Justify additional stop words we've added.**

In [13]:
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from datetime import datetime
from nltk.stem import PorterStemmer
import re
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gobinaththangaiya/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [14]:
# group tweets that posted at the same day
def f(x):
     return pd.Series(dict(like_count = x['like_count'].sum(),
                        reply_count = x['reply_count'].sum(),
                        retweet_count = x['retweet_count'].sum(),
                        text = "{%s}" % ', '.join(x['text'])))

In [15]:
twitter_df_merged = twitter_df_merged.groupby('date').apply(f).reset_index()
twitter_df_merged.head()


Unnamed: 0,date,like_count,reply_count,retweet_count,text
0,2011-12-01,267,63,24,{I made the volume on the Model S http://t.co...
1,2011-12-03,82,38,31,"{That was a total non sequitur btw, Great Volt..."
2,2011-12-04,65,17,9,{Am reading a great biography of Ben Franklin ...
3,2011-12-21,1330,87,597,{Yum! Even better than deep fried butter: htt...
4,2011-12-22,1349,132,206,{Model S options are out! Performance in red a...


In [16]:
twitter_df_merged.count()

date             1767
like_count       1767
reply_count      1767
retweet_count    1767
text             1767
dtype: int64

In [17]:
twitter_df = twitter_df_merged.copy()

In [18]:
# Data Pre-processing and make the tweets all lowercase and remove stopwords.
# lower the tweets
twitter_df['preprocessed_text'] = twitter_df['text'].str.lower()

# remove apostrophe from words and url
twitter_df['preprocessed_text'] = [re.sub("('[a-z]+)\s", " ", row) for row in twitter_df['preprocessed_text']]
twitter_df['preprocessed_text'] = [re.sub("(')\s", " ", row) for row in twitter_df['preprocessed_text']]
twitter_df['preprocessed_text'] = [re.sub("(?:https:\/\/\S+)\s", "", row) for row in twitter_df['preprocessed_text']]

                                      
# filter out rest URLs
url_re = '(?:https?:\/\/)?(?:[^?\/\s]+[?\/])(.*)'
twitter_df['preprocessed_text'] = twitter_df['preprocessed_text'].apply(lambda row: ' '.join([word for word in row.split() if (not re.match(url_re, word))]))

# tokenize the tweets
tokenizer = RegexpTokenizer('[a-zA-Z]\w+\'?\w*')
twitter_df['tokenized_text'] = twitter_df['preprocessed_text'].apply(lambda row: tokenizer.tokenize(row))

#create an object of class PorterStemmer
porter = PorterStemmer()

# apply stemming
twitter_df['preprocessed_text'] = [porter.stem(row) for row in twitter_df['preprocessed_text']]   

# filter out stop words
en_stop_words = nltk.corpus.stopwords.words('english')
additional_stop_words =['amp', 'rt', 'th','co', 're', 've', 'kim', 'daca', 'us', 'it', 'th', 'you', 'haha', 'st', 'et', 'so', 'iii', 'also', 've', 'la', 're', 'the', 'https', 'wow', 'actually', 'due', 'ft', 'pcr', 'via', 'am', 'gt', 'com', 'since', 'in', 'me', 'and', 'btw', 'yesterday', 'ii', 'inu', 'on', 'http', 'to', 'vs', 'rd', 'ur', 'of', 'bs', 'km', 'est', 'em', 'lz', 'kms', 'aft', 'nd',  'here’s', 're', 'mqxfakpzf' 'mph', 'ht', 'etc', 'dm', 'doo']
en_stop_words.extend(additional_stop_words)

twitter_df['tokenized_text'] = twitter_df['tokenized_text'].apply(lambda row: [word for word in row if (not word in en_stop_words)])

df_tweets_clean = twitter_df.copy()
df_tweets_clean.head()

Unnamed: 0,date,like_count,reply_count,retweet_count,text,preprocessed_text,tokenized_text
0,2011-12-01,267,63,24,{I made the volume on the Model S http://t.co...,{i made the volume on the model s go to 11. no...,"[made, volume, model, go, need, work, miniatur..."
1,2011-12-03,82,38,31,"{That was a total non sequitur btw, Great Volt...","{that was a total non sequitur btw, great volt...","[total, non, sequitur, great, voltaire, quote,..."
2,2011-12-04,65,17,9,{Am reading a great biography of Ben Franklin ...,{am reading a great biography of ben franklin ...,"[reading, great, biography, ben, franklin, isa..."
3,2011-12-21,1330,87,597,{Yum! Even better than deep fried butter: htt...,{yum! even better than deep fried butter: yeah...,"[yum, even, better, deep, fried, butter, yeah,..."
4,2011-12-22,1349,132,206,{Model S options are out! Performance in red a...,{model s options are out! performance in red a...,"[model, options, performance, red, black, deli..."


In [19]:
df_tweets_clean = df_tweets_clean[['date', 'text', 'tokenized_text', 'like_count', 'reply_count', 'retweet_count']]
df_tweets_clean.head(10)

Unnamed: 0,date,text,tokenized_text,like_count,reply_count,retweet_count
0,2011-12-01,{I made the volume on the Model S http://t.co...,"[made, volume, model, go, need, work, miniatur...",267,63,24
1,2011-12-03,"{That was a total non sequitur btw, Great Volt...","[total, non, sequitur, great, voltaire, quote,...",82,38,31
2,2011-12-04,{Am reading a great biography of Ben Franklin ...,"[reading, great, biography, ben, franklin, isa...",65,17,9
3,2011-12-21,{Yum! Even better than deep fried butter: htt...,"[yum, even, better, deep, fried, butter, yeah,...",1330,87,597
4,2011-12-22,{Model S options are out! Performance in red a...,"[model, options, performance, red, black, deli...",1349,132,206
5,2011-12-24,{The Russians are having some challenges with ...,"[russians, challenges, rockets, many, engineer...",117113,1370,8434
6,2011-12-26,{Walked around a neighborhood recently rebuilt...,"[walked, around, neighborhood, recently, rebui...",558,102,171
7,2011-12-27,{If you ever wanted to know the *real* truth a...,"[ever, wanted, know, real, truth, moon, landin...",39,13,34
8,2011-12-28,{@TheOnion So true :)},"[theonion, true]",12,7,1
9,2011-12-29,{Am not saying that is *necessarily* good or b...,"[saying, necessarily, good, bad, reality, forc...",187,39,41


In [20]:
# count unique words
def get_most_freq_words(str, n=None):
    vect = CountVectorizer().fit(str)
    bag_of_words = vect.transform(str)
    sum_words = bag_of_words.sum(axis=0) 
    freq = [(word, sum_words[0, idx]) for word, idx in vect.vocabulary_.items()]
    freq =sorted(freq, key = lambda x: x[1], reverse=True)
    return freq[:n]
  
len(get_most_freq_words([ word for tweet in df_tweets_clean.tokenized_text for word in tweet]))

8253

In [21]:
df_tweets_clean.to_csv('data/tweets_data_2011_2021.csv', index=False)

## (1.5) Upload dataset to SQL Database

In [22]:
import sqlalchemy
from sqlalchemy import create_engine, inspect
import psycopg2
from config import db_password

In [23]:
# Create engine
# engine = create_engine(f'postgresql+psycopg2://{user}:{password}@{hostname}/twitter_vs_stocks')
engine = create_engine(f"postgresql://postgres:{db_password}@127.0.0.1:5432/twitter_vs_stocks")
# Use the Inspector to explore the database
inspector = inspect(engine)

In [24]:
df_tweets_clean.to_sql('tweets_text', engine, if_exists ='replace',method='multi', index=False)

# (2 ) Stock data

## (2.1) Getting the stock data

In [25]:
from yahoo_fin.stock_info import get_data

In [26]:
# historical daily data from Yahoo finance
tesla_df = get_data("tsla", start_date = '2011-01-01', end_date = None, index_as_date = False, interval="1d")
tesla_df

Unnamed: 0,date,open,high,low,close,adjclose,volume,ticker
0,2011-01-03,5.368000,5.400000,5.180000,5.324000,5.324000,6415000,TSLA
1,2011-01-04,5.332000,5.390000,5.204000,5.334000,5.334000,5937000,TSLA
2,2011-01-05,5.296000,5.380000,5.238000,5.366000,5.366000,7233500,TSLA
3,2011-01-06,5.366000,5.600000,5.362000,5.576000,5.576000,10306000,TSLA
4,2011-01-07,5.600000,5.716000,5.580000,5.648000,5.648000,11239500,TSLA
...,...,...,...,...,...,...,...,...
2691,2021-09-13,740.210022,744.780029,708.849976,743.000000,743.000000,22952500,TSLA
2692,2021-09-14,742.570007,754.469971,736.400024,744.489990,744.489990,18524900,TSLA
2693,2021-09-15,745.000000,756.859985,738.359985,755.830017,755.830017,15357700,TSLA
2694,2021-09-16,752.830017,758.909973,747.609985,756.989990,756.989990,13923400,TSLA


## (2.2) Clean the stock data

In [27]:
# Drop adjclose column
tesla_df = tesla_df.drop(columns=["adjclose", "ticker"])
tesla_df.head()

Unnamed: 0,date,open,high,low,close,volume
0,2011-01-03,5.368,5.4,5.18,5.324,6415000
1,2011-01-04,5.332,5.39,5.204,5.334,5937000
2,2011-01-05,5.296,5.38,5.238,5.366,7233500
3,2011-01-06,5.366,5.6,5.362,5.576,10306000
4,2011-01-07,5.6,5.716,5.58,5.648,11239500


In [28]:
# Determine data types for each column
tesla_df.dtypes

date      datetime64[ns]
open             float64
high             float64
low              float64
close            float64
volume             int64
dtype: object

## (2.3) Preprocessing the Stock Data

In [29]:
# Calculate change in stock price
tesla_df['change'] = tesla_df['close'].diff()
tesla_df.head(10)

Unnamed: 0,date,open,high,low,close,volume,change
0,2011-01-03,5.368,5.4,5.18,5.324,6415000,
1,2011-01-04,5.332,5.39,5.204,5.334,5937000,0.01
2,2011-01-05,5.296,5.38,5.238,5.366,7233500,0.032
3,2011-01-06,5.366,5.6,5.362,5.576,10306000,0.21
4,2011-01-07,5.6,5.716,5.58,5.648,11239500,0.072
5,2011-01-10,5.634,5.736,5.61,5.69,6713500,0.042
6,2011-01-11,5.718,5.742,5.384,5.392,8551000,-0.298
7,2011-01-12,5.402,5.48,5.304,5.392,4822000,0.0
8,2011-01-13,5.392,5.394,5.232,5.244,3618000,-0.148
9,2011-01-14,5.23,5.316,5.122,5.15,5960000,-0.094


In [30]:
tesla_df.to_csv('data/tesla_stocks.csv', index=False)

## (2.4) Upload dataset to SQL Database

In [31]:
tesla_df.to_sql('stock', engine, if_exists ='replace',method='multi', index=False)

# Machine Learning Model - Second Segment Project Deliverable

## Model Plan
- Prepare the dataframe with columns: tweet_text, price_previous day, price_next day, price_diff
- Preprocess the tweet text into features (countVectorier, tfidf)
- Classification: LogisticRegression 

### Query the dataframe with columns: tweet_text, price_previous day, price_next day, price_diff

In [32]:
# Setting up libraries:

import pandas as pd
from sqlalchemy import create_engine, inspect 
from config import db_password

In [33]:
# Create engine
engine = create_engine(f"postgresql://postgres:{db_password}@127.0.0.1:5432/twitter_vs_stocks")

In [34]:
#Creating new table for model, preprocessing the data

tweets_price = pd.read_sql_query(
    """
        SELECT 
            tweets.date AS tweet_date,
            tweets.text AS tweet_text,
            tweets.tokenized_text AS tweet_tokens,
            COALESCE(stock_prev.close, stock_prev_prev.close, stock_prev_prev_prev.close) AS prev_day_close,
            COALESCE(stock_next.close, stock_next_next.close, stock_next_next_next.close) AS next_day_close
        FROM tweets_text tweets
        LEFT JOIN stock stock_prev
            ON (tweets.date - INTERVAL '1 day') = stock_prev.date
        LEFT JOIN stock stock_prev_prev
            ON (tweets.date - INTERVAL '2 day') = stock_prev_prev.date
        LEFT JOIN stock stock_prev_prev_prev
            ON (tweets.date - INTERVAL '3 day') = stock_prev_prev_prev.date
        LEFT JOIN stock stock_next
            ON (tweets.date + INTERVAL '1 day') = stock_next.date
        LEFT JOIN stock stock_next_next
            ON (tweets.date + INTERVAL '2 day') = stock_next_next.date
        LEFT JOIN stock stock_next_next_next
            ON (tweets.date + INTERVAL '3 day') = stock_next_next_next.date
        WHERE tweets.date > '2011-01-01' AND tweets.tokenized_text != '{}'
        ORDER BY tweets.date
    """,
    con=engine
)

tweets_price.dropna(inplace=True)

#Computing difference between the stock price before the date of tweet and after the post. 
tweets_price['close_price_diff'] = tweets_price['next_day_close'] - tweets_price['prev_day_close']

In [35]:
tweets_price = tweets_price[tweets_price.tweet_tokens.str.count(',') > 1] # More than two words in tweet
tweets_price.head(5)

Unnamed: 0,tweet_date,tweet_text,tweet_tokens,prev_day_close,next_day_close,close_price_diff
0,2011-12-01,{I made the volume on the Model S http://t.co...,"{made,volume,model,go,need,work,miniature,ston...",6.548,6.66,0.112
1,2011-12-03,"{That was a total non sequitur btw, Great Volt...","{total,non,sequitur,great,voltaire,quote,argua...",6.66,6.884,0.224
2,2011-12-04,{Am reading a great biography of Ben Franklin ...,"{reading,great,biography,ben,franklin,isaacson...",6.66,6.884,0.224
3,2011-12-21,{Yum! Even better than deep fried butter: htt...,"{yum,even,better,deep,fried,butter,yeah,really...",5.58,5.554,-0.026
4,2011-12-22,{Model S options are out! Performance in red a...,"{model,options,performance,red,black,deliver,c...",5.514,5.58,0.066


## A - Classification: Which tweets increase stock price vs decrease


In [36]:
# Setting up libraries for model

#CountVectorizer = this takes in a list and counts how many times it appears
#TfidfTransformer = frequency of word in a tweet as compared to other tweets

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

text_clf = Pipeline([
    ('vect', CountVectorizer(preprocessor=lambda x: x, tokenizer=lambda x: x)),
    ('tfidf', TfidfTransformer()),
    ('clf', LogisticRegression(C=0.01, random_state=1)),
])

In [37]:
train_df, test_df = train_test_split(tweets_price, random_state=1)

In [38]:
# Setting up variables
X_train = train_df.tweet_tokens.tolist()
y_train = (train_df['close_price_diff'] > 0).astype(int).values
X_test = test_df.tweet_tokens.tolist()
y_test = (test_df['close_price_diff'] > 0).astype(int).values

In [39]:
# Classify text data
text_clf.fit(X_train, y_train)

Pipeline(steps=[('vect',
                 CountVectorizer(preprocessor=<function <lambda> at 0x11bb950d0>,
                                 tokenizer=<function <lambda> at 0x11bc9aaf0>)),
                ('tfidf', TfidfTransformer()),
                ('clf', LogisticRegression(C=0.01, random_state=1))])

In [40]:
text_clf.score(X_test, y_test)

0.5454545454545454

In [41]:
# Testing predicted probability
predicted_proba_test = text_clf.predict_proba(X_test)[:, 1]

In [42]:
# Adding results into DataFrame
results_test = pd.DataFrame({
    'proba_positive_tweet': predicted_proba_test,
    'tweet_text': test_df['tweet_text'],
    'tweet_date': test_df['tweet_date'],
    'stock_price_change': test_df['close_price_diff'],
}).sort_values('proba_positive_tweet', ascending=False)
pd.set_option('display.max_colwidth', None)
results_test.head(10)

Unnamed: 0,proba_positive_tweet,tweet_text,tweet_date,stock_price_change
116,0.566446,{First test flight hop of our Grasshopper VTVL rocket! http://t.co/oomI5vSB},2012-09-22,0.128
1541,0.566264,"{Thank you, South Texas for your support! This is the gateway to Mars., Life, the Universe and Everything https://t.co/1ZCzInfc4u}",2020-12-10,5.51001
218,0.566229,"{Just want to say thanks to customers &amp; investors that took a chance on Tesla through the long, dark night. We wouldn't be here without you., @westcoastbill Thanks Bill!}",2013-05-08,2.778
1123,0.566174,"{Great meme review hosted by Will Smith, Highest reentry heating to date. Burning metal sparks from base heat shield visible in landing video. Fourth relight scheduled for April.}",2019-02-22,1.508003
1593,0.566141,"{From thence to Mars,\nAnd hence the Stars., Creating the city of Starbase, Texas, Horses are even self-driving! https://t.co/qPJrCFGs8J, Scammers &amp; crypto should get a room}",2021-03-02,-65.22998
1100,0.566067,"{Awesome moose sculpture! ♥️🇳🇴 https://t.co/CegEEHL4wz, Testing metallic heat shield at 1100C (2000F) @SpaceX https://t.co/frP5eZ5a0z, If test flight of 🐉 goes well next month, @NASA 👨‍🚀 👩‍🚀 will 🚀 to @Space_Station this summer!}",2019-01-25,0.974003
764,0.566025,{US govt testing by @NHTSAgov finds Model X to be the safest SUV in history by significant margin https://t.co/zAdb5FQPEI},2017-06-13,4.330002
1231,0.566024,"{Great progress by Starship Cape team. Started several months behind, but catching up fast. This will be a super fun race to orbit, moon &amp; Mars!}",2019-08-06,1.019997
434,0.565899,{We just got banned in West Virginia. Oh no. http://t.co/gNztPDNtVT},2015-04-04,2.419998
1012,0.56586,"{Tesla piece on the physics of car safety coming soon for those interested in technical details, .@NHTSAgov will post final safety probability stats soon. Model 3 has a shot at being safest car ever tested.}",2018-09-20,0.015999


In [43]:
results_test.to_csv('data/results_test.csv')