In [1]:
from typing import List
from matplotlib.pyplot import get
from pandas.core.frame import DataFrame
from textblob import TextBlob
import pandas as pd

In [2]:
# Load the tweets CSV, then discard the trailing 1689 rows that go beyond the dataset needed
elon_df = pd.read_csv('elon.csv')
elon_df = elon_df.drop(index=elon_df.tail(1689).index)

In [3]:
# A TextBlob helper that reads the tweets and creates one column per requested
# attribute (e.g. noun phrases, polarity, subjectivity)
def add_tb_data(df: DataFrame, attrs: List[str]) -> DataFrame:
    """Add one column per TextBlob attribute, computed from ``df['tweet']``.

    Each tweet is wrapped in a ``TextBlob`` exactly once; every name in
    ``attrs`` is then read from that blob and stored in a column of the
    same name.

    Args:
        df: Frame containing a ``'tweet'`` column. It is modified in place:
            a column is added (or overwritten) for each entry of ``attrs``.
        attrs: Names of ``TextBlob`` attributes to extract.

    Returns:
        The same ``df`` object, with the attribute columns added.

    Raises:
        KeyError: If ``df`` has no ``'tweet'`` column (only checked when
            ``attrs`` is non-empty).
        AttributeError: If a name in ``attrs`` is not an attribute of the
            blob.
    """
    # Materialize so a one-shot iterable can be both tested for emptiness and looped over.
    attrs = list(attrs)
    if not attrs:
        # Nothing requested: skip building any blobs at all.
        return df

    # Build each blob once and reuse it for every attribute, instead of
    # re-creating it for every (tweet, attribute) pair.
    blobs = df['tweet'].apply(TextBlob)

    for attr in attrs:
        # `name=attr` binds the current attribute name at definition time.
        df[attr] = blobs.apply(lambda blob, name=attr: getattr(blob, name))

    return df

In [4]:
# Function for removing the time zone name from a created_at value
def fix_time(t: str):
    """Return ``t`` with the Pacific time zone names stripped out.

    Every occurrence of ' Pacific Daylight Time' and ' Pacific Standard Time'
    (including the leading space) is removed; all other text is left as is.
    """
    cleaned = t
    for zone_label in (' Pacific Daylight Time', ' Pacific Standard Time'):
        cleaned = cleaned.replace(zone_label, '')
    return cleaned

In [5]:
# Keep only the columns we need. The explicit .copy() gives an independent frame,
# so the column assignments below do not trigger pandas' SettingWithCopyWarning.
elon_df = elon_df[['id', 'tweet', 'created_at']].copy()
# Clean the date & time by removing the time zone names
elon_df['created_at'] = elon_df['created_at'].apply(fix_time)
# TextBlob attributes to extract for each tweet
add_attrs = ['noun_phrases', 'polarity', 'subjectivity']
# Run the helper on our dataframe: it creates one column per attribute and populates it
elon_df = add_tb_data(elon_df, add_attrs)
# Save the dataframe for later use
elon_df.to_csv('elon_sentsubj.csv', index=False)
