In [33]:
import polars as pl
import pandas as pd
from polars import col
import yfinance as yf
from ta import add_all_ta_features

In [2]:
def get_moving_stats(df, cols, periods=(3, 5, 10, 15, 30)):
  """Add rolling mean / max / min columns for each requested column.

  For every name in `cols` and every window length in `periods`, three new
  columns are written to `df`: "<name>_MA_<p>", "<name>_MAX_<p>" and
  "<name>_MIN_<p>". The windows run over the rows of `df` in their current
  order, so the caller is responsible for sorting (and for splitting the
  frame per entity if rows of several entities are interleaved).

  Args:
    df: pandas DataFrame holding the source columns. It is modified in place.
    cols: iterable of column names to compute the statistics for.
    periods: iterable of window lengths (in rows). Defaults to the windows
      that were previously hard-coded: 3, 5, 10, 15 and 30.

  Returns:
    The same DataFrame object, with the new columns added.
  """
  # `name` (not `col`) so the loop variable does not shadow polars' `col` import
  for name in cols:
    series = df[name]
    for window in periods:
      # Build the rolling window once and reuse it for all three statistics
      rolled = series.rolling(window)
      df[f"{name}_MA_{window}"] = rolled.mean()
      df[f"{name}_MAX_{window}"] = rolled.max()
      df[f"{name}_MIN_{window}"] = rolled.min()

  return df

In [3]:
def get_data(ticker, start_date, end_date):
  """Fetch historical market data for one ticker via yfinance.

  Args:
    ticker: ticker symbol handed to `yf.Ticker`.
    start_date: passed through as the `start` argument of `history`.
    end_date: passed through as the `end` argument of `history`.

  Returns:
    The result of `yf.Ticker(ticker).history(...)` (a pandas dataframe).
  """
  return yf.Ticker(ticker).history(start=start_date, end=end_date)

In [4]:
def slice_and_cast(col):
    """Build a polars expression that turns a timestamp-like string column into dates.

    The expression keeps the first 10 characters of column `col`, parses them
    with the "%Y-%m-%d" format as a Datetime, and casts the result to Date.
    """
    leading_date_text = pl.col(col).str.slice(0, 10)
    parsed = leading_date_text.str.strptime(pl.Datetime, "%Y-%m-%d")
    return parsed.cast(pl.Date)

In [5]:
def split_column(df):
    """Derive Query_Type, Query and FileName columns from the "File" column.

    "File" is first replaced by `str.slice(22, 30)` of itself (presumably to
    drop a fixed-length directory prefix -- confirm against the actual paths),
    then split on '/'; pieces 0, 1 and 2 become the three new columns.
    """
    trimmed = df.with_columns(pl.col("File").str.slice(22, 30))

    part_names = ['Query_Type', 'Query', 'FileName']
    # Bind `position` as a default argument so each lambda keeps its own index
    part_exprs = [
        col('File').apply(lambda path, position=position: path.split('/')[position]).alias(part_name)
        for position, part_name in enumerate(part_names)
    ]

    return trimmed.with_columns([col('File'), *part_exprs])

### Tweet Features

In [6]:
# Load the scored ticker tweets ('~'-separated, UTF-8 encoded)
tickers_df = pl.read_csv(
    '../Data/ScoredDf/Tickers.csv',
    sep='~',
    encoding='utf-8',
)

In [7]:
# Load the scored CEO tweets ('~'-separated, UTF-8 encoded)
ceos_df = pl.read_csv(
    '../Data/ScoredDf/Ceos.csv',
    sep='~',
    encoding='utf-8',
)

In [8]:
# Replace spaces in the column names with underscores
tickers_df.columns = [name.replace(" ", "_") for name in tickers_df.columns]
ceos_df.columns = [name.replace(" ", "_") for name in ceos_df.columns]

In [9]:
# Convert "Created_at" to a date (first 10 characters parsed as %Y-%m-%d)
tickers_df = tickers_df.with_columns([slice_and_cast("Created_at")])
# NOTE(review): the same conversion is disabled for ceos_df, yet later cells
# group ceos_df by "Created_at" and join on it -- confirm whether to enable it.
#ceos_df = ceos_df.with_columns(slice_and_cast("Created_at"))

In [10]:
# Derive the Query_Type, Query and FileName columns from the "File" column
tickers_df = split_column(df=tickers_df)
# NOTE(review): disabled for ceos_df, but later cells group ceos_df by
# "Query" and "Query_Type", which only split_column creates -- confirm.
#ceos_df = split_column(ceos_df)

In [11]:
# One row per (date, query, query type): count tweets by sentiment / signal label
tickers_features_querry = (
    tickers_df
    .lazy()
    .groupby(["Created_at", "Query", "Query_Type"])
    .agg([
        # Summing a boolean comparison counts the rows where it is true
        (pl.col('Sentiment_Label') == "Positive").sum().alias('Tickers Sentiment Positive'),
        (pl.col('Sentiment_Label') == "Negative").sum().alias('Tickers Sentiment Negative'),
        (pl.col('Signal_Label') == "Bullish").sum().alias('Tickers Signal Bullish'),
        (pl.col('Signal_Label') == "Bearish").sum().alias('Tickers Signal Bearish'),
        pl.col('Created_at').count().alias('Tickers Total Daily Tweets'),
    ])
    # Ascending order is the default for sort()
    .sort("Created_at")
)

# Execute the lazy query
tickers_sentiment_features = tickers_features_querry.collect()

In [12]:
# Create a Company column for merging with the CEOs dataframe.
# Uses with_columns (as the rest of this notebook does); the singular
# with_column is deprecated in polars and removed in later releases.
tickers_sentiment_features = tickers_sentiment_features.with_columns(
    pl.when(pl.col("Query") == "AMZN")
    .then(pl.lit("AMAZON"))
    .when(pl.col("Query") == "AAPL")
    .then(pl.lit("APPLE"))
    .when(pl.col("Query") == "TSLA")
    .then(pl.lit("TESLA"))
    # Any other query value is flagged explicitly instead of being left null
    .otherwise(pl.lit("ERROR"))
    .alias("Company")
)

In [13]:
# Aggregated ticker columns that receive rolling statistics
cols = [
    'Tickers Sentiment Positive', 'Tickers Sentiment Negative',
    'Tickers Signal Bullish', 'Tickers Signal Bearish',
    'Tickers Total Daily Tweets',
]

In [14]:
# The aggregated frame has one row per (date, Query, Query_Type) and is sorted
# by date only, so rows of different tickers are interleaved. Compute the
# rolling statistics per query group so a window never mixes tickers, then
# restore the original row order. The result is a pandas DataFrame.
tickers_sentiment_features = pd.concat(
    [
        # copy() so get_moving_stats adds columns to an independent frame
        get_moving_stats(group.copy(), cols)
        for _, group in tickers_sentiment_features.to_pandas().groupby(
            ["Query", "Query_Type"], sort=False, dropna=False
        )
    ]
).sort_index()

In [None]:
# One row per (date, query, query type): count tweets by sentiment / signal label
ceos_features_querry = (
    ceos_df
    .lazy()
    .groupby(["Created_at", "Query", "Query_Type"])
    .agg([
        # Summing a boolean comparison counts the rows where it is true
        (pl.col('Sentiment_Label') == "Positive").sum().alias('Ceos Sentiment Positive'),
        (pl.col('Sentiment_Label') == "Negative").sum().alias('Ceos Sentiment Negative'),
        (pl.col('Signal_Label') == "Bullish").sum().alias('Ceos Signal Bullish'),
        (pl.col('Signal_Label') == "Bearish").sum().alias('Ceos Signal Bearish'),
        pl.col('Created_at').count().alias('Ceos Total Daily Tweets'),
    ])
    # Ascending order is the default for sort()
    .sort("Created_at")
)

# Execute the lazy query
ceos_sentiment_features = ceos_features_querry.collect()

In [None]:
# Create a Company column for merging with the tickers dataframe.
# Uses with_columns (as the rest of this notebook does); the singular
# with_column is deprecated in polars and removed in later releases.
ceos_sentiment_features = ceos_sentiment_features.with_columns(
    pl.when(pl.col("Query") == "Jeff Bezos")
    .then(pl.lit("AMAZON"))
    .when(pl.col("Query") == "Tim Cook")
    .then(pl.lit("APPLE"))
    .when(pl.col("Query") == "Elon Musk")
    .then(pl.lit("TESLA"))
    # Any other query value is flagged explicitly instead of being left null
    .otherwise(pl.lit("ERROR"))
    .alias("Company")
)

In [None]:
# Aggregated CEO columns that receive rolling statistics
cols = [
    'Ceos Sentiment Positive', 'Ceos Sentiment Negative',
    'Ceos Signal Bullish', 'Ceos Signal Bearish',
    'Ceos Total Daily Tweets',
]

In [None]:
# The aggregated frame has one row per (date, Query, Query_Type) and is sorted
# by date only, so rows of different CEOs are interleaved. Compute the rolling
# statistics per query group so a window never mixes CEOs, then restore the
# original row order. The result is a pandas DataFrame.
ceos_sentiment_features = pd.concat(
    [
        # copy() so get_moving_stats adds columns to an independent frame
        get_moving_stats(group.copy(), cols)
        for _, group in ceos_sentiment_features.to_pandas().groupby(
            ["Query", "Query_Type"], sort=False, dropna=False
        )
    ]
).sort_index()

In [None]:
# Merge the two datasets on date and company.
# Both frames are pandas DataFrames here (they were converted with to_pandas()
# before the moving statistics were added), so use merge: pandas'
# DataFrame.join matches `on` against the other frame's index and raises on
# the overlapping "Query" / "Query_Type" columns.
twitter_features = tickers_sentiment_features.merge(
    ceos_sentiment_features,
    on=["Created_at", "Company"],
    how="inner",
    # Disambiguate the columns present in both frames (Query, Query_Type)
    suffixes=("_Tickers", "_Ceos"),
)

# Check if there are left out records on either side
twitter_features.shape[0] == tickers_sentiment_features.shape[0] == ceos_sentiment_features.shape[0]


### Technical Analysis Features

In [15]:
# Download Apple stock data
aapl_df = yf.download("AAPL", start="2019-01-01", end="2021-12-31")
# Tag every row with the company name
aapl_df["Company"] = "APPLE"
# Row-over-row change of the adjusted close: absolute, then relative
aapl_df["Price Change"] = aapl_df["Adj Close"].diff()
aapl_df["% Price Change"] = aapl_df["Adj Close"].pct_change()
# "Up" only for a strictly positive change; a zero change and the first row
# (NaN, no previous value) are labelled "Down"
aapl_df["Movement"] = aapl_df["Price Change"].map(
    lambda delta: "Up" if delta > 0 else "Down"
)
# Add all technical analysis features from the `ta` package
aapl_df = add_all_ta_features(
    aapl_df,
    open="Open",
    high="High",
    low="Low",
    close="Close",
    volume="Volume",
)

[*********************100%***********************]  1 of 1 completed


In [16]:
# Download Tesla stock data
tsla_df = yf.download("TSLA", start="2019-01-01", end="2021-12-31")
# Tag every row with the company name
tsla_df["Company"] = "TESLA"
# Row-over-row change of the adjusted close: absolute, then relative
tsla_df["Price Change"] = tsla_df["Adj Close"].diff()
tsla_df["% Price Change"] = tsla_df["Adj Close"].pct_change()
# "Up" only for a strictly positive change; a zero change and the first row
# (NaN, no previous value) are labelled "Down"
tsla_df["Movement"] = tsla_df["Price Change"].map(
    lambda delta: "Up" if delta > 0 else "Down"
)
# Add all technical analysis features from the `ta` package
tsla_df = add_all_ta_features(
    tsla_df,
    open="Open",
    high="High",
    low="Low",
    close="Close",
    volume="Volume",
)

[*********************100%***********************]  1 of 1 completed


In [17]:
# Download Amazon stock data
amzn_df = yf.download("AMZN", start="2019-01-01", end="2021-12-31")
# Tag every row with the company name
amzn_df["Company"] = "AMAZON"
# Row-over-row change of the adjusted close: absolute, then relative
amzn_df["Price Change"] = amzn_df["Adj Close"].diff()
amzn_df["% Price Change"] = amzn_df["Adj Close"].pct_change()
# "Up" only for a strictly positive change; a zero change and the first row
# (NaN, no previous value) are labelled "Down"
amzn_df["Movement"] = amzn_df["Price Change"].map(
    lambda delta: "Up" if delta > 0 else "Down"
)
# Add all technical analysis features from the `ta` package
amzn_df = add_all_ta_features(
    amzn_df,
    open="Open",
    high="High",
    low="Low",
    close="Close",
    volume="Volume",
)

[*********************100%***********************]  1 of 1 completed


In [18]:
# Stack the per-company frames row-wise: Amazon, then Tesla, then Apple
technical_df = pd.concat(
    [
        amzn_df,
        tsla_df,
        aapl_df,
    ]
)

### Final DataFrame Creation