In [1]:
import pandas as pd
import numpy as np
import json

from pathlib import Path

from config import config
from src.preprocessing.merge_csv_in_df import merge_csv_by_date
from src.llm.loaders.gnews_loader import load_and_process_news_data
from src.llm.token_estimator import estimate_tokens_and_cost
from src.llm.langchain_news_analyzer import CryptoNewsSentimentAnalyzer
from src.llm.bart_news_sentiment import analyze_news_articles
from src.llm.bart_reddit_post_sentiment import analyze_reddit_posts
from src.utils.llm_utils import add_daily_aggregates, replace_nans_and_zeros_with_rolling_mean
from src.models.reddit_db_analyzer import RedditAnalyzer
from src.processing.reddit_data_aggregator import (
    get_daily_reddit_data,
    get_top_scored_posts,
)


def _read_price_csv(filename):
    """Read one CSV from processed/crypto_prices, parsing `date` and using it as the index."""
    return pd.read_csv(
        config.DATA_DIR / "processed" / "crypto_prices" / filename,
        parse_dates=["date"],
        index_col="date",
    )


# Base frames for the two assets; later cells merge further columns into them.
df_btc = _read_price_csv("btc_original.csv")
df_eth = _read_price_csv("eth_original.csv")

# Google Trends


We have Google Trends search-interest data for the following terms:

- bitcoin
- blockchain
- cryptocurrency
- ethereum
- investing


In [2]:
# Directory holding one Google Trends CSV per search term.
GOOGLE_TRENDS = config.DATA_DIR / "raw" / "google_trends"


def _attach_trends(price_df, term_columns):
    """Merge each (csv_name, column_name) Google Trends file into price_df, in list order."""
    merged = price_df
    for csv_name, column_name in term_columns:
        merged = merge_csv_by_date(
            GOOGLE_TRENDS / csv_name,
            merged,
            column_name,
            is_monthly=True,
            rows_to_skip=1,
        )
    return merged


# Terms merged into both frames; each frame gets its own coin's term first so the
# resulting column order is coin term, blockchain, cryptocurrency, investing.
_shared_terms = [
    ("blockchain.csv", "gTrendsBlockchain"),
    ("cryptocurrency.csv", "gTrendsCryptocurrency"),
    ("investing.csv", "gTrendsInvesting"),
]

df_btc = _attach_trends(df_btc, [("bitcoin.csv", "gTrendsBitcoin"), *_shared_terms])
df_eth = _attach_trends(df_eth, [("ethereum.csv", "gTrendsEthereum"), *_shared_terms])

# Gold


In [3]:
# Merge raw/gold.csv into both frames under the column name "goldValue".
gold_csv = config.DATA_DIR / "raw" / "gold.csv"
df_btc = merge_csv_by_date(gold_csv, df_btc, "goldValue")
df_eth = merge_csv_by_date(gold_csv, df_eth, "goldValue")

# S&P 500


In [4]:
# Merge raw/sp500.csv into both frames under the column name "sp500Value".
sp500_csv = config.DATA_DIR / "raw" / "sp500.csv"
df_btc = merge_csv_by_date(sp500_csv, df_btc, "sp500Value")
df_eth = merge_csv_by_date(sp500_csv, df_eth, "sp500Value")

In [5]:
# Lift pandas' column-display cap so wide frames are shown in full. set_option is
# session-wide, so it also applies to every later DataFrame display in this notebook.
pd.set_option("display.max_columns", None)
# Preview the BTC frame after the Google Trends, gold and S&P 500 merges above.
df_btc.head()

Unnamed: 0_level_0,open,high,low,close,volume,marketCap,logPriceChange,priceMovement,localMin_7,localMax_7,localMin_14,localMax_14,localMin_21,localMax_21,dayOfWeek_Sin,dayOfWeek_Cos,EMA_12,EMA_26,RSI_14,BB_Middle,BB_Upper,BB_Lower,OBV,AO,KAMA,PPO,PPO_Signal,PPO_Histogram,PVO,PVO_Signal,PVO_Histogram,ROC,RSI,Stoch_RSI_K,Stoch_RSI_D,Stoch_K,Stoch_D,TSI,Ultimate_Oscillator,WilliamsR,ADI,CMF,EMV,FI,MFI,NVI,VPT,BBM,BBW,DCM,DCW,KCM,KCW,UI,Aroon_down,Aroon_up,CCI,DPO,Ichimoku_A,Ichimoku_B,Ichimoku_Base,Ichimoku_Conversion,KST,MACD,MACD_Signal,MI,TRIX,Vortex_down,Vortex_up,WMA,CR,PSAR_down,PSAR_up,gTrendsBitcoin,gTrendsBlockchain,gTrendsCryptocurrency,gTrendsInvesting,goldValue,sp500Value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1
2017-01-01,963.658,1003.08,958.699,998.325,147775008.0,16050410000.0,0.035254,1,0,1,0,1,0,1,-0.781831,0.62349,927.913019,871.434686,83.084032,871.46615,1027.196258,715.736042,2922337000.0,139.503888,955.043802,9.281661,6.612502,2.669159,14.687295,17.065823,-2.378528,15.474703,83.084032,56.506121,62.292639,93.416834,94.168225,62.537845,82.395616,-2.234913,2957941000.0,0.562083,268.189478,2396025000.0,78.950576,1007.506291,147527500000.0,974.5448,0.0539,889.9595,226.241,891.830567,107.944512,82.395616,0.0,100.0,111.969673,55.459,745.59775,704.6545,881.415,932.752,21126.993631,56.478334,46.010969,27.806359,0.435426,0.523588,1.376167,958.571873,66.168065,0,1,6,16,1,37,1161.425195,2251.679688
2017-01-02,998.617,1031.39,996.702,1021.75,222184992.0,16429020000.0,0.023193,1,0,1,0,1,0,1,0.0,1.0,942.349478,882.569153,84.887313,883.52585,1046.419181,720.632519,3144522000.0,143.060506,965.552321,9.859751,7.261952,2.597799,16.843606,17.02138,-0.177774,10.820795,84.887313,62.800521,59.793982,95.188902,93.928779,63.913117,82.121062,-4.018458,3056633000.0,0.583475,304.980903,2797262000.0,80.534253,1007.506291,227017500000.0,983.7106,0.09394,904.1145,254.551,904.203846,110.878559,82.121062,0.0,100.0,131.923933,82.6259,746.1165,704.6545,898.401,946.907,22315.365979,59.780324,48.76484,27.991673,0.455892,0.493168,1.42638,972.880582,69.774625,0,1,6,16,1,37,1166.400024,2254.251953
2017-01-03,1021.6,1044.08,1021.6,1043.84,185168000.0,16786370000.0,0.021389,1,0,1,0,1,0,1,0.781831,0.62349,957.963404,894.515142,86.363588,896.6438,1066.653829,726.633771,3329690000.0,147.687241,989.282139,10.251627,7.859887,2.39174,16.391334,16.89537,-0.504037,16.134229,86.363588,68.222461,62.509701,97.882847,95.496194,65.565071,83.501748,-0.098089,3237848000.0,0.602965,318.138014,2981991000.0,81.475127,1009.668268,193285800000.0,997.7792,0.129162,910.941,266.278,917.502527,108.882506,83.501748,0.0,100.0,139.18114,108.60445,746.543,704.6545,904.746,970.489,23941.222052,63.448262,51.701525,28.176019,0.477714,0.49612,1.463899,989.391727,72.727175,0,1,6,16,1,37,1164.53125,2257.830078
2017-01-04,1044.4,1159.42,1044.4,1154.73,344945984.0,18571870000.0,0.10096,1,0,1,0,1,0,1,0.974928,-0.222521,988.235188,913.790317,91.076165,915.4759,1110.400038,720.551762,3674636000.0,167.045691,1049.890184,10.61374,8.410658,2.203083,22.662037,18.048704,4.613333,28.8498,91.076165,79.549401,70.190795,98.147193,97.07298,69.471661,85.562083,-1.441875,3554663000.0,0.673299,449.937853,8020429000.0,83.325859,1009.668268,398319500000.0,1036.4776,0.25011,969.1915,380.457,940.09562,142.543219,85.562083,0.0,100.0,214.597295,223.78505,752.1115,704.6545,962.416,1031.8375,26693.551368,74.444872,56.250194,28.82168,0.504156,0.422958,1.423311,1023.428,82.15233,0,1,6,16,1,37,1163.800049,2270.75
2017-01-05,1156.73,1191.1,910.417,1013.38,510199008.0,16300250000.0,-0.130575,0,0,0,0,0,0,0,0.433884,-0.900969,992.103621,921.16733,61.771598,926.89955,1116.585647,737.213453,3164437000.0,177.824844,1048.171858,10.54542,8.83761,1.70781,31.635758,20.766115,10.869643,11.653684,61.771598,56.209877,67.993913,81.46285,92.49763,57.008442,64.793651,-54.071487,3418777000.0,0.535435,192.256444,-3427722000.0,67.351462,1009.668268,517025500000.0,1046.405,0.214551,987.982,406.236,947.075085,235.894532,64.793651,0.0,100.0,91.704144,87.51825,754.89125,704.777,978.256,1050.7585,27561.826641,70.936291,59.187413,30.071714,0.528651,0.605755,1.089394,1027.063418,23.543196,1,0,6,16,1,37,1179.699951,2269.0


# Google News


In [6]:
# Raw Google News article dumps, one JSON file per coin.
news_articles_dir = config.DATA_DIR / "raw" / "news_articles"

df_news_btc = load_and_process_news_data(news_articles_dir / "bitcoin_articles.json")
df_news_eth = load_and_process_news_data(news_articles_dir / "ethereum_articles.json")

In [7]:
def _dedupe_articles(news_df):
    """Drop articles with a repeated `complete_text` (first one wins), then reset the index.

    Dropping first and resetting second gives the result a contiguous 0..n-1 index.
    The original BTC path reset first and was left with gaps where duplicates were
    removed, unlike the ETH path; both frames now go through the same steps.
    """
    unique_articles = news_df.drop_duplicates(subset=["complete_text"], keep="first")
    # reset_index() moves the current index into a column. Re-running this cell on an
    # already processed frame would therefore add an extra "index" column.
    return unique_articles.reset_index()


df_news_btc = _dedupe_articles(df_news_btc)
df_news_eth = _dedupe_articles(df_news_eth)

df_news_btc.head()

Unnamed: 0,date,title,subtitle,complete_text,word_count
0,2017-01-01,"A Look At Bitcoin Bubbles, When Will the Next ...",There have been four noteworthy bubbles in bit...,"A Look At Bitcoin Bubbles, When Will the Next ...",35
1,2017-01-01,"Bitcoin Price Tops $1,000 in First Day of 2017...",The price of bitcoin inched upward over the co...,"Bitcoin Price Tops $1,000 in First Day of 2017...",36
2,2017-01-01,What Will the Bitcoin Price Be in 2017?,CoinDesk's Charles Bovaird asks the experts fo...,What Will the Bitcoin Price Be in 2017? CoinDe...,25
3,2017-01-01,"Bitcoin Breaks $1,000 as Exchanges Break Volum...",Global bitcoin exchanges report record-setting...,"Bitcoin Breaks $1,000 as Exchanges Break Volum...",29
4,2017-01-01,"Bitcoin Price Tops $1,000 in First Day of 2017...",The price of bitcoin inched upward over the co...,"Bitcoin Price Tops $1,000 in First Day of 2017...",36


In [8]:
# Pricing and words-to-tokens ratio used for both estimates.
_estimate_kwargs = {"cost_per_million_tokens": 0.4, "word_in_tokens": 1.8}

news_btc_cost = estimate_tokens_and_cost(
    df_news_btc["complete_text"].tolist(), **_estimate_kwargs
)
news_eth_cost = estimate_tokens_and_cost(
    df_news_eth["complete_text"].tolist(), **_estimate_kwargs
)


def _print_cost_report(heading, cost, trailer=""):
    """Print the text count, token estimate and cost from one estimate dict."""
    print(heading)
    print(f"Total Texts Processed: {cost['total_texts']}")
    print(f"Total Tokens Estimated: {cost['total_tokens']}")
    print(f"Estimated Cost: ${cost['estimated_cost']:.2f}{trailer}")


# The BTC report ends with " \n" so a blank line separates it from the ETH report.
_print_cost_report("BTC News Cost", news_btc_cost, trailer=" \n")
_print_cost_report("ETH News Cost", news_eth_cost)

BTC News Cost
Total Texts Processed: 20741
Total Tokens Estimated: 1221579
Estimated Cost: $0.49 

ETH News Cost
Total Texts Processed: 16937
Total Tokens Estimated: 984198
Estimated Cost: $0.39


Sentiment analysis of the BTC news articles; the scores are saved to a CSV file.

In [9]:
# Disabled sentiment run over the BTC articles with CryptoNewsSentimentAnalyzer; the
# next cell reads a stored news_sentiment_btc.csv instead of recomputing.
# The call below covers only 2022-04-28..2022-12-31 — presumably the last batch of a
# longer run; confirm before re-enabling.
# NOTE(review): this writes under DATA_DIR / "temp", while the loader in the next cell
# reads DATA_DIR / "processed" / "news_articles" — presumably the file is moved by hand; confirm.
# analyzer_btc = CryptoNewsSentimentAnalyzer(verbose=True, output_file_path=config.DATA_DIR / "temp" / "news_sentiment_btc.csv")
# result_df_btc = analyzer_btc.analyze_articles_in_range(
#     df_news_btc, start_date="2022-04-28", end_date="2022-12-31"
# )

In [10]:
# Load the stored per-article BTC news sentiment, aggregate it per day, index by date,
# and pass it through the rolling-mean replacement helper.
news_sentiment_btc_csv = (
    config.DATA_DIR / "processed" / "news_articles" / "news_sentiment_btc.csv"
)
results_df_btc = pd.read_csv(news_sentiment_btc_csv, parse_dates=["date"])
results_df_btc = replace_nans_and_zeros_with_rolling_mean(
    add_daily_aggregates(results_df_btc).set_index("date")
)

# Inner join on the date index: only dates present in both frames are kept.
df_btc = (
    df_btc
    .merge(results_df_btc, left_index=True, right_index=True, how="inner")
    .rename(columns={"average_score": "gnewsAvgSentiment"})
)

In [11]:
# Save the BTC frame (prices + trends + gold + S&P 500 + news sentiment); the index is written too.
btc_csv_path = config.DATA_DIR / "processed" / "crypto_prices" / "btc.csv"
df_btc.to_csv(btc_csv_path)

Sentiment analysis of the ETH news articles; the scores are saved to a CSV file.

In [12]:
# Disabled sentiment run over the ETH articles (analyze_news_articles comes from
# src.llm.bart_news_sentiment); the next cell loads a stored news_sentiment_eth.csv instead.
# result_df_eth = analyze_news_articles(df_news_eth, "eth")

In [13]:
# Load only the date and average_score columns of the stored ETH news sentiment,
# indexed by date, and pass them through the rolling-mean replacement helper.
news_sentiment_eth_csv = (
    config.DATA_DIR / "processed" / "news_articles" / "news_sentiment_eth.csv"
)
results_df_eth = replace_nans_and_zeros_with_rolling_mean(
    pd.read_csv(
        news_sentiment_eth_csv,
        parse_dates=["date"],
        usecols=["date", "average_score"],
        index_col="date",
    )
)

# Inner join on the date index: only dates present in both frames are kept.
df_eth = (
    df_eth
    .merge(results_df_eth, left_index=True, right_index=True, how="inner")
    .rename(columns={"average_score": "gnewsAvgSentiment"})
)

In [14]:
# Save the ETH frame (prices + trends + gold + S&P 500 + news sentiment); the index is written too.
eth_csv_path = config.DATA_DIR / "processed" / "crypto_prices" / "eth.csv"
df_eth.to_csv(eth_csv_path)

# Reddit


In [15]:
# Report how many posts and comments the Reddit database holds.
with RedditAnalyzer(config.DATABASE_URL) as analyzer:
    total_posts = analyzer.get_total_posts()
    print(f"Total posts: {total_posts}")
    total_comments = analyzer.get_total_comments()
    print(f"Total comments: {total_comments}")

Total posts: 942980
Total comments: 11161264


In [16]:
# Same date window for both subreddits.
reddit_window = {"start_date": "2017-01-01", "end_date": "2022-12-31"}

df_reddit_btc = get_daily_reddit_data(subreddit="bitcoin", **reddit_window)
df_reddit_eth = get_daily_reddit_data(subreddit="ethereum", **reddit_window)

Days with no posts: 68
Days with no comments: 0
Days with no unique authors: 68
Days with no average score: 68
Days with no posts: 163
Days with no comments: 14
Days with no unique authors: 163
Days with no average score: 163


In [17]:
# Run the daily Reddit activity frames through the rolling-mean replacement helper,
# then inner-join each onto its price frame by "date".
df_reddit_btc = replace_nans_and_zeros_with_rolling_mean(df_reddit_btc)
df_reddit_eth = replace_nans_and_zeros_with_rolling_mean(df_reddit_eth)

df_btc = df_btc.merge(df_reddit_btc, on="date", how="inner")
df_eth = df_eth.merge(df_reddit_eth, on="date", how="inner")

In [18]:
# Query arguments shared by both subreddits.
top_posts_query = {"start_date": "2017-01-01", "end_date": "2022-12-31", "limit": 5}

top_posts_btc = get_top_scored_posts(subreddit="bitcoin", **top_posts_query)
top_posts_eth = get_top_scored_posts(subreddit="ethereum", **top_posts_query)

# NOTE(review): the saved output includes rows dated 2016-12-31, i.e. before
# start_date — possibly a timezone boundary effect in get_top_scored_posts; confirm.
top_posts_btc.head()

Unnamed: 0,date,title,selftext,score
72359,2016-12-31,An Idea I had about the deflationary nature of...,Had an interesting thought today about bitcoin...,5
330310,2016-12-31,Double your BTC !,[removed],0
4030,2017-01-01,Congratulations Bitcoin for reaching $1000. Yo...,I seriously didn't expect it to happen. And I ...,257
7455,2017-01-01,OK $1000 was never the moon. We clear?,Between 50k-100k is the moon.,132
8859,2017-01-01,Say HI to the new record market cap 16b$,"atm = $15,926,615,035",108


In [19]:
# Save the selected top posts for each subreddit, without the DataFrame index.
reddit_out_dir = config.DATA_DIR / "processed" / "reddit"
top_posts_btc.to_csv(reddit_out_dir / "top_posts_btc.csv", index=False)
top_posts_eth.to_csv(reddit_out_dir / "top_posts_eth.csv", index=False)

Sentiment analysis of the top Reddit posts; the scores are saved to CSV files.

In [20]:
# Disabled sentiment run over the top Reddit posts (analyze_reddit_posts comes from
# src.llm.bart_reddit_post_sentiment). The output paths below are the same files the
# next cell loads, so that cell works from stored results.
# reddit_sentiment_btc = analyze_reddit_posts(
#     top_posts_btc,
#     "bitcoin",
#     output_path=config.DATA_DIR / "processed" / "reddit" / "reddit_sentiment_btc.csv",
# )

# reddit_sentiment_eth = analyze_reddit_posts(
#     top_posts_eth,
#     "ethereum",
#     output_path=config.DATA_DIR / "processed" / "reddit" / "reddit_sentiment_eth.csv",
# )

In [21]:
# Load the stored per-post sentiment scores for each subreddit, parsing `date`.
reddit_sentiment_dir = config.DATA_DIR / "processed" / "reddit"

reddit_sentiment_btc = pd.read_csv(
    reddit_sentiment_dir / "reddit_sentiment_btc.csv", parse_dates=["date"]
)
reddit_sentiment_eth = pd.read_csv(
    reddit_sentiment_dir / "reddit_sentiment_eth.csv", parse_dates=["date"]
)

In [22]:
# Aggregate the per-post sentiment per day and rename the aggregate column so it does
# not collide with the news-sentiment column when merged into the price frames.
# rename() without inplace=True returns a new frame. If add_daily_aggregates ever
# returns its input object, the loaded reddit_sentiment_* frames stay unchanged and
# this cell can be re-run safely.
_posts_sentiment_column = {"average_score": "postsAvgSentiment"}

reddit_avg_btc = add_daily_aggregates(reddit_sentiment_btc).rename(
    columns=_posts_sentiment_column
)
reddit_avg_eth = add_daily_aggregates(reddit_sentiment_eth).rename(
    columns=_posts_sentiment_column
)

In [23]:
# Run the daily post-sentiment frames through the rolling-mean replacement helper,
# then inner-join each onto its price frame by "date".
reddit_avg_btc = replace_nans_and_zeros_with_rolling_mean(reddit_avg_btc)
df_btc = df_btc.merge(reddit_avg_btc, on="date", how="inner")

reddit_avg_eth = replace_nans_and_zeros_with_rolling_mean(reddit_avg_eth)
df_eth = df_eth.merge(reddit_avg_eth, on="date", how="inner")


In [24]:
# Preview the BTC frame after the news and Reddit merges.
df_btc.head()

Unnamed: 0,date,open,high,low,close,volume,marketCap,logPriceChange,priceMovement,localMin_7,localMax_7,localMin_14,localMax_14,localMin_21,localMax_21,dayOfWeek_Sin,dayOfWeek_Cos,EMA_12,EMA_26,RSI_14,BB_Middle,BB_Upper,BB_Lower,OBV,AO,KAMA,PPO,PPO_Signal,PPO_Histogram,PVO,PVO_Signal,PVO_Histogram,ROC,RSI,Stoch_RSI_K,Stoch_RSI_D,Stoch_K,Stoch_D,TSI,Ultimate_Oscillator,WilliamsR,ADI,CMF,EMV,FI,MFI,NVI,VPT,BBM,BBW,DCM,DCW,KCM,KCW,UI,Aroon_down,Aroon_up,CCI,DPO,Ichimoku_A,Ichimoku_B,Ichimoku_Base,Ichimoku_Conversion,KST,MACD,MACD_Signal,MI,TRIX,Vortex_down,Vortex_up,WMA,CR,PSAR_down,PSAR_up,gTrendsBitcoin,gTrendsBlockchain,gTrendsCryptocurrency,gTrendsInvesting,goldValue,sp500Value,gnewsAvgSentiment,postNumber,commentNumber,uniqueAuthors,averagePostScore,postsAvgSentiment
0,2017-01-01,963.658,1003.08,958.699,998.325,147775008.0,16050410000.0,0.035254,1,0,1,0,1,0,1,-0.781831,0.62349,927.913019,871.434686,83.084032,871.46615,1027.196258,715.736042,2922337000.0,139.503888,955.043802,9.281661,6.612502,2.669159,14.687295,17.065823,-2.378528,15.474703,83.084032,56.506121,62.292639,93.416834,94.168225,62.537845,82.395616,-2.234913,2957941000.0,0.562083,268.189478,2396025000.0,78.950576,1007.506291,147527500000.0,974.5448,0.0539,889.9595,226.241,891.830567,107.944512,82.395616,0.0,100.0,111.969673,55.459,745.59775,704.6545,881.415,932.752,21126.993631,56.478334,46.010969,27.806359,0.435426,0.523588,1.376167,958.571873,66.168065,0,1,6,16,1,37,1161.425195,2251.679688,8.75,323.0,3024,229.0,32.758514,9.1
1,2017-01-02,998.617,1031.39,996.702,1021.75,222184992.0,16429020000.0,0.023193,1,0,1,0,1,0,1,0.0,1.0,942.349478,882.569153,84.887313,883.52585,1046.419181,720.632519,3144522000.0,143.060506,965.552321,9.859751,7.261952,2.597799,16.843606,17.02138,-0.177774,10.820795,84.887313,62.800521,59.793982,95.188902,93.928779,63.913117,82.121062,-4.018458,3056633000.0,0.583475,304.980903,2797262000.0,80.534253,1007.506291,227017500000.0,983.7106,0.09394,904.1145,254.551,904.203846,110.878559,82.121062,0.0,100.0,131.923933,82.6259,746.1165,704.6545,898.401,946.907,22315.365979,59.780324,48.76484,27.991673,0.455892,0.493168,1.42638,972.880582,69.774625,0,1,6,16,1,37,1166.400024,2254.251953,9.0,354.0,3370,263.0,20.536723,8.5
2,2017-01-03,1021.6,1044.08,1021.6,1043.84,185168000.0,16786370000.0,0.021389,1,0,1,0,1,0,1,0.781831,0.62349,957.963404,894.515142,86.363588,896.6438,1066.653829,726.633771,3329690000.0,147.687241,989.282139,10.251627,7.859887,2.39174,16.391334,16.89537,-0.504037,16.134229,86.363588,68.222461,62.509701,97.882847,95.496194,65.565071,83.501748,-0.098089,3237848000.0,0.602965,318.138014,2981991000.0,81.475127,1009.668268,193285800000.0,997.7792,0.129162,910.941,266.278,917.502527,108.882506,83.501748,0.0,100.0,139.18114,108.60445,746.543,704.6545,904.746,970.489,23941.222052,63.448262,51.701525,28.176019,0.477714,0.49612,1.463899,989.391727,72.727175,0,1,6,16,1,37,1164.53125,2257.830078,8.25,307.0,3206,229.0,21.687296,6.38
3,2017-01-04,1044.4,1159.42,1044.4,1154.73,344945984.0,18571870000.0,0.10096,1,0,1,0,1,0,1,0.974928,-0.222521,988.235188,913.790317,91.076165,915.4759,1110.400038,720.551762,3674636000.0,167.045691,1049.890184,10.61374,8.410658,2.203083,22.662037,18.048704,4.613333,28.8498,91.076165,79.549401,70.190795,98.147193,97.07298,69.471661,85.562083,-1.441875,3554663000.0,0.673299,449.937853,8020429000.0,83.325859,1009.668268,398319500000.0,1036.4776,0.25011,969.1915,380.457,940.09562,142.543219,85.562083,0.0,100.0,214.597295,223.78505,752.1115,704.6545,962.416,1031.8375,26693.551368,74.444872,56.250194,28.82168,0.504156,0.422958,1.423311,1023.428,82.15233,0,1,6,16,1,37,1163.800049,2270.75,5.0,587.0,4998,391.0,19.373083,4.9
4,2017-01-05,1156.73,1191.1,910.417,1013.38,510199008.0,16300250000.0,-0.130575,0,0,0,0,0,0,0,0.433884,-0.900969,992.103621,921.16733,61.771598,926.89955,1116.585647,737.213453,3164437000.0,177.824844,1048.171858,10.54542,8.83761,1.70781,31.635758,20.766115,10.869643,11.653684,61.771598,56.209877,67.993913,81.46285,92.49763,57.008442,64.793651,-54.071487,3418777000.0,0.535435,192.256444,-3427722000.0,67.351462,1009.668268,517025500000.0,1046.405,0.214551,987.982,406.236,947.075085,235.894532,64.793651,0.0,100.0,91.704144,87.51825,754.89125,704.777,978.256,1050.7585,27561.826641,70.936291,59.187413,30.071714,0.528651,0.605755,1.089394,1027.063418,23.543196,1,0,6,16,1,37,1179.699951,2269.0,7.75,586.0,4836,399.0,20.919795,5.42


In [25]:
# Preview the ETH frame after the news and Reddit merges.
df_eth.head()

Unnamed: 0,date,open,high,low,close,volume,marketCap,logPriceChange,priceMovement,localMin_7,localMax_7,localMin_14,localMax_14,localMin_21,localMax_21,dayOfWeek_Sin,dayOfWeek_Cos,EMA_12,EMA_26,RSI_14,BB_Middle,BB_Upper,BB_Lower,OBV,AO,KAMA,PPO,PPO_Signal,PPO_Histogram,PVO,PVO_Signal,PVO_Histogram,ROC,RSI,Stoch_RSI_K,Stoch_RSI_D,Stoch_K,Stoch_D,TSI,Ultimate_Oscillator,WilliamsR,ADI,CMF,EMV,FI,MFI,NVI,VPT,BBM,BBW,DCM,DCW,KCM,KCW,UI,Aroon_down,Aroon_up,CCI,DPO,Ichimoku_A,Ichimoku_B,Ichimoku_Base,Ichimoku_Conversion,KST,MACD,MACD_Signal,MI,TRIX,Vortex_down,Vortex_up,WMA,CR,PSAR_down,PSAR_up,gTrendsEthereum,gTrendsBlockchain,gTrendsCryptocurrency,gTrendsInvesting,goldValue,sp500Value,gnewsAvgSentiment,postNumber,commentNumber,uniqueAuthors,averagePostScore,postsAvgSentiment
0,2017-01-02,8.171,8.436,8.054,8.379,14579600.0,733331700.0,0.024893,1,0,1,0,1,0,0,0.0,1.0,7.902078,7.9909,55.894527,7.75165,8.523941,6.979359,-108713650.0,0.280403,7.763207,-2.622934,-3.053815,0.430881,8.136156,4.696596,3.43956,16.927156,55.894527,89.205618,88.141672,76.109984,73.653369,-5.514304,56.68876,-10.006761,-58926220.0,-0.07641,0.145775,2145547.0,56.046976,1011.342711,122162468.4,8.193,0.067134,7.7875,1.479,7.908923,1.692791,56.68876,50.0,71.428571,109.611876,-0.7196,7.7805,9.4535,7.9545,7.7875,-7621.086936,-0.088821,-0.245341,22.582578,-0.616175,0.88832,1.070371,7.957182,11.789054,0,1,1,16,1,37,1166.400024,2254.251953,10.0,32.0,473.0,26.0,19.8125,8.78
1,2017-01-03,8.375,9.997,8.317,9.725,33625200.0,851512100.0,0.148971,1,0,1,0,1,0,1,0.781831,0.62349,8.182528,8.119352,69.122964,7.82465,8.965367,6.683933,-75088450.0,0.484497,8.234942,-1.081846,-2.659422,1.577575,18.330617,7.4234,10.907216,33.842554,69.122964,96.992635,90.76809,85.611561,77.631921,2.986338,64.179035,-9.223466,-36189180.0,0.047369,0.488568,8304686.0,65.985947,1011.342711,327005070.0,8.4812,0.299624,8.5225,2.949,8.081883,2.171573,64.179035,42.857143,100.0,213.117789,0.5028,7.808,9.4535,8.5225,8.5225,-4361.907257,0.063176,-0.183638,22.923704,-0.600995,0.671062,1.100473,8.318618,38.245928,0,1,1,16,1,37,1164.53125,2257.830078,10.0,45.0,499.0,40.0,30.688889,8.3
2,2017-01-04,9.709,11.276,9.56,11.252,41051200.0,985515900.0,0.145846,1,0,1,0,1,0,1,0.974928,-0.222521,8.654754,8.3514,77.403185,7.99585,9.875568,6.116132,-34037250.0,0.859659,9.195351,1.822173,-1.763103,3.585275,27.163883,11.371497,15.792387,56.8223,77.403185,100.0,95.399418,93.40071,85.040751,14.701268,70.838955,-0.567644,3713739.0,0.212771,0.834204,16073330.0,72.115306,1011.342711,461908102.4,9.0996,0.544797,9.162,4.228,8.383799,2.618471,70.838955,35.714286,100.0,261.053695,1.8983,7.7865,9.299,9.162,9.179,884.295568,0.303355,-0.086239,23.516096,-0.576634,0.598882,1.220716,8.912982,54.80637,0,1,1,16,1,37,1163.800049,2270.75,10.0,69.0,540.0,52.0,17.15942,9.225
3,2017-01-05,11.287,11.891,9.403,10.254,41557400.0,898497900.0,-0.092878,0,0,0,0,0,0,0,0.433884,-0.900969,8.900792,8.492333,65.113195,8.11565,10.235079,5.996221,-75594650.0,1.302553,9.342844,3.859138,-0.638655,4.497793,32.020745,15.501346,16.519398,40.967831,65.113195,90.529964,95.840867,85.469176,88.160482,18.362934,60.93812,-33.801363,-9414926.0,0.14969,0.949666,7852241.0,62.512983,1011.342711,426129579.6,9.5566,0.484162,9.4695,4.843,8.561913,3.316902,60.93812,28.571429,100.0,179.259908,0.777,7.7865,9.134,9.4695,9.4865,4607.147288,0.408459,0.0127,24.405413,-0.547544,0.616786,1.178159,9.251764,30.22639,0,1,1,16,1,37,1179.699951,2269.0,2.25,72.0,488.0,54.0,18.277778,7.2
4,2017-01-06,10.286,10.629,9.627,10.255,29471800.0,898845900.0,9.8e-05,1,0,0,0,0,0,0,-0.433884,-0.900969,9.109132,8.622901,65.119171,8.23455,10.545265,5.923835,-46122850.0,1.617603,9.481398,5.982586,0.685593,5.296992,30.822969,18.565671,12.257298,43.006554,65.119171,80.856842,90.462269,77.283426,85.384437,20.872645,62.937335,-33.780714,-1944030.0,0.140262,0.877292,6734702.0,58.610099,1011.352463,302233309.0,9.973,0.376107,9.4695,4.843,8.72316,3.382721,62.937335,21.428571,92.857143,115.882653,0.6389,7.7865,9.092,9.4695,9.715,8707.79682,0.486231,0.107406,25.047233,-0.514662,0.728916,1.22207,9.536545,30.238343,0,1,1,16,1,37,1171.900024,2276.97998,6.125,48.0,466.0,44.0,22.229167,8.9


In [26]:
# Save the final frames. index=False because `date` is now a regular column
# (see the head() previews above), so the index carries no information.
crypto_prices_dir = config.DATA_DIR / "processed" / "crypto_prices"
df_btc.to_csv(crypto_prices_dir / "btc_reddit.csv", index=False)
df_eth.to_csv(crypto_prices_dir / "eth_reddit.csv", index=False)