In [None]:
%pwd
%cd C:\Users\yashs\source\repos\Ripa-Shah\Sentiment-Analysis\Analyzing-Social-Sentiment-Data-to-determine-correlation-with-Cryptocurrency-Price

C:\Users\yashs\source\repos\Ripa-Shah\Sentiment-Analysis\Analyzing-Social-Sentiment-Data-to-determine-correlation-with-Cryptocurrency-Price


In [12]:
import praw
import pandas as pd
import time
from datetime import datetime, timedelta
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# --- CONFIGURATION ---

# List of time filters to check, ordered from newest to oldest
# Note: These are only used for the initial large historical fetch.
TIME_FILTERS = ["day", "week", "month", "year"]

# Define the start date for historical fetch (as before)
start_date_str = "2025-06-15 00:00:00"
start_timestamp = datetime.strptime(start_date_str, "%Y-%m-%d %H:%M:%S").timestamp()

# Polling intervals in seconds
LIVE_STREAM_INTERVAL = 3    # Stream new posts/comments every 3 seconds
HISTORICAL_FETCH_INTERVAL = 300 # Fetch historical data every 5 minutes (300 seconds)

# CSV file path
CSV_FILE = "reddit_crypto_sentiment_analysis.csv"

# --- INITIALIZATION ---

# Initialize Reddit API
reddit = praw.Reddit(
    client_id="M9iSExe3kRkwrILVO75-CA",
    client_secret="rnuYodBu7llgTAmssHujZVUt37mrAA",
    user_agent="crypto-sentiment-app by u/MindlessEssay2919"
)

# Define cryptocurrency keywords to track
crypto_keywords = {
    "bitcoin": "BTC", "btc": "BTC",
    "ethereum": "ETH", "eth": "ETH",
    "cardano": "ADA", "ada": "ADA",
    "dogecoin": "DOGE", "doge": "DOGE",
    "solana": "SOL", "sol": "SOL"
}

# Initialize sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Initialize a set to prevent processing the same post/comment multiple times
processed_ids = set()

# Initialize timers
last_historical_fetch_time = time.time() - HISTORICAL_FETCH_INTERVAL - 1 # Ensures it runs immediately
last_live_stream_time = time.time() - LIVE_STREAM_INTERVAL - 1 # Ensures it runs immediately

# Initialize CSV file (only once)
df_init = pd.DataFrame(columns=[
    "date", "user_id", "type", "title", "cryptocurrency", "review", "sentiment_score"
])
df_init.to_csv(CSV_FILE, index=False)

print("üîÑ Starting Reddit post & comment sentiment streaming...")

# ----------------------------------------------------------------------
# HELPER FUNCTION FOR DATA PROCESSING
# ----------------------------------------------------------------------

def process_submission(post, post_type):
    """Processes a single PRAW submission (post) and its comments, extracts sentiment, and adds to records."""
    
    # Check 1: Has this post been processed already? (Prevents duplicates)
    if post.id in processed_ids:
        return [] # Return empty if already processed

    post_timestamp = post.created_utc
    
    # Check 2: Ignore posts/comments older than the global start date for the main loop
    # This check is primarily for the historical fetch, but good for consistency.
    if post_timestamp < start_timestamp:
        return []

    records = []
    
    try:
        # --- Post Processing ---
        title = getattr(post, 'title', '')
        review = getattr(post, 'selftext', '') if hasattr(post, 'selftext') else ''
        user_id = str(post.author) if post.author else "Anonymous"
        date = datetime.utcfromtimestamp(post_timestamp).strftime("%Y-%m-%d %H:%M:%S")

        # Detect which crypto is mentioned
        text_lower = (title + " " + review).lower()
        found_coin = None
        for keyword, symbol in crypto_keywords.items():
            if keyword in text_lower:
                found_coin = symbol
                break
        
        if not found_coin:
            return [] # Skip if no tracked crypto is mentioned

        # Sentiment for post
        sentiment = analyzer.polarity_scores(title + " " + review)["compound"]
        records.append([date, user_id, "post", title, found_coin, review, sentiment])
        processed_ids.add(post.id)
        
        # --- Comment Processing ---
        if post_type == "historical":
            # For historical, only load a few comments to save time/requests
            limit = 10
            post.comments.replace_more(limit=0)
            comments = post.comments.list()[:limit]
        else: # For live stream, load more aggressively
            limit = 25
            post.comments.replace_more(limit=1)
            comments = post.comments.list()[:limit]

        for comment in comments:
            if isinstance(comment, praw.models.Comment):
                comment_text = comment.body
                comment_user = str(comment.author) if comment.author else "Anonymous"
                comment_date = datetime.utcfromtimestamp(comment.created_utc).strftime("%Y-%m-%d %H:%M:%S")

                if not comment_text or comment_text in ["[deleted]", "[removed]"]:
                    continue

                comment_lower = comment_text.lower()
                comment_coin = found_coin
                # Re-check for crypto in the comment itself
                for keyword, symbol in crypto_keywords.items():
                    if keyword in comment_lower:
                        comment_coin = symbol
                        break
                
                # Use a combined ID for comment to track it
                comment_id = f"{post.id}_{comment.id}"
                if comment_id in processed_ids:
                    continue
                
                comment_sent = analyzer.polarity_scores(comment_text)["compound"]
                records.append([comment_date, comment_user, "comment", title, comment_coin, comment_text, comment_sent])
                processed_ids.add(comment_id)
                
    except Exception as e:
        print(f"Error processing submission/comment {post.id}: {e}")
        return []

    return records

# ----------------------------------------------------------------------
# DATA FETCHERS
# ----------------------------------------------------------------------

def fetch_historical_data():
    """Fetches historical data using the TIME_FILTERS, runs every 5 minutes."""
    
    print(f"\n--- ‚è≥ Running Historical Fetch (Every {HISTORICAL_FETCH_INTERVAL}s) ---")
    new_records = []
    
    for time_filter in TIME_FILTERS:
        print(f"--- Checking Top posts for: {time_filter.upper()} ---")

        # Fetch the top posts for the current time filter
        for post in reddit.subreddit("CryptoCurrency").top(time_filter=time_filter, limit=None):
            
            # Stop if we hit posts older than the start date on the 'year' filter
            if time_filter == "year" and post.created_utc < start_timestamp:
                print(f"--- Stopping historical fetch: Hit post older than {start_date_str} on 'year' filter. ---")
                return new_records # Return what we have so far

            records = process_submission(post, post_type="historical")
            if records:
                new_records.extend(records)
                print(f"‚úÖ Historical Post ({time_filter}): {datetime.utcfromtimestamp(post.created_utc).strftime('%Y-%m-%d %H:%M:%S')} | {records[0][4]} | Sentiment: {records[0][-1]:+.3f}")
    
    return new_records


def fetch_live_stream_data():
    """Fetches the newest data using 'new' or 'stream' approach, runs every 3 seconds."""
    
    print(f"\n--- ‚ö°Ô∏è Running Live Stream Fetch (Every {LIVE_STREAM_INTERVAL}s) ---")
    new_records = []
    
    # Use the 'new' sort for a quick look at the latest posts (limit of 25)
    for post in reddit.subreddit("CryptoCurrency").new(limit=25):
        records = process_submission(post, post_type="live")
        if records:
            new_records.extend(records)
            print(f"üî• Live Post: {datetime.utcfromtimestamp(post.created_utc).strftime('%H:%M:%S')} | {records[0][4]} | Sentiment: {records[0][-1]:+.3f}")
            
    return new_records

def save_records_to_csv(records):
    """Appends collected records to the CSV file."""
    if records:
        df = pd.DataFrame(records, columns=["date", "user_id", "type", "title", "cryptocurrency", "review", "sentiment_score"])
        df.to_csv(CSV_FILE, mode="a", header=False, index=False)
        print(f"\nüíæ SAVED {len(records)} new entries to {CSV_FILE}")

# ----------------------------------------------------------------------
# MAIN LOOP
# ----------------------------------------------------------------------

try:
    while True:
        current_time = time.time()
        new_data_to_save = []

        # 1. Historical Fetch (Every 5 minutes)
        if current_time - last_historical_fetch_time >= HISTORICAL_FETCH_INTERVAL:
            historical_records = fetch_historical_data()
            new_data_to_save.extend(historical_records)
            last_historical_fetch_time = current_time # Reset timer

        # 2. Live Stream Fetch (Every 3 seconds)
        if current_time - last_live_stream_time >= LIVE_STREAM_INTERVAL:
            live_records = fetch_live_stream_data()
            new_data_to_save.extend(live_records)
            last_live_stream_time = current_time # Reset timer
            
        # 3. Save any collected data
        save_records_to_csv(new_data_to_save)

        # Calculate time to sleep until the next live stream fetch
        # This keeps the loop running consistently every 3 seconds
        sleep_time = LIVE_STREAM_INTERVAL - (time.time() - current_time)
        if sleep_time > 0:
            time.sleep(sleep_time)
            
except KeyboardInterrupt:
    print("\n\nüõë Script interrupted by user (Ctrl+C). Shutting down.")
except Exception as e:
    print(f"\n\nüö® An unexpected error occurred: {e}")
    
print("Processing complete.")

üîÑ Starting Reddit post & comment sentiment streaming...

--- ‚è≥ Running Historical Fetch (Every 300s) ---
--- Checking Top posts for: DAY ---


  date = datetime.utcfromtimestamp(post_timestamp).strftime("%Y-%m-%d %H:%M:%S")
  comment_date = datetime.utcfromtimestamp(comment.created_utc).strftime("%Y-%m-%d %H:%M:%S")
  print(f"‚úÖ Historical Post ({time_filter}): {datetime.utcfromtimestamp(post.created_utc).strftime('%Y-%m-%d %H:%M:%S')} | {records[0][4]} | Sentiment: {records[0][-1]:+.3f}")


‚úÖ Historical Post (day): 2025-11-15 17:58:58 | BTC | Sentiment: +0.000
‚úÖ Historical Post (day): 2025-11-15 19:52:48 | ETH | Sentiment: +0.202
‚úÖ Historical Post (day): 2025-11-15 21:53:13 | BTC | Sentiment: +0.846
--- Checking Top posts for: WEEK ---
‚úÖ Historical Post (week): 2025-11-14 12:15:45 | BTC | Sentiment: -0.421
‚úÖ Historical Post (week): 2025-11-13 18:48:54 | BTC | Sentiment: +0.000
‚úÖ Historical Post (week): 2025-11-11 19:21:29 | BTC | Sentiment: +0.000
‚úÖ Historical Post (week): 2025-11-11 18:46:04 | BTC | Sentiment: -0.991
‚úÖ Historical Post (week): 2025-11-11 19:28:23 | BTC | Sentiment: -0.340
‚úÖ Historical Post (week): 2025-11-14 20:08:29 | BTC | Sentiment: -0.670
‚úÖ Historical Post (week): 2025-11-10 13:32:41 | BTC | Sentiment: +0.000
‚úÖ Historical Post (week): 2025-11-11 15:40:20 | BTC | Sentiment: +0.872
‚úÖ Historical Post (week): 2025-11-14 17:06:34 | BTC | Sentiment: -0.421
‚úÖ Historical Post (week): 2025-11-12 07:08:34 | BTC | Sentiment: -0.318
‚úÖ 

  print(f"üî• Live Post: {datetime.utcfromtimestamp(post.created_utc).strftime('%H:%M:%S')} | {records[0][4]} | Sentiment: {records[0][-1]:+.3f}")


üî• Live Post: 16:54:34 | ETH | Sentiment: -0.601
üî• Live Post: 16:51:59 | BTC | Sentiment: -0.850
üî• Live Post: 10:58:14 | BTC | Sentiment: -0.665
üî• Live Post: 06:12:57 | BTC | Sentiment: -0.273
üî• Live Post: 01:00:44 | ETH | Sentiment: +0.954

üíæ SAVED 1813 new entries to reddit_crypto_sentiment_analysis.csv

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (E

In [14]:
%pwd 

%cd C:\Users\yashs\source\repos\Ripa-Shah\Sentiment-Analysis\Analyzing-Social-Sentiment-Data-to-determine-correlation-with-Cryptocurrency-Price

C:\Users\yashs\source\repos\Ripa-Shah\Sentiment-Analysis\Analyzing-Social-Sentiment-Data-to-determine-correlation-with-Cryptocurrency-Price


In [15]:
import praw
import pandas as pd
import time
from datetime import datetime
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# --- CONFIGURATION ---

# List of time filters to check, ordered from newest to oldest
TIME_FILTERS = ["day", "week", "month", "year"]

# Define the start date: June 15, 2025
start_date_str = "2025-06-15 00:00:00"
start_timestamp = datetime.strptime(start_date_str, "%Y-%m-%d %H:%M:%S").timestamp()

# Polling intervals in seconds
LIVE_STREAM_INTERVAL = 3    # Stream new posts/comments every 3 seconds
HISTORICAL_FETCH_INTERVAL = 300 # Fetch historical data every 5 minutes (300 seconds)

# CSV file path
CSV_FILE = "reddit_crypto_sentiment_streaming.csv"

# --- INITIALIZATION ---

# Initialize Reddit API (NOTE: Ensure these credentials are valid)
reddit = praw.Reddit(
    client_id="M9iSExe3kRkwrILVO75-CA",
    client_secret="rnuYodBu7llgTAmssHujZVUt37mrAA",
    user_agent="crypto-sentiment-app by u/MindlessEssay2919"
)

# Test PRAW Connection
try:
    test_post = next(reddit.subreddit("CryptoCurrency").hot(limit=1))
    print(f"‚úÖ PRAW Connection successful. Found post: {test_post.title}")
except Exception as e:
    print(f"‚ùå PRAW Connection failed! Error: {e}")
    print("Please check your client_id and client_secret.")
    exit() # Stop the script if the connection fails

# Define cryptocurrency keywords (simplified for efficiency)
crypto_keywords = {
    "bitcoin": "BTC", "btc": "BTC",
    "ethereum": "ETH", "eth": "ETH",
    "cardano": "ADA", "ada": "ADA",
    "dogecoin": "DOGE", "doge": "DOGE",
    "solana": "SOL", "sol": "SOL"
}

# Initialize sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Initialize a set to prevent processing the same post/comment multiple times
processed_ids = set()

# Initialize timers (set to run immediately on first loop)
last_historical_fetch_time = time.time() - HISTORICAL_FETCH_INTERVAL - 1
last_live_stream_time = time.time() - LIVE_STREAM_INTERVAL - 1

# Initialize CSV file (creates the file with headers)
df_init = pd.DataFrame(columns=[
    "date", "user_id", "type", "title", "cryptocurrency", "review", "sentiment_score"
])
df_init.to_csv(CSV_FILE, index=False)
print(f"File {CSV_FILE} initialized successfully.")

print(f"\nüîÑ Starting Reddit sentiment stream (Tracking since {start_date_str})...")

# ----------------------------------------------------------------------
# HELPER FUNCTIONS
# ----------------------------------------------------------------------

def process_submission(post, post_type):
    """Processes a single PRAW submission (post) and its comments."""
    
    # Check 1: Has this post been processed already? 
    if post.id in processed_ids:
        return []

    post_timestamp = post.created_utc
    
    # Check 2: Ignore posts older than the starting date
    if post_timestamp < start_timestamp:
        return []

    records = []
    
    try:
        # --- Post Processing ---
        title = getattr(post, 'title', '')
        review = getattr(post, 'selftext', '') if hasattr(post, 'selftext') else ''
        user_id = str(post.author) if post.author else "Anonymous"
        date = datetime.utcfromtimestamp(post_timestamp).strftime("%Y-%m-%d %H:%M:%S")

        # Detect crypto (text is already lowercased for checking efficiency)
        text_lower = (title + " " + review).lower()
        found_coin = None
        for keyword, symbol in crypto_keywords.items():
            if keyword in text_lower:
                found_coin = symbol
                break
        
        if not found_coin:
            return []

        # Sentiment for post
        sentiment = analyzer.polarity_scores(title + " " + review)["compound"]
        records.append([date, user_id, "post", title, found_coin, review, sentiment])
        processed_ids.add(post.id)
        
        # --- Comment Processing ---
        limit = 10 if post_type == "historical" else 25
        
        # This resolves MoreComments objects to actual comments
        post.comments.replace_more(limit=0 if post_type == "historical" else 1)
        comments = post.comments.list()[:limit]

        for comment in comments:
            if isinstance(comment, praw.models.Comment):
                comment_text = comment.body
                comment_user = str(comment.author) if comment.author else "Anonymous"
                
                if not comment_text or comment_text in ["[deleted]", "[removed]"]:
                    continue

                comment_id = f"{post.id}_{comment.id}"
                if comment_id in processed_ids:
                    continue
                
                comment_date = datetime.utcfromtimestamp(comment.created_utc).strftime("%Y-%m-%d %H:%M:%S")
                comment_lower = comment_text.lower()
                comment_coin = found_coin # Inherit post coin initially

                # Re-check for crypto in the comment itself
                for keyword, symbol in crypto_keywords.items():
                    if keyword in comment_lower:
                        comment_coin = symbol
                        break
                
                comment_sent = analyzer.polarity_scores(comment_text)["compound"]
                records.append([comment_date, comment_user, "comment", title, comment_coin, comment_text, comment_sent])
                processed_ids.add(comment_id)
                
    except Exception as e:
        # Catch errors like deleted posts/authors/comments
        # print(f"Error processing submission/comment {post.id}: {e}") 
        return []

    return records


def fetch_historical_data():
    """Fetches historical data using the TIME_FILTERS, runs every 5 minutes."""
    
    print(f"\n--- ‚è≥ Running Historical Fetch (Top posts since {start_date_str}) ---")
    new_records = []
    
    for time_filter in TIME_FILTERS:
        # Fetch the top posts for the current time filter (PRAW's limit is ~1000)
        for post in reddit.subreddit("CryptoCurrency").top(time_filter=time_filter, limit=None):
            
            # This handles stopping the deep search once we hit very old, irrelevant content
            if time_filter == "year" and post.created_utc < start_timestamp:
                print(f"--- Stopping historical fetch: Hit content older than {start_date_str} ---")
                return new_records 

            records = process_submission(post, post_type="historical")
            if records:
                new_records.extend(records)
                print(f"‚úÖ Historical Post ({time_filter}): {datetime.utcfromtimestamp(post.created_utc).strftime('%Y-%m-%d %H:%M:%S')} | {records[0][4]} | Sentiment: {records[0][-1]:+.3f}")
    
    return new_records


def fetch_live_stream_data():
    """Fetches the newest data using 'new' sort, runs every 3 seconds."""
    
    print(f"\n--- ‚ö°Ô∏è Running Live Stream Fetch (Every {LIVE_STREAM_INTERVAL}s) ---")
    new_records = []
    
    # Use the 'new' sort for a quick look at the latest posts (limit of 25)
    for post in reddit.subreddit("CryptoCurrency").new(limit=25):
        records = process_submission(post, post_type="live")
        if records:
            new_records.extend(records)
            print(f"üî• Live Post: {datetime.utcfromtimestamp(post.created_utc).strftime('%H:%M:%S')} | {records[0][4]} | Sentiment: {records[0][-1]:+.3f}")
            
    return new_records


def save_records_to_csv(records):
    """Appends collected records to the CSV file."""
    if records:
        df = pd.DataFrame(records, columns=["date", "user_id", "type", "title", "cryptocurrency", "review", "sentiment_score"])
        # Append data to the CSV without writing the header again
        df.to_csv(CSV_FILE, mode="a", header=False, index=False)
        print(f"\nüíæ SAVED {len(records)} new entries to {CSV_FILE}")

# ----------------------------------------------------------------------
# MAIN LOOP
# ----------------------------------------------------------------------

try:
    while True:
        current_time = time.time()
        new_data_to_save = []

        # 1. Historical Fetch (Every 5 minutes)
        if current_time - last_historical_fetch_time >= HISTORICAL_FETCH_INTERVAL:
            historical_records = fetch_historical_data()
            new_data_to_save.extend(historical_records)
            last_historical_fetch_time = current_time # Reset timer

        # 2. Live Stream Fetch (Every 3 seconds)
        if current_time - last_live_stream_time >= LIVE_STREAM_INTERVAL:
            live_records = fetch_live_stream_data()
            new_data_to_save.extend(live_records)
            last_live_stream_time = current_time # Reset timer
            
        # 3. Save any collected data
        if new_data_to_save:
            save_records_to_csv(new_data_to_save)
        
        # Calculate time to sleep to maintain the 3-second live stream interval
        time_spent = time.time() - current_time
        sleep_time = max(0, LIVE_STREAM_INTERVAL - time_spent)
        
        if sleep_time > 0:
            time.sleep(sleep_time)
            
except KeyboardInterrupt:
    print("\n\nüõë Script interrupted by user (Ctrl+C). Shutting down.")
except Exception as e:
    print(f"\n\nüö® An unexpected error occurred: {e}")
    
print("Processing complete.")

‚úÖ PRAW Connection successful. Found post: Daily Crypto Discussion - November 16, 2025 (GMT+0)
File reddit_crypto_sentiment_streaming.csv initialized successfully.

üîÑ Starting Reddit sentiment stream (Tracking since 2025-06-15 00:00:00)...

--- ‚è≥ Running Historical Fetch (Top posts since 2025-06-15 00:00:00) ---


  date = datetime.utcfromtimestamp(post_timestamp).strftime("%Y-%m-%d %H:%M:%S")
  comment_date = datetime.utcfromtimestamp(comment.created_utc).strftime("%Y-%m-%d %H:%M:%S")
  print(f"‚úÖ Historical Post ({time_filter}): {datetime.utcfromtimestamp(post.created_utc).strftime('%Y-%m-%d %H:%M:%S')} | {records[0][4]} | Sentiment: {records[0][-1]:+.3f}")


‚úÖ Historical Post (day): 2025-11-16 06:12:57 | BTC | Sentiment: -0.273
‚úÖ Historical Post (day): 2025-11-15 17:58:58 | BTC | Sentiment: +0.000
‚úÖ Historical Post (day): 2025-11-15 19:52:48 | ETH | Sentiment: +0.202
‚úÖ Historical Post (day): 2025-11-16 01:00:44 | ETH | Sentiment: +0.954
‚úÖ Historical Post (day): 2025-11-16 05:21:56 | BTC | Sentiment: +0.000
‚úÖ Historical Post (day): 2025-11-16 10:46:20 | BTC | Sentiment: -0.178
‚úÖ Historical Post (day): 2025-11-16 10:36:02 | BTC | Sentiment: +0.534
‚úÖ Historical Post (day): 2025-11-16 16:51:59 | BTC | Sentiment: -0.850
‚úÖ Historical Post (day): 2025-11-16 07:20:12 | BTC | Sentiment: +0.978
‚úÖ Historical Post (day): 2025-11-15 21:53:13 | BTC | Sentiment: +0.846
‚úÖ Historical Post (day): 2025-11-16 16:54:34 | ETH | Sentiment: -0.601
‚úÖ Historical Post (day): 2025-11-16 10:58:14 | BTC | Sentiment: -0.665
‚úÖ Historical Post (week): 2025-11-14 12:15:45 | BTC | Sentiment: -0.421
‚úÖ Historical Post (week): 2025-11-13 18:48:54 | 

  print(f"üî• Live Post: {datetime.utcfromtimestamp(post.created_utc).strftime('%H:%M:%S')} | {records[0][4]} | Sentiment: {records[0][-1]:+.3f}")


üî• Live Post: 18:34:27 | BTC | Sentiment: +0.850

üíæ SAVED 1 new entries to reddit_crypto_sentiment_streaming.csv

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fetch (Every 3s) ---

--- ‚ö°Ô∏è Running Live Stream Fe

In [17]:
pip install yfinance pandas

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [None]:
# --- Configuration ---
TICKERS = ["BTC-USD", "ETH-USD", "ADA-USD", "DOGE-USD", "SOL-USD"]
HISTORICAL_START_DATE = "2025-06-15"
HISTORICAL_INTERVAL = "5m"  # 5-minute intervals
CSV_FILE_HISTORICAL = "crypto_prices.csv"

print(f"\n--- Retrieving Historical Data from {HISTORICAL_START_DATE} at {HISTORICAL_INTERVAL} intervals ---")

try:
    # Use start= and interval= for historical data
    df_historical_wide = yf.download(
        tickers=TICKERS,
        start=HISTORICAL_START_DATE,
        interval=HISTORICAL_INTERVAL,
        show_errors=True
    )["Close"]

    # Convert the wide format DataFrame to a long (melted) format
    # This aligns with the structure of your CSV output (timestamp, symbol, price)
    df_historical_wide.index.name = "timestamp"
    df_historical_long = df_historical_wide.reset_index().melt(
        id_vars='timestamp',
        value_vars=TICKERS,
        var_name='symbol',
        value_name='price'
    ).dropna() # Remove rows where price data might be missing

    # Save to CSV
    df_historical_long.to_csv(CSV_FILE_HISTORICAL, index=False)

    print(f"\n‚úÖ Historical data successfully retrieved and saved to {CSV_FILE_HISTORICAL}")
    print(f"Total historical records: {len(df_historical_long):,}")

except Exception as e:
    print(f"\n‚ùå Error retrieving historical data: {e}")

In [None]:
import yfinance as yf
import pandas as pd
import time
from datetime import datetime

# --- Configuration ---
TICKERS = ["BTC-USD", "ETH-USD", "ADA-USD", "DOGE-USD", "SOL-USD"]
CSV_FILE_LIVE = "crypto_prices.csv"
LIVE_UPDATE_INTERVAL = 3  # Seconds

# Initialize CSV file with headers (only once)
df_init = pd.DataFrame(columns=["timestamp", "symbol", "price"])
df_init.to_csv(CSV_FILE_LIVE, index=False)

print(f"Starting live crypto price retrieval every {LIVE_UPDATE_INTERVAL} seconds...")

try:
    while True:
        # Get latest 1-minute bar data. '1m' is the smallest interval yfinance supports.
        # We request the last 5 minutes of data ('5m') to ensure we catch the latest bar.
        data = yf.download(
            tickers=TICKERS,
            period="1d",
            interval="1m",
            progress=False
        )["Close"].tail(1)

        # Check if data was successfully retrieved (Yahoo sometimes returns NaN/empty)
        if not data.empty and not data.iloc[0].isnull().all():
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            records = []

            # Iterate through available data (only tickers present in data)
            for symbol in TICKERS:
                try:
                    price = data[symbol].values[0]
                    records.append([timestamp, symbol, price])
                    print(f"{timestamp} | {symbol} | ${price:.4f}")
                except IndexError:
                    pass

            # Append to CSV file
            df = pd.DataFrame(records, columns=["timestamp", "symbol", "price"])
            df.to_csv(CSV_FILE, mode="a", header=False, index=False)

            
        else:
            print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | No new price data retrieved.")

        # Wait for 3 seconds before the next update
        time.sleep(LIVE_UPDATE_INTERVAL)

except KeyboardInterrupt:
    print("\nLive streaming stopped by user.")


Starting live crypto price retrieval every 3 seconds...


  data = yf.download(


2025-11-16 13:34:55 | BTC-USD | $93631.0156
2025-11-16 13:34:55 | ETH-USD | $3064.4485
2025-11-16 13:34:55 | ADA-USD | $0.4777
2025-11-16 13:34:55 | DOGE-USD | $0.1552
2025-11-16 13:34:55 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:34:59 | BTC-USD | $93631.0156
2025-11-16 13:34:59 | ETH-USD | $3064.4485
2025-11-16 13:34:59 | ADA-USD | $0.4777
2025-11-16 13:34:59 | DOGE-USD | $0.1552
2025-11-16 13:34:59 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:02 | BTC-USD | $93631.0156
2025-11-16 13:35:02 | ETH-USD | $3064.4485
2025-11-16 13:35:02 | ADA-USD | $0.4777
2025-11-16 13:35:02 | DOGE-USD | $0.1552
2025-11-16 13:35:02 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:06 | BTC-USD | $93631.0156
2025-11-16 13:35:06 | ETH-USD | $3064.4485
2025-11-16 13:35:06 | ADA-USD | $0.4777
2025-11-16 13:35:06 | DOGE-USD | $0.1552
2025-11-16 13:35:06 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:09 | BTC-USD | $93631.0156
2025-11-16 13:35:09 | ETH-USD | $3064.4485
2025-11-16 13:35:09 | ADA-USD | $0.4777
2025-11-16 13:35:09 | DOGE-USD | $0.1552
2025-11-16 13:35:09 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:13 | BTC-USD | $93631.0156
2025-11-16 13:35:13 | ETH-USD | $3064.4485
2025-11-16 13:35:13 | ADA-USD | $0.4777
2025-11-16 13:35:13 | DOGE-USD | $0.1552
2025-11-16 13:35:13 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:17 | BTC-USD | $93631.0156
2025-11-16 13:35:17 | ETH-USD | $3064.4485
2025-11-16 13:35:17 | ADA-USD | $0.4777
2025-11-16 13:35:17 | DOGE-USD | $0.1552
2025-11-16 13:35:17 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:20 | BTC-USD | $93631.0156
2025-11-16 13:35:20 | ETH-USD | $3064.4485
2025-11-16 13:35:20 | ADA-USD | $0.4777
2025-11-16 13:35:20 | DOGE-USD | $0.1552
2025-11-16 13:35:20 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:24 | BTC-USD | $93631.0156
2025-11-16 13:35:24 | ETH-USD | $3064.4485
2025-11-16 13:35:24 | ADA-USD | $0.4777
2025-11-16 13:35:24 | DOGE-USD | $0.1552
2025-11-16 13:35:24 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:27 | BTC-USD | $93631.0156
2025-11-16 13:35:27 | ETH-USD | $3064.4485
2025-11-16 13:35:27 | ADA-USD | $0.4777
2025-11-16 13:35:27 | DOGE-USD | $0.1552
2025-11-16 13:35:27 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:31 | BTC-USD | $93631.0156
2025-11-16 13:35:31 | ETH-USD | $3064.4485
2025-11-16 13:35:31 | ADA-USD | $0.4777
2025-11-16 13:35:31 | DOGE-USD | $0.1552
2025-11-16 13:35:31 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:34 | BTC-USD | $93631.0156
2025-11-16 13:35:34 | ETH-USD | $3064.4485
2025-11-16 13:35:34 | ADA-USD | $0.4777
2025-11-16 13:35:34 | DOGE-USD | $0.1552
2025-11-16 13:35:34 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:37 | BTC-USD | $93631.0156
2025-11-16 13:35:37 | ETH-USD | $3064.4485
2025-11-16 13:35:37 | ADA-USD | $0.4777
2025-11-16 13:35:37 | DOGE-USD | $0.1552
2025-11-16 13:35:37 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:41 | BTC-USD | $93631.0156
2025-11-16 13:35:41 | ETH-USD | $3064.4485
2025-11-16 13:35:41 | ADA-USD | $0.4777
2025-11-16 13:35:41 | DOGE-USD | $0.1552
2025-11-16 13:35:41 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:44 | BTC-USD | $93631.0156
2025-11-16 13:35:44 | ETH-USD | $3064.4485
2025-11-16 13:35:44 | ADA-USD | $0.4777
2025-11-16 13:35:44 | DOGE-USD | $0.1552
2025-11-16 13:35:44 | SOL-USD | $135.5720


  data = yf.download(


2025-11-16 13:35:48 | BTC-USD | $93700.5391
2025-11-16 13:35:48 | ETH-USD | $3072.3860
2025-11-16 13:35:48 | ADA-USD | $0.4779
2025-11-16 13:35:48 | DOGE-USD | $0.1553
2025-11-16 13:35:48 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:35:51 | BTC-USD | $93700.5391
2025-11-16 13:35:51 | ETH-USD | $3072.3860
2025-11-16 13:35:51 | ADA-USD | $0.4779
2025-11-16 13:35:51 | DOGE-USD | $0.1553
2025-11-16 13:35:51 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:35:55 | BTC-USD | $93700.5391
2025-11-16 13:35:55 | ETH-USD | $3072.3860
2025-11-16 13:35:55 | ADA-USD | $0.4779
2025-11-16 13:35:55 | DOGE-USD | $0.1553
2025-11-16 13:35:55 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:35:58 | BTC-USD | $93700.5391
2025-11-16 13:35:58 | ETH-USD | $3072.3860
2025-11-16 13:35:58 | ADA-USD | $0.4779
2025-11-16 13:35:58 | DOGE-USD | $0.1553
2025-11-16 13:35:58 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:02 | BTC-USD | $93700.5391
2025-11-16 13:36:02 | ETH-USD | $3072.3860
2025-11-16 13:36:02 | ADA-USD | $0.4779
2025-11-16 13:36:02 | DOGE-USD | $0.1553
2025-11-16 13:36:02 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:05 | BTC-USD | $93700.5391
2025-11-16 13:36:05 | ETH-USD | $3072.3860
2025-11-16 13:36:05 | ADA-USD | $0.4779
2025-11-16 13:36:05 | DOGE-USD | $0.1553
2025-11-16 13:36:05 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:09 | BTC-USD | $93700.5391
2025-11-16 13:36:09 | ETH-USD | $3072.3860
2025-11-16 13:36:09 | ADA-USD | $0.4779
2025-11-16 13:36:09 | DOGE-USD | $0.1553
2025-11-16 13:36:09 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:12 | BTC-USD | $93700.5391
2025-11-16 13:36:12 | ETH-USD | $3072.3860
2025-11-16 13:36:12 | ADA-USD | $0.4779
2025-11-16 13:36:12 | DOGE-USD | $0.1553
2025-11-16 13:36:12 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:15 | BTC-USD | $93700.5391
2025-11-16 13:36:15 | ETH-USD | $3072.3860
2025-11-16 13:36:15 | ADA-USD | $0.4779
2025-11-16 13:36:15 | DOGE-USD | $0.1553
2025-11-16 13:36:15 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:19 | BTC-USD | $93700.5391
2025-11-16 13:36:19 | ETH-USD | $3072.3860
2025-11-16 13:36:19 | ADA-USD | $0.4779
2025-11-16 13:36:19 | DOGE-USD | $0.1553
2025-11-16 13:36:19 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:22 | BTC-USD | $93700.5391
2025-11-16 13:36:22 | ETH-USD | $3072.3860
2025-11-16 13:36:22 | ADA-USD | $0.4779
2025-11-16 13:36:22 | DOGE-USD | $0.1553
2025-11-16 13:36:22 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:26 | BTC-USD | $93700.5391
2025-11-16 13:36:26 | ETH-USD | $3072.3860
2025-11-16 13:36:26 | ADA-USD | $0.4779
2025-11-16 13:36:26 | DOGE-USD | $0.1553
2025-11-16 13:36:26 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:29 | BTC-USD | $93700.5391
2025-11-16 13:36:29 | ETH-USD | $3072.3860
2025-11-16 13:36:29 | ADA-USD | $0.4779
2025-11-16 13:36:29 | DOGE-USD | $0.1553
2025-11-16 13:36:29 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:32 | BTC-USD | $93700.5391
2025-11-16 13:36:32 | ETH-USD | $3072.3860
2025-11-16 13:36:32 | ADA-USD | $0.4779
2025-11-16 13:36:32 | DOGE-USD | $0.1553
2025-11-16 13:36:32 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:36 | BTC-USD | $93700.5391
2025-11-16 13:36:36 | ETH-USD | $3072.3860
2025-11-16 13:36:36 | ADA-USD | $0.4779
2025-11-16 13:36:36 | DOGE-USD | $0.1553
2025-11-16 13:36:36 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:39 | BTC-USD | $93700.5391
2025-11-16 13:36:39 | ETH-USD | $3072.3860
2025-11-16 13:36:39 | ADA-USD | $0.4779
2025-11-16 13:36:39 | DOGE-USD | $0.1553
2025-11-16 13:36:39 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:43 | BTC-USD | $93700.5391
2025-11-16 13:36:43 | ETH-USD | $3072.3860
2025-11-16 13:36:43 | ADA-USD | $0.4779
2025-11-16 13:36:43 | DOGE-USD | $0.1553
2025-11-16 13:36:43 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:47 | BTC-USD | $93700.5391
2025-11-16 13:36:47 | ETH-USD | $3072.3860
2025-11-16 13:36:47 | ADA-USD | $0.4779
2025-11-16 13:36:47 | DOGE-USD | $0.1553
2025-11-16 13:36:47 | SOL-USD | $135.6941


  data = yf.download(


2025-11-16 13:36:50 | BTC-USD | $nan
2025-11-16 13:36:50 | ETH-USD | $nan
2025-11-16 13:36:50 | ADA-USD | $0.4781
2025-11-16 13:36:50 | DOGE-USD | $nan
2025-11-16 13:36:50 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:36:54 | BTC-USD | $nan
2025-11-16 13:36:54 | ETH-USD | $nan
2025-11-16 13:36:54 | ADA-USD | $0.4781
2025-11-16 13:36:54 | DOGE-USD | $nan
2025-11-16 13:36:54 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:36:58 | BTC-USD | $nan
2025-11-16 13:36:58 | ETH-USD | $nan
2025-11-16 13:36:58 | ADA-USD | $0.4781
2025-11-16 13:36:58 | DOGE-USD | $nan
2025-11-16 13:36:58 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:37:01 | BTC-USD | $nan
2025-11-16 13:37:01 | ETH-USD | $nan
2025-11-16 13:37:01 | ADA-USD | $0.4781
2025-11-16 13:37:01 | DOGE-USD | $nan
2025-11-16 13:37:01 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:37:08 | BTC-USD | $nan
2025-11-16 13:37:08 | ETH-USD | $nan
2025-11-16 13:37:08 | ADA-USD | $0.4781
2025-11-16 13:37:08 | DOGE-USD | $nan
2025-11-16 13:37:08 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:37:12 | BTC-USD | $nan
2025-11-16 13:37:12 | ETH-USD | $nan
2025-11-16 13:37:12 | ADA-USD | $0.4781
2025-11-16 13:37:12 | DOGE-USD | $nan
2025-11-16 13:37:12 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:37:15 | BTC-USD | $nan
2025-11-16 13:37:15 | ETH-USD | $nan
2025-11-16 13:37:15 | ADA-USD | $0.4781
2025-11-16 13:37:15 | DOGE-USD | $nan
2025-11-16 13:37:15 | SOL-USD | $nan


2025-11-16 13:37:19 | BTC-USD | $nan
2025-11-16 13:37:19 | ETH-USD | $nan
2025-11-16 13:37:19 | ADA-USD | $0.4781
2025-11-16 13:37:19 | DOGE-USD | $nan
2025-11-16 13:37:19 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:37:22 | BTC-USD | $nan
2025-11-16 13:37:22 | ETH-USD | $nan
2025-11-16 13:37:22 | ADA-USD | $0.4781
2025-11-16 13:37:22 | DOGE-USD | $nan
2025-11-16 13:37:22 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:37:26 | BTC-USD | $nan
2025-11-16 13:37:26 | ETH-USD | $nan
2025-11-16 13:37:26 | ADA-USD | $0.4781
2025-11-16 13:37:26 | DOGE-USD | $nan
2025-11-16 13:37:26 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:37:29 | BTC-USD | $nan
2025-11-16 13:37:29 | ETH-USD | $nan
2025-11-16 13:37:29 | ADA-USD | $0.4781
2025-11-16 13:37:29 | DOGE-USD | $nan
2025-11-16 13:37:29 | SOL-USD | $nan


  data = yf.download(


2025-11-16 13:37:32 | BTC-USD | $nan
2025-11-16 13:37:32 | ETH-USD | $nan
2025-11-16 13:37:32 | ADA-USD | $0.4781
2025-11-16 13:37:32 | DOGE-USD | $nan
2025-11-16 13:37:32 | SOL-USD | $nan
