<a href="https://colab.research.google.com/github/Aayush015/Research_crypto_price_analysis/blob/main/Crypto_Price_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Collection
First, we will collect crypto price data, Reddit data, and traditional news data, each from its own API.

In [7]:
#!/usr/bin/env python
import requests
import pandas as pd
from datetime import datetime, timedelta

def fetch_crypto_data_hourly(crypto_id, start_date, end_date, timeout=30):
    """
    Fetch hourly historical price data for a cryptocurrency from CoinCap API.

    The requested range is split into windows of at most 30 days and one HTTP
    request is issued per window; the records of each window are appended in
    request order. If a request fails, the error is printed and the records
    collected so far are returned (best-effort, no exception is raised).

    :param crypto_id: Cryptocurrency ID (e.g., 'bitcoin', 'ethereum').
    :param start_date: Start date (datetime object). Converted with
        ``datetime.timestamp()``, which interprets naive datetimes as local time.
    :param end_date: End date (datetime object).
    :param timeout: Seconds to wait on each HTTP request before giving up
        (passed to ``requests.get``).
    :return: List of historical hourly data for the cryptocurrency; empty when
        ``start_date`` is not before ``end_date``.
    """
    url = f"https://api.coincap.io/v2/assets/{crypto_id}/history"
    all_data = []
    window = timedelta(days=30)  # Fetch 1 month at a time
    window_start = start_date  # Local cursor; the caller's argument is not rebound

    # Fetch data in intervals to avoid API limitations
    while window_start < end_date:
        window_end = min(window_start + window, end_date)
        params = {
            "interval": "h1",  # Hourly data
            "start": int(window_start.timestamp() * 1000),  # Start timestamp in ms
            "end": int(window_end.timestamp() * 1000),  # End timestamp in ms
        }

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            # "data" may be missing or null; treat both as "no records".
            data = response.json().get("data") or []
        except (requests.exceptions.RequestException, ValueError) as ex:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            print(f"Error fetching data for {crypto_id}: {ex}")
            break

        # Add the data to the collection
        all_data.extend(data)

        # Move to the next interval. The one-hour step assumes the API's "end"
        # bound is inclusive -- TODO confirm against the CoinCap documentation.
        window_start = window_end + timedelta(hours=1)

    return all_data

def save_to_csv(data, filename):
    """
    Write historical price records to a CSV file.

    When ``data`` is empty nothing is written and a notice is printed instead.
    Otherwise the records are loaded into a DataFrame, the ``time`` column is
    converted from epoch milliseconds to datetimes, ``priceUsd`` is renamed to
    ``price_usd``, and the result is written without the index column.

    :param data: List of historical price data.
    :param filename: Output CSV filename.
    """
    if data:
        frame = pd.DataFrame(data)
        # Epoch milliseconds -> human-readable timestamps
        frame["time"] = pd.to_datetime(frame["time"], unit="ms")
        # Clearer column name for the price
        frame = frame.rename(columns={"priceUsd": "price_usd"})
        frame.to_csv(filename, index=False)
        print(f"Data saved to {filename}")
    else:
        print("No data to save.")

def run():
    """
    Fetch hourly history for each tracked cryptocurrency and save one CSV per coin.

    Every record is tagged with the coin's ID under the ``crypto`` key before
    being written to ``<coin>_hourly_2017_to_2023.csv``.
    """
    # Collection window: start and end of the historical data
    period_start = datetime(2017, 1, 1)
    period_end = datetime(2023, 12, 31)

    # Cryptocurrencies of interest
    for coin in ("bitcoin", "ethereum", "dogecoin", "shiba-inu"):
        print(f"Fetching hourly data for {coin}...")
        records = fetch_crypto_data_hourly(coin, period_start, period_end)

        # Tag each record with the cryptocurrency it belongs to
        for record in records:
            record["crypto"] = coin

        # One CSV file per cryptocurrency
        save_to_csv(records, f"{coin}_hourly_2017_to_2023.csv")

# Entry point: collect the data only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run()
    print("Hourly data collection completed.")

Fetching hourly data for bitcoin...
Data saved to bitcoin_hourly_2017_to_2023.csv
Fetching hourly data for ethereum...
Data saved to ethereum_hourly_2017_to_2023.csv
Fetching hourly data for dogecoin...
Data saved to dogecoin_hourly_2017_to_2023.csv
Fetching hourly data for shiba-inu...
Data saved to shiba-inu_hourly_2017_to_2023.csv
Hourly data collection completed.
