# Cryptocurrency Data Downloader
## Download Latest Data (2020-2025) for Multiple Cryptocurrencies

In [1]:
# Install required libraries (run once)
# !pip install yfinance pandas numpy requests

import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
import time
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")
print(f"Current date: {datetime.now().strftime('%Y-%m-%d')}")

Libraries imported successfully!
Current date: 2025-11-14


## 1. Define Cryptocurrencies to Download

In [2]:
# List of cryptocurrencies (Add/Remove as needed)
crypto_symbols = [
    'BTC-USD',   # Bitcoin
    'ETH-USD',   # Ethereum
    'ADA-USD',   # Cardano
    'BNB-USD',   # Binance Coin
    'XRP-USD',   # Ripple
    'SOL-USD',   # Solana
    'DOT-USD',   # Polkadot
    'DOGE-USD',  # Dogecoin
    'MATIC-USD', # Polygon
    'AVAX-USD',  # Avalanche
    'LINK-USD',  # Chainlink
    'UNI-USD',   # Uniswap
    'LTC-USD',   # Litecoin
    'ATOM-USD',  # Cosmos
    'XLM-USD',   # Stellar
    'ALGO-USD',  # Algorand
    'VET-USD',   # VeChain
    'TRX-USD',   # Tron
    'FIL-USD',   # Filecoin
    'ETC-USD',   # Ethereum Classic
    'XMR-USD',   # Monero
    'AAVE-USD',  # Aave
    'THETA-USD', # Theta
    'EOS-USD',   # EOS
    'XTZ-USD',   # Tezos
]

# Create mapping of symbols to clean names
crypto_names = {
    'BTC-USD': 'Bitcoin',
    'ETH-USD': 'Ethereum',
    'ADA-USD': 'Cardano',
    'BNB-USD': 'Binance Coin',
    'XRP-USD': 'Ripple',
    'SOL-USD': 'Solana',
    'DOT-USD': 'Polkadot',
    'DOGE-USD': 'Dogecoin',
    'MATIC-USD': 'Polygon',
    'AVAX-USD': 'Avalanche',
    'LINK-USD': 'Chainlink',
    'UNI-USD': 'Uniswap',
    'LTC-USD': 'Litecoin',
    'ATOM-USD': 'Cosmos',
    'XLM-USD': 'Stellar',
    'ALGO-USD': 'Algorand',
    'VET-USD': 'VeChain',
    'TRX-USD': 'Tron',
    'FIL-USD': 'Filecoin',
    'ETC-USD': 'Ethereum Classic',
    'XMR-USD': 'Monero',
    'AAVE-USD': 'Aave',
    'THETA-USD': 'Theta',
    'EOS-USD': 'EOS',
    'XTZ-USD': 'Tezos',
}

print(f"Total cryptocurrencies to download: {len(crypto_symbols)}")
print("\nList:")
for i, symbol in enumerate(crypto_symbols, 1):
    print(f"{i:2d}. {crypto_names.get(symbol, symbol)}")

Total cryptocurrencies to download: 25

List:
 1. Bitcoin
 2. Ethereum
 3. Cardano
 4. Binance Coin
 5. Ripple
 6. Solana
 7. Polkadot
 8. Dogecoin
 9. Polygon
10. Avalanche
11. Chainlink
12. Uniswap
13. Litecoin
14. Cosmos
15. Stellar
16. Algorand
17. VeChain
18. Tron
19. Filecoin
20. Ethereum Classic
21. Monero
22. Aave
23. Theta
24. EOS
25. Tezos


## 2. Set Date Range

In [3]:
# Define date range
START_DATE = '2020-01-01'
END_DATE = datetime.now().strftime('%Y-%m-%d')  # Today's date

print(f"Downloading data from {START_DATE} to {END_DATE}")
print(f"Total duration: ~{(datetime.now() - datetime.strptime(START_DATE, '%Y-%m-%d')).days} days")

Downloading data from 2020-01-01 to 2025-11-14
Total duration: ~2144 days


## 3. Download Data Function

In [4]:
def download_crypto_data(symbol, start_date, end_date):
    """
    Download cryptocurrency data from Yahoo Finance
    
    Parameters:
    - symbol: Crypto symbol (e.g., 'BTC-USD')
    - start_date: Start date (YYYY-MM-DD)
    - end_date: End date (YYYY-MM-DD)
    
    Returns:
    - DataFrame with OHLCV data
    """
    try:
        # Download data
        data = yf.download(symbol, start=start_date, end=end_date, progress=False)
        
        if data.empty:
            print(f"  ‚ùå No data found for {symbol}")
            return None
        
        # Reset index to make Date a column
        data = data.reset_index()
        
        # Rename columns to match your format
        data = data.rename(columns={
            'Date': 'Date',
            'Open': 'Open',
            'High': 'High',
            'Low': 'Low',
            'Close': 'Close',
            'Volume': 'Volume',
            'Adj Close': 'Adj_Close'
        })
        
        # Add metadata columns
        clean_symbol = symbol.replace('-USD', '')
        data['Symbol'] = clean_symbol
        data['Name'] = crypto_names.get(symbol, clean_symbol)
        
        # Calculate Market Cap (approximation: Close * Volume)
        # Note: This is not real market cap, just a placeholder
        data['Marketcap'] = data['Close'] * data['Volume']
        
        # Add SNo
        data.insert(0, 'SNo', range(1, len(data) + 1))
        
        # Select and reorder columns to match your format
        columns_order = ['SNo', 'Name', 'Symbol', 'Date', 'High', 'Low', 
                        'Open', 'Close', 'Volume', 'Marketcap']
        data = data[columns_order]
        
        return data
        
    except Exception as e:
        print(f"  ‚ùå Error downloading {symbol}: {str(e)}")
        return None

print("Download function ready!")

Download function ready!


## 4. Download All Cryptocurrencies

In [5]:
# Create directories
os.makedirs('crypto_data_new', exist_ok=True)
os.makedirs('crypto_data_old', exist_ok=True)  # Backup your old data here

print("Starting download...\n")
print("=" * 70)

downloaded_data = {}
successful_downloads = 0
failed_downloads = 0

for i, symbol in enumerate(crypto_symbols, 1):
    print(f"[{i}/{len(crypto_symbols)}] Downloading {crypto_names.get(symbol, symbol)} ({symbol})...")
    
    # Download data
    df = download_crypto_data(symbol, START_DATE, END_DATE)
    
    if df is not None:
        # Save to CSV
        clean_symbol = symbol.replace('-USD', '')
        filename = f'crypto_data_new/{clean_symbol.lower()}_data.csv'
        df.to_csv(filename, index=False)
        
        downloaded_data[symbol] = df
        successful_downloads += 1
        
        print(f"  ‚úÖ Downloaded {len(df)} records")
        print(f"  üíæ Saved to: {filename}")
    else:
        failed_downloads += 1
    
    print()
    
    # Small delay to avoid rate limiting
    time.sleep(0.5)

print("=" * 70)
print(f"\n‚úÖ Successfully downloaded: {successful_downloads}/{len(crypto_symbols)}")
print(f"‚ùå Failed downloads: {failed_downloads}/{len(crypto_symbols)}")

Starting download...

[1/25] Downloading Bitcoin (BTC-USD)...
  ‚úÖ Downloaded 2144 records
  üíæ Saved to: crypto_data_new/btc_data.csv

[2/25] Downloading Ethereum (ETH-USD)...
  ‚úÖ Downloaded 2144 records
  üíæ Saved to: crypto_data_new/eth_data.csv

[3/25] Downloading Cardano (ADA-USD)...
  ‚úÖ Downloaded 2144 records
  üíæ Saved to: crypto_data_new/ada_data.csv

[4/25] Downloading Binance Coin (BNB-USD)...
  ‚úÖ Downloaded 2144 records
  üíæ Saved to: crypto_data_new/bnb_data.csv

[5/25] Downloading Ripple (XRP-USD)...
  ‚úÖ Downloaded 2144 records
  üíæ Saved to: crypto_data_new/xrp_data.csv

[6/25] Downloading Solana (SOL-USD)...
  ‚úÖ Downloaded 2044 records
  üíæ Saved to: crypto_data_new/sol_data.csv

[7/25] Downloading Polkadot (DOT-USD)...
  ‚úÖ Downloaded 1912 records
  üíæ Saved to: crypto_data_new/dot_data.csv

[8/25] Downloading Dogecoin (DOGE-USD)...
  ‚úÖ Downloaded 2144 records
  üíæ Saved to: crypto_data_new/doge_data.csv

[9/25] Downloading Polygon (MATIC-

## 5. Combine All Data into Single File

In [10]:
# Combine all downloaded data
if downloaded_data:
    combined_df = pd.concat(downloaded_data.values(), ignore_index=True)
    
    # Sort by Symbol and Date
    combined_df = combined_df.sort_values(['Symbol', 'Date']).reset_index(drop=True)
    
    # Update SNo
    combined_df['SNo'] = range(1, len(combined_df) + 1)
    
    # Save combined file
    combined_df.to_csv('crypto_data_combined.csv', index=False)
    
    print("\n" + "=" * 70)
    print("COMBINED DATASET SUMMARY")
    print("=" * 70)
    print(f"Total records: {len(combined_df):,}")
    print(f"Cryptocurrencies: {combined_df['Symbol'].nunique()}")
    print(f"Date range: {combined_df['Date'].min()} to {combined_df['Date'].max()}")
    print(f"File saved: crypto_data_combined.csv")
    print("=" * 70)
    
    # Show sample
    print("\nSample data:")
    display(combined_df.head(10))
    
    # Summary by coin
    # Add this before line 29:
    print(combined_df.columns.tolist())
    print("\nRecords per cryptocurrency:")
    print(combined_df.head())
else:
    print("No data was downloaded successfully.")


COMBINED DATASET SUMMARY
Total records: 52,286
Cryptocurrencies: 25
Date range: 2020-01-01 00:00:00 to 2025-11-13 00:00:00
File saved: crypto_data_combined.csv

Sample data:


Price,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap,...,High,Low,Open,Close,Volume,High,Low,Open,Close,Volume
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,Unnamed: 10_level_1,...,EOS-USD,EOS-USD,EOS-USD,EOS-USD,EOS-USD,XTZ-USD,XTZ-USD,XTZ-USD,XTZ-USD,XTZ-USD
0,1,Aave,AAVE,2020-10-02,,,,,,0.0,...,,,,,,,,,,
1,2,Aave,AAVE,2020-10-03,,,,,,0.0,...,,,,,,,,,,
2,3,Aave,AAVE,2020-10-04,,,,,,0.0,...,,,,,,,,,,
3,4,Aave,AAVE,2020-10-05,,,,,,0.0,...,,,,,,,,,,
4,5,Aave,AAVE,2020-10-06,,,,,,24723990.0,...,,,,,,,,,,
5,6,Aave,AAVE,2020-10-07,,,,,,27370700.0,...,,,,,,,,,,
6,7,Aave,AAVE,2020-10-08,,,,,,72597230.0,...,,,,,,,,,,
7,8,Aave,AAVE,2020-10-09,,,,,,38181650.0,...,,,,,,,,,,
8,9,Aave,AAVE,2020-10-10,,,,,,52800420.0,...,,,,,,,,,,
9,10,Aave,AAVE,2020-10-11,,,,,,34372720.0,...,,,,,,,,,,


[('SNo', ''), ('Name', ''), ('Symbol', ''), ('Date', ''), ('High', 'BTC-USD'), ('Low', 'BTC-USD'), ('Open', 'BTC-USD'), ('Close', 'BTC-USD'), ('Volume', 'BTC-USD'), ('Marketcap', ''), ('High', 'ETH-USD'), ('Low', 'ETH-USD'), ('Open', 'ETH-USD'), ('Close', 'ETH-USD'), ('Volume', 'ETH-USD'), ('High', 'ADA-USD'), ('Low', 'ADA-USD'), ('Open', 'ADA-USD'), ('Close', 'ADA-USD'), ('Volume', 'ADA-USD'), ('High', 'BNB-USD'), ('Low', 'BNB-USD'), ('Open', 'BNB-USD'), ('Close', 'BNB-USD'), ('Volume', 'BNB-USD'), ('High', 'XRP-USD'), ('Low', 'XRP-USD'), ('Open', 'XRP-USD'), ('Close', 'XRP-USD'), ('Volume', 'XRP-USD'), ('High', 'SOL-USD'), ('Low', 'SOL-USD'), ('Open', 'SOL-USD'), ('Close', 'SOL-USD'), ('Volume', 'SOL-USD'), ('High', 'DOT-USD'), ('Low', 'DOT-USD'), ('Open', 'DOT-USD'), ('Close', 'DOT-USD'), ('Volume', 'DOT-USD'), ('High', 'DOGE-USD'), ('Low', 'DOGE-USD'), ('Open', 'DOGE-USD'), ('Close', 'DOGE-USD'), ('Volume', 'DOGE-USD'), ('High', 'MATIC-USD'), ('Low', 'MATIC-USD'), ('Open', 'MATIC-U

## 6. Data Quality Check

In [13]:
# Standardize column names to match expected format
if downloaded_data:
    combined_df.columns = combined_df.columns.str.capitalize()
    print("DATA QUALITY REPORT")
    print("=" * 70)
    
    # Check for missing values
    print("\nMissing values per column:")
    print(combined_df.isnull().sum())
    
    # Check for duplicates
    duplicates = combined_df.duplicated(subset=['Symbol', 'Date']).sum()
    print(f"\nDuplicate rows (Symbol + Date): {duplicates}")
    
    # Check data types
    print("\nData types:")
    print(combined_df.dtypes)
    
    # Basic statistics
    print("\nBasic statistics:")
    display(combined_df[['High', 'Low', 'Open', 'Close', 'Volume']].describe())
    
    print("\n" + "=" * 70)

AttributeError: Can only use .str accessor with Index, not MultiIndex

## 7. Next Steps

### ‚úÖ Data Downloaded Successfully!

**What you have now:**
- Individual CSV files in `crypto_data_new/` folder
- Combined dataset: `crypto_data_combined.csv`
- Data from 2020 to present day

**Next steps:**
1. Move your old CSV files to `crypto_data_old/` folder (backup)
2. Run `01_data_processing.ipynb` with the new data
3. Run `02_exploratory_data_analysis.ipynb`
4. Run `03_ml_models.ipynb`

**Need to update data later?**
- Just run this notebook again!
- It will download the latest data automatically