In [1]:
#FOR MODEL IMPLEMENTATION


import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

# Yahoo Finance tickers to download (25 symbols).
cryptos = [
    "BTC-USD", "ETH-USD", "ADA-USD", "BNB-USD", "XRP-USD",
    "SOL-USD", "DOT-USD", "DOGE-USD", "MATIC-USD", "LTC-USD",
    "BCH-USD", "TRX-USD", "AVAX-USD", "LINK-USD", "ATOM-USD",
    "XLM-USD", "ALGO-USD", "VET-USD", "XTZ-USD", "ARB-USD",
    "OP-USD", "APT-USD", "SUI-USD", "FTM-USD", "NEAR-USD"
]

# Date range: the 365 days ending at the moment this cell runs.
end_date = datetime.now()
start_date = end_date - timedelta(days=365)

# Price fields kept for every ticker, in output column order.
price_fields = ['Open', 'High', 'Low', 'Adj Close', 'Volume']

# One renamed frame per successfully downloaded ticker; they are merged once
# after the loop instead of re-joining a growing DataFrame on every iteration.
crypto_frames = []
failed_cryptos = []

# Loop through each cryptocurrency and download data
for crypto in cryptos:
    print(f"Downloading data for {crypto}...")
    try:
        # auto_adjust=False is passed explicitly so the separate 'Adj Close'
        # column is always present; yfinance releases that default to
        # auto_adjust=True omit it, which would fail the selection below.
        data = yf.download(
            crypto,
            start=start_date.strftime("%Y-%m-%d"),
            end=end_date.strftime("%Y-%m-%d"),
            auto_adjust=False,
        )

        # Check if data is returned
        if data.empty:
            print(f"No data found for {crypto}. Adding to the failed list.")
            failed_cryptos.append(crypto)
            continue

        selected_columns = data[price_fields].copy()
        # Assigning a flat list of names also collapses a (field, ticker)
        # MultiIndex header, if the download returned one.
        selected_columns.columns = [
            f"{crypto}_{field.replace(' ', '_')}" for field in price_fields
        ]
        # Guard against repeated dates: duplicated index labels would multiply
        # rows when the per-ticker frames are aligned on the index.
        selected_columns = selected_columns[
            ~selected_columns.index.duplicated(keep='last')
        ]
        crypto_frames.append(selected_columns)

    except Exception as e:
        print(f"Failed to download data for {crypto}: {e}")
        failed_cryptos.append(crypto)

# Outer-align every ticker on the date index in a single pass.
if crypto_frames:
    all_crypto_data = pd.concat(crypto_frames, axis=1).sort_index()
else:
    all_crypto_data = pd.DataFrame()

# Check if any cryptocurrencies failed to download
if failed_cryptos:
    print(f"The following cryptocurrencies failed to download: {failed_cryptos}")
    print("Ensure all symbols are valid and available in Yahoo Finance.")
    print("Stopping the script to allow for corrections.")
    # Raise SystemExit directly rather than calling exit(): exit() is an
    # interactive helper, and inside a notebook it requests kernel shutdown.
    raise SystemExit(1)

# Turn the date index into an ordinary column so it is written to the CSV.
all_crypto_data = all_crypto_data.reset_index()

# Save to a CSV file
output_file = "crypto_complete_data_1_year.csv"
all_crypto_data.to_csv(output_file, index=False)

print(f"Data for {len(cryptos)} cryptocurrencies (with Open, High, Low, Adj Close, Volume) saved to: {output_file}")

[*********************100%***********************]  1 of 1 completed

Downloading data for BTC-USD...
Downloading data for ETH-USD...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading data for ADA-USD...
Downloading data for BNB-USD...
Downloading data for XRP-USD...
Downloading data for SOL-USD...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading data for DOT-USD...
Downloading data for DOGE-USD...
Downloading data for MATIC-USD...
Downloading data for LTC-USD...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading data for BCH-USD...
Downloading data for TRX-USD...
Downloading data for AVAX-USD...
Downloading data for LINK-USD...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading data for ATOM-USD...
Downloading data for XLM-USD...
Downloading data for ALGO-USD...
Downloading data for VET-USD...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading data for XTZ-USD...
Downloading data for ARB-USD...
Downloading data for OP-USD...
Downloading data for APT-USD...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading data for SUI-USD...
Downloading data for FTM-USD...
Downloading data for NEAR-USD...
Data for 30 cryptocurrencies (with Open, High, Low, Adj Close, Volume) saved to: crypto_complete_data_1_year.csv


In [2]:
import pandas as pd

# Read the merged one-year price table written by the download step.
input_file = "crypto_complete_data_1_year.csv"
crypto_data = pd.read_csv(input_file)

# When a 'Date' column exists, parse it and keep only the calendar date.
if 'Date' in crypto_data:
    crypto_data = crypto_data.assign(
        Date=pd.to_datetime(crypto_data['Date']).dt.date
    )

# Discard rows that exactly repeat an earlier row.
crypto_data = crypto_data.drop_duplicates()

# Write the cleaned table next to the raw one, without the row index.
output_file = "crypto_cleaned_data.csv"
crypto_data.to_csv(output_file, index=False)

print(f"Cleaned dataset saved to: {output_file}")

Cleaned dataset saved to: crypto_cleaned_data.csv


In [3]:
#FOR GUI IMPLEMENTATION

import yfinance as yf
import pandas as pd


def get_crypto_data_realtime(crypto_name, start_date=None, end_date=None):
    """
    Fetch, clean, and enrich price history for one cryptocurrency from Yahoo Finance.

    Args:
        crypto_name (str): Ticker to fetch. Must be one of the supported
            symbols: "LTC-USD", "BTC-USD", "ETH-USD", "BCH-USD".
        start_date: Forwarded unchanged as ``start`` to ``yf.download``.
        end_date: Forwarded unchanged as ``end`` to ``yf.download``.

    Returns:
        pandas.DataFrame: Columns ``Date``, ``open``, ``high``, ``low``,
        ``close``, ``volume``, ``7-Day MA``, ``14-Day MA`` and ``30-Day MA``.
        Rows missing Date/open/high/low/close are dropped. Moving-average
        gaps are back-filled, then forward-filled; a moving average stays NaN
        only when there are fewer rows than its window.

    Raises:
        ValueError: If ``crypto_name`` is not supported, or the download
            returned no rows.
        Exception: If the download call itself raises (original error chained).
    """
    # Define valid cryptocurrency symbols for Yahoo Finance
    symbol_map = {
        "LTC-USD": "LTC-USD",
        "BTC-USD": "BTC-USD",
        "ETH-USD": "ETH-USD",
        "BCH-USD": "BCH-USD",
    }

    # Validate the cryptocurrency name
    symbol = symbol_map.get(crypto_name)
    if not symbol:
        raise ValueError(f"Invalid cryptocurrency name: {crypto_name}. Please select a valid cryptocurrency.")

    # Fetch historical data using Yahoo Finance
    try:
        crypto_data = yf.download(symbol, start=start_date, end=end_date, progress=False)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"Failed to fetch data: {e}") from e

    if crypto_data.empty:
        raise ValueError(f"No data found for {crypto_name} in the selected date range.")

    # Flatten a MultiIndex header, e.g. ("Close", "BTC-USD") -> "Close_BTC-USD"
    if isinstance(crypto_data.columns, pd.MultiIndex):
        crypto_data.columns = ["_".join(col).strip() for col in crypto_data.columns]

    # Accept both header layouts: plain field names ("Close") and the
    # flattened MultiIndex form ("Close_<symbol>").
    field_names = {
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Adj Close": "adj_close",
        "Volume": "volume",
    }
    rename_map = {}
    for raw_name, clean_name in field_names.items():
        rename_map[raw_name] = clean_name
        rename_map[f"{raw_name}_{symbol}"] = clean_name

    # Reset index (the date becomes a column) and rename columns
    crypto_data = crypto_data.reset_index().rename(columns=rename_map)

    # Keep only the necessary columns; copy so the assignments below act on
    # an independent frame rather than a slice of the downloaded one.
    necessary_columns = ["Date", "open", "high", "low", "close", "volume"]
    crypto_data = crypto_data[necessary_columns].copy()

    # Convert Date to datetime and remove the time component
    crypto_data["Date"] = pd.to_datetime(crypto_data["Date"]).dt.date

    # Ensure numeric columns; unparseable values become NaN
    numeric_columns = ["open", "high", "low", "close", "volume"]
    crypto_data[numeric_columns] = crypto_data[numeric_columns].apply(pd.to_numeric, errors="coerce")

    # Drop rows with missing or invalid data (volume is allowed to be NaN)
    crypto_data = crypto_data.dropna(subset=["Date", "open", "high", "low", "close"])

    print("First few rows of the 'close' column:")
    print(crypto_data["close"].head())

    # Add moving averages of the close price
    ma_columns = []
    for window in (7, 14, 30):
        column = f"{window}-Day MA"
        crypto_data[column] = crypto_data["close"].rolling(window=window).mean()
        ma_columns.append(column)

    print("\nFirst 10 rows of the 7-Day MA computation:")
    print(crypto_data[["close", "7-Day MA"]].head(10))

    # Fill the leading NaNs (rows before a full window exists) from the first
    # computed average, then forward-fill anything left. DataFrame.bfill/ffill
    # replace the deprecated fillna(method=...) spelling.
    crypto_data[ma_columns] = crypto_data[ma_columns].bfill().ffill()

    return crypto_data

In [10]:
# Import necessary libraries
import pandas as pd
from data_fetcher import get_crypto_data_realtime

# Pull BTC-USD history for calendar year 2023 through the data helper.
crypto_data = get_crypto_data_realtime(
    crypto_name="BTC-USD", start_date="2023-01-01", end_date="2023-12-31"
)

# Show a preview of the cleaned frame and the columns it carries.
print("Cleaned Data:", crypto_data.head(), sep="\n")
print("Available columns in the dataset:", list(crypto_data.columns), sep="\n")

# Count missing values remaining in each moving-average column.
nan_values = crypto_data.loc[:, ["7-Day MA", "14-Day MA", "30-Day MA"]].isna().sum()

# Report the per-column NaN counts.
print("\nNumber of NaN values in Moving Averages:", nan_values, sep="\n")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Downloading data for BTC-USD...
Downloading data for ETH-USD...
Downloading data for ADA-USD...
Downloading data for BNB-USD...
Downloading data for XRP-USD...
Downloading data for SOL-USD...
Downloading data for DOT-USD...
Downloading data for DOGE-USD...
Downloading data for MATIC-USD...
Downloading data for LTC-USD...
Downloading data for BCH-USD...
Downloading data for TRX-USD...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Downloading data for AVAX-USD...
Downloading data for LINK-USD...
Downloading data for ATOM-USD...
Downloading data for XLM-USD...
Downloading data for ALGO-USD...
Downloading data for VET-USD...
Downloading data for XTZ-USD...
Downloading data for ARB-USD...
Downloading data for OP-USD...
Downloading data for APT-USD...
Downloading data for SUI-USD...
Downloading data for FTM-USD...
Downloading data for NEAR-USD...


[*********************100%***********************]  1 of 1 completed
  ].fillna(method="bfill").fillna(method="ffill")


Data for 30 cryptocurrencies (with Open, High, Low, Adj Close, Volume) saved to: crypto_complete_data_1_year.csv
Cleaned dataset saved to: crypto_cleaned_data.csv
First few rows of the 'close' column:
0    16625.080078
1    16688.470703
2    16679.857422
3    16863.238281
4    16836.736328
Name: close, dtype: float64

First 10 rows of the 7-Day MA computation:
          close      7-Day MA
0  16625.080078           NaN
1  16688.470703           NaN
2  16679.857422           NaN
3  16863.238281           NaN
4  16836.736328           NaN
5  16951.968750           NaN
6  16955.078125  16800.061384
7  17091.144531  16866.642020
8  17196.554688  16939.225446
9  17446.292969  17048.716239
Cleaned Data:
         Date          open          high           low         close  \
0  2023-01-01  16547.914062  16630.439453  16521.234375  16625.080078   
1  2023-01-02  16625.509766  16759.343750  16572.228516  16688.470703   
2  2023-01-03  16688.847656  16760.447266  16622.371094  16679.857422   
3

  ].fillna(method="bfill").fillna(method="ffill")
