In [25]:
import requests
import pandas as pd
import time
from datetime import datetime

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score



In [26]:


# Step 1: Get top 10 coins by market cap
def get_top_10_coins():
    """Return the CoinGecko ids of the ten largest coins by market cap.

    Queries the ``/coins/markets`` endpoint, priced in USD and ordered by
    descending market cap, and extracts the ``id`` field of each entry.

    Returns:
        list[str]: coin ids in the order the API returned them.

    Raises:
        requests.HTTPError: if the API answers with an error status
            (raised by ``response.raise_for_status()``).
        ValueError: if the decoded payload is not a list of coin entries.
    """
    url = "https://api.coingecko.com/api/v3/coins/markets"
    params = {
        'vs_currency': 'usd',
        'order': 'market_cap_desc',
        'per_page': 10,
        'page': 1
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, params=params, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()
    data = response.json()
    # An error payload is a dict; iterating it would yield its string keys and
    # fail later with an unhelpful TypeError, so reject it explicitly here.
    if not isinstance(data, list):
        raise ValueError(f"Unexpected response from CoinGecko: {data!r}")
    return [coin['id'] for coin in data]

# Step 2: Fetch historical data for a given coin
def fetch_365_day_data(coin_id):
    """Download 365 days of daily price, market cap and volume for one coin.

    Args:
        coin_id: CoinGecko coin id, interpolated into the ``market_chart`` URL.

    Returns:
        pandas.DataFrame with columns ``timestamp`` (datetime, converted from
        epoch milliseconds), ``price``, ``market_cap``, ``volume`` and ``coin``
        (the given ``coin_id`` repeated on every row). The frame is empty when
        a successful response carries no ``prices`` entries.

    Raises:
        requests.HTTPError: if the API answers with an error status
            (raised by ``response.raise_for_status()``).
        ValueError: if the three series in the payload differ in length and
            therefore cannot be combined row by row.
    """
    url = f"https://api.coingecko.com/api/v3/coins/{coin_id}/market_chart"
    params = {
        'vs_currency': 'usd',
        'days': 365,
        'interval': 'daily'
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, params=params, timeout=30)
    # Without this check an error body (no 'prices' key) silently became an
    # empty frame that was then concatenated with the real data.
    response.raise_for_status()
    data = response.json()

    prices = data.get('prices', [])
    market_caps = data.get('market_caps', [])
    volumes = data.get('total_volumes', [])

    # The series are joined positionally below, so they must line up.
    if not (len(prices) == len(market_caps) == len(volumes)):
        raise ValueError(
            f"Series length mismatch for {coin_id}: "
            f"prices={len(prices)}, market_caps={len(market_caps)}, "
            f"total_volumes={len(volumes)}"
        )

    df = pd.DataFrame(prices, columns=['timestamp', 'price'])
    # Each entry is a [timestamp_ms, value] pair; keep only the value.
    df['market_cap'] = [x[1] for x in market_caps]
    df['volume'] = [x[1] for x in volumes]
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df['coin'] = coin_id
    return df

# Step 3: Fetch data for all top 10 coins
top_coins = get_top_10_coins()
print("Top 10 coins:", top_coins)

all_data = []

for coin in top_coins:
    print(f"Fetching data for {coin}...")
    try:
        df = fetch_365_day_data(coin)
        if df.empty:
            # Empty frames add nothing and trigger a pandas concat warning.
            print(f"No data returned for {coin}; skipping")
        else:
            all_data.append(df)
    except Exception as e:
        # Best effort: report the failure and continue with the other coins.
        print(f"Error fetching {coin}: {e}")
    finally:
        # Respect API rate limit -- also after a failed request, otherwise an
        # error is immediately followed by the next call.
        time.sleep(1.5)

# Step 4: Combine into one DataFrame
if not all_data:
    # pd.concat([]) would fail with a less helpful "No objects to concatenate".
    raise RuntimeError("No historical data could be fetched for any coin")
combined_df = pd.concat(all_data, ignore_index=True)
combined_df.to_csv('top10_crypto_365days.csv', index=False)

# Show preview
combined_df.head()


Top 10 coins: ['bitcoin', 'ethereum', 'tether', 'ripple', 'binancecoin', 'solana', 'usd-coin', 'dogecoin', 'cardano', 'tron']
Fetching data for bitcoin...
Fetching data for ethereum...
Fetching data for tether...
Fetching data for ripple...
Fetching data for binancecoin...
Fetching data for solana...
Fetching data for usd-coin...
Fetching data for dogecoin...
Fetching data for cardano...
Fetching data for tron...


  combined_df = pd.concat(all_data, ignore_index=True)


Unnamed: 0,timestamp,price,market_cap,volume,coin
0,2024-04-01,71246.951441,1401370000000.0,19723010000.0,bitcoin
1,2024-04-02,69785.925038,1373784000000.0,36040090000.0,bitcoin
2,2024-04-03,65440.419226,1288361000000.0,45161130000.0,bitcoin
3,2024-04-04,66123.938539,1301670000000.0,35559950000.0,bitcoin
4,2024-04-05,68542.177153,1348456000000.0,37516850000.0,bitcoin


In [27]:
# Reload the cached download so this cell does not depend on the API call.
df = pd.read_csv('top10_crypto_365days.csv', parse_dates=['timestamp'])

# Sort data per coin and timestamp so that row offsets within a coin follow
# chronological order.
df = df.sort_values(by=['coin', 'timestamp'])

# Create future price column: the price 60 rows later within the same coin.
# The last 60 rows of every coin have no such row and therefore get NaN.
# NOTE(review): this equals "60 days later" only if each coin has exactly one
# row per day -- confirm against the downloaded data.
df['future_price_60d'] = df.groupby('coin')['price'].shift(-60)

# Define success label: 1.0 if the price at least doubled, 0.0 otherwise.
# Rows without a known future price keep NaN instead of being counted as
# failures (a comparison against NaN is False, which previously became 0).
# The label is float so it can hold NaN; downstream dropna(subset=['success'])
# then removes the unlabeled rows.
has_future_price = df['future_price_60d'].notna()
df['success'] = (
    (df['future_price_60d'] >= 2 * df['price'])
    .astype(float)
    .where(has_future_price)
)


In [28]:
# Per-coin views of the two raw series; every feature below is computed
# within a coin so values never bleed across coin boundaries.
price_by_coin = df.groupby('coin')['price']
volume_by_coin = df.groupby('coin')['volume']

# Price momentum: relative change versus 7 and 30 rows earlier
df['price_change_7d'] = price_by_coin.pct_change(periods=7)
df['price_change_30d'] = price_by_coin.pct_change(periods=30)

# Volume trends: relative change and 7-row rolling average
df['volume_change_7d'] = volume_by_coin.pct_change(periods=7)
# Grouped rolling results are indexed by (coin, original index); dropping the
# coin level lets the assignment align on the original row index.
df['volume_rolling_mean_7d'] = volume_by_coin.rolling(window=7).mean().droplevel(0)

# Volatility: rolling standard deviation of price over 7 rows
df['volatility_7d'] = price_by_coin.rolling(window=7).std().droplevel(0)

# Moving averages and their spread (short minus long)
df['ma_7'] = price_by_coin.rolling(window=7).mean().droplevel(0)
df['ma_30'] = price_by_coin.rolling(window=30).mean().droplevel(0)
df['ma_diff'] = df['ma_7'].sub(df['ma_30'])


In [29]:
# Select the features we created earlier
features = [
    'price_change_7d',
    'price_change_30d',
    'volume_change_7d',
    'volume_rolling_mean_7d',
    'volatility_7d',
    'ma_diff'
]

# Drop rows that miss any model input or whose outcome is unknown.
# 'future_price_60d' is listed explicitly: it is NaN for the last 60 rows of
# each coin, and those rows have no real label even when 'success' holds a
# number. Deriving the subset from `features` keeps the two lists in sync.
df_clean = df.dropna(subset=features + ['future_price_60d', 'success'])

X = df_clean[features]
# Cast to int so the classifier sees classes 0/1 even if the label column is
# stored as float.
y = df_clean['success'].astype(int)

# NOTE(review): a random split mixes neighbouring days of the same coin into
# train and test; with 60-row look-ahead labels this likely inflates test
# scores. Consider a chronological split -- left unchanged here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)