# **(Crypto Currency Future Price Forecast EDA)**

## Objectives

- Load raw cryptocurrency data from CSV files
- Perform exploratory data analysis (EDA) to uncover insights
- Load my single cleaned dataset: DataSet/Cleaned/crypto_clean.csv.
- Check shape, coins, date ranges, duplicates
- Look at recent behaviour (last 365 days): returns, risk (volatility/ATR), relationships (correlations).
- Create a signal snapshot (Buy / Hold / Sell indications) based on RSI/MACD/MA trend.
- Save tidy tables to Reports/ for my README and dashboard.


---

# Change working directory

* We are assuming you will store the notebooks in a subfolder, therefore when running the notebook in the editor, you will need to change the working directory

We need to change the working directory from its current folder to its parent folder
* We access the current directory with os.getcwd()

In [1]:
import os
current_dir = os.getcwd()
current_dir

'c:\\Users\\Nine\\OneDrive\\Documents\\VS Code Projects\\Crypto-Currency-Future-Price-Forecast\\jupyter_notebooks'

We want to make the parent of the current directory the new current directory
* os.path.dirname() gets the parent directory
* os.chir() defines the new current directory

In [2]:
os.chdir(os.path.dirname(current_dir))
print("You set a new current directory")

You set a new current directory


Confirm the new current directory

In [3]:
current_dir = os.getcwd()
current_dir

'c:\\Users\\Nine\\OneDrive\\Documents\\VS Code Projects\\Crypto-Currency-Future-Price-Forecast'

# Section 1

Load Libraries and Data

In [4]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns



---

# Section 2

---

In [5]:
# Load data from csv files

CLEAN = Path("DataSet/Cleaned/crypto_clean.csv")
REPORTS = Path("Reports")
REPORTS.mkdir(exist_ok=True)

df = pd.read_csv(CLEAN, parse_dates=["Date"]).sort_values(["Symbol","Date"]).reset_index(drop=True)
print(df.shape)
df.head()

(27898, 18)


Unnamed: 0,Symbol,Date,Open,High,Low,Close,return_1d,ma_7,ma_30,rsi_14,macd_line,macd_signal,macd_hist,bb_ma20,bb_upper,bb_lower,bb_width,atr_14
0,BTC,2010-07-17,0.04951,0.04951,0.04951,0.04951,,,,,0.0,0.0,0.0,,,,,
1,BTC,2010-07-18,0.04951,0.08585,0.04951,0.08584,0.733791,,,,0.002898,0.00058,0.002318,,,,,
2,BTC,2010-07-19,0.08584,0.09307,0.07723,0.0808,-0.058714,0.07205,,,0.004734,0.00141,0.003323,,,,,
3,BTC,2010-07-20,0.0808,0.08181,0.07426,0.07474,-0.075,0.072722,,,0.005634,0.002255,0.003379,,,,,
4,BTC,2010-07-21,0.07474,0.07921,0.06634,0.07921,0.059807,0.07402,,,0.006632,0.003131,0.003502,,,,,


In [6]:
# Validation check


print("Coins:", sorted(df["Symbol"].unique()))

# Coverage table
coverage = (df.groupby("Symbol")
              .agg(first_date=("Date","min"),
                   last_date=("Date","max"),
                   rows=("Date","count"))
              .reset_index())
coverage


Coins: ['BTC', 'DOGE', 'ETH', 'HBAR', 'QNT', 'SOL', 'XDC', 'XLM', 'XRP']


Unnamed: 0,Symbol,first_date,last_date,rows
0,BTC,2010-07-17,2025-10-14,5521
1,DOGE,2016-07-01,2025-10-14,3345
2,ETH,2015-08-07,2025-10-14,3674
3,HBAR,2019-09-20,2025-10-14,2169
4,QNT,2019-02-05,2025-10-14,2396
5,SOL,2020-04-10,2025-10-14,1966
6,XDC,2020-04-02,2025-10-14,1813
7,XLM,2017-01-17,2025-10-14,3145
8,XRP,2015-01-21,2025-10-14,3869


In [7]:
# Duplicates check

# Duplicates check on (Symbol, Date)
dups = df.duplicated(subset=["Symbol","Date"]).sum()
print("Duplicate (Symbol,Date) rows:", dups)

Duplicate (Symbol,Date) rows: 0


In [8]:
# Gather data from the last 1826 days (5 years to include previous bull market)


cutoff = df["Date"].max() - pd.Timedelta(days=1826)  # 5 years
recent = df[df["Date"] >= cutoff].copy()
print("Recent window:", cutoff.date(), "to", df["Date"].max().date())

Recent window: 2020-10-14 to 2025-10-14


In [9]:

# performance & risk stats
# Daily returns summary
ret_summary = (recent.groupby("Symbol")["return_1d"]
               .agg(count="count", mean="mean", std="std",
                    min="min", median="median", max="max")
               .reset_index())

# Average ATR(14) and Bollinger width (risk proxies)
risk_summary = (recent.groupby("Symbol")
                .agg(avg_atr14=("atr_14","mean"),
                     avg_bb_width=("bb_width","mean"))
                .reset_index())

# Merge into one simple KPI table
kpis = (ret_summary.merge(risk_summary, on="Symbol", how="left")
        .sort_values(["mean","std"], ascending=[False, True])
        .reset_index(drop=True))

kpis


Unnamed: 0,Symbol,count,mean,std,min,median,max,avg_atr14,avg_bb_width
0,DOGE,1779,0.005439,0.094605,-0.384645,-0.000836,2.565113,0.013072,0.392171
1,SOL,1779,0.004393,0.061933,-0.444644,0.000408,0.373539,6.944898,0.404182
2,XDC,1733,0.003223,0.059951,-0.218544,-0.002301,0.79814,0.003911,0.329802
3,XRP,1776,0.002822,0.057136,-0.415469,1.6e-05,0.685225,0.065648,0.318762
4,QNT,1779,0.002795,0.058849,-0.306665,-0.001711,0.841367,8.647304,0.324718
5,HBAR,1779,0.002558,0.058197,-0.25143,-0.001656,0.4598,0.013156,0.343577
6,ETH,1779,0.002186,0.04128,-0.236596,0.000994,0.285306,137.84174,0.271518
7,XLM,1779,0.00217,0.053625,-0.261418,-0.000556,0.586032,0.016627,0.303975
8,BTC,1779,0.00176,0.030738,-0.15745,0.000427,0.150395,2085.338517,0.20465


In [10]:
# Daily correlations saved to reports folder

ret_pivot = recent.pivot_table(index="Date", columns="Symbol", values="return_1d")
corr = ret_pivot.corr()
corr

corr.to_csv(REPORTS / "eda_return_correlation_365d.csv")
print("Saved -> Reports/eda_return_correlation_365d.csv")



Saved -> Reports/eda_return_correlation_365d.csv


In [11]:
# Trend and momentum analysis

# Latest row per coin (most recent date)
latest = (recent.sort_values(["Symbol","Date"])
          .groupby("Symbol")
          .tail(1)
          .reset_index(drop=True))

latest["trend_up"] = latest["ma_7"] > latest["ma_30"]
latest["macd_pos"] = latest["macd_line"] > latest["macd_signal"]

# RSI bands (for info)
latest["rsi_band"] = pd.cut(
    latest["rsi_14"],
    bins=[-np.inf,30,50,70,np.inf],
    labels=["oversold(<30)","neutral(30-50)","caution(50-70)","overbought(>70)"]
)

# Indication
cond_buy  = latest["trend_up"] & latest["macd_pos"] & (latest["rsi_14"] < 60)
cond_sell = (~latest["trend_up"]) & (~latest["macd_pos"]) & (latest["rsi_14"] > 50)

latest["indication"] = np.select(
    [cond_buy, cond_sell],
    ["BUY", "SELL"],
    default="HOLD"
)

cols = ["Symbol","Date","Close","ma_7","ma_30","rsi_14","macd_line","macd_signal","trend_up","macd_pos","rsi_band","indication"]
signal_snapshot = latest[cols].sort_values("Symbol").reset_index(drop=True)
signal_snapshot


Unnamed: 0,Symbol,Date,Close,ma_7,ma_30,rsi_14,macd_line,macd_signal,trend_up,macd_pos,rsi_band,indication
0,BTC,2025-10-14,113196.6,116360.514286,116217.613333,48.55151,207.12695,1094.169379,True,False,neutral(30-50),HOLD
1,DOGE,2025-10-14,0.203692,0.214987,0.243295,42.942782,-0.010099,-0.004658,False,False,neutral(30-50),HOLD
2,ETH,2025-10-14,4120.361,4161.532429,4305.0037,49.254076,-52.798346,-23.06499,False,False,neutral(30-50),HOLD
3,HBAR,2025-10-14,0.188223,0.189973,0.217411,40.49878,-0.011314,-0.008276,False,False,neutral(30-50),HOLD
4,QNT,2025-10-14,89.72484,93.250864,98.301562,35.787155,-2.500649,-1.206625,False,False,neutral(30-50),HOLD
5,SOL,2025-10-14,201.3507,203.8009,219.52701,47.036943,-4.741334,-1.780827,False,False,neutral(30-50),HOLD
6,XDC,2025-10-14,0.060637,0.065003,0.072856,29.709337,-0.003828,-0.002509,False,False,oversold(<30),HOLD
7,XLM,2025-10-14,0.336802,0.345948,0.37393,44.441987,-0.010889,-0.005579,False,False,neutral(30-50),HOLD
8,XRP,2025-10-14,2.4968,2.584217,2.855237,37.594345,-0.10911,-0.066034,False,False,neutral(30-50),HOLD


In [12]:
signal_snapshot.to_csv(REPORTS / "eda_signal_snapshot_latest.csv", index=False)
print("Saved -> Reports/eda_signal_snapshot_latest.csv")


Saved -> Reports/eda_signal_snapshot_latest.csv


In [13]:
# Minimal columns for visuals
viz = recent[["Symbol","Date","Close","ma_7","ma_30","rsi_14","macd_line","macd_signal","bb_upper","bb_lower","atr_14"]].copy()
viz.to_csv(REPORTS / "eda_viz_timeseries_365d.csv", index=False)
print("Saved -> Reports/eda_viz_timeseries_365d.csv")


Saved -> Reports/eda_viz_timeseries_365d.csv


---