1) DATA FETCHING & CLEANING

In [None]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import numpy as np

# Yahoo Finance tickers for the 30 cryptocurrencies analysed in this notebook.
# Each entry is the coin symbol followed by the "-USD" quote suffix.
crypto_list = [
    f"{symbol}-USD"
    for symbol in (
        "BTC ETH XRP LTC BCH ADA DOT "
        "BNB LINK XLM DOGE UNI AAVE ATOM "
        "AVAX MATIC SOL CHR ALGO FTT VET "
        "FIL TRX ETC XMR EOS THETA NEO "
        "DASH ZEC"
    ).split()
]

# Download closing prices for a trailing window of `years` years
def fetch_crypto_data(cryptos, years=5):
    """
    Downloads closing prices for the given tickers from Yahoo Finance.
    - cryptos: ticker symbol(s) passed straight to yf.download.
    - years: length of the look-back window; a year is approximated as 365 days.
    Returns the 'Close' selection of the downloaded data.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=365 * years)
    downloaded = yf.download(cryptos, start=window_start, end=window_end)
    return downloaded['Close']
    
# Download the prices, then fill gaps: forward-fill first, and back-fill
# whatever is still missing at the start of each series.
crypto_data = fetch_crypto_data(crypto_list).ffill(axis=0).bfill(axis=0)

# Persist the filled prices (the row index is written as the first column)
crypto_data.to_csv('Crypto.csv', index=True)

# Make pandas print wide frames in full
pd.set_option('display.width', None)        # No line breaks
pd.set_option('display.max_columns', None)  # Show all columns


In [None]:
import pandas as pd

# Read the saved prices back in (replace with your file path)
crypto_data = pd.read_csv("Crypto.csv")

# One row per described column, holding its observed minimum and maximum
crypto_ranges = crypto_data.describe().loc[['min', 'max']].T

# Widen each range by 10% on both sides
crypto_ranges = crypto_ranges.assign(
    min_with_buffer=crypto_ranges['min'] * 0.9,
    max_with_buffer=crypto_ranges['max'] * 1.1,
)

# Persist and show the ranges
crypto_ranges.to_csv("Crypto_Ranges.csv")
print(crypto_ranges)


In [None]:
# Mask out prices that fall outside a per-ticker valid range
def filter_anomalies_by_ticker(data, crypto_ranges):
    """
    Replaces out-of-range prices with NaN, ticker by ticker.
    - data: DataFrame with cryptocurrencies as columns.
    - crypto_ranges: Dictionary with {ticker: (min, max)}; both bounds are inclusive.
    Tickers that are not columns of `data` are skipped, and a ticker whose
    values are all out of range (entirely NaN) is dropped from the result.
    """
    masked_columns = {}
    for ticker, (low, high) in crypto_ranges.items():
        if ticker not in data.columns:
            continue
        prices = data[ticker]
        masked_columns[ticker] = prices.where(prices.between(low, high))
    masked = pd.DataFrame(masked_columns)
    return masked.dropna(axis=1, how="all")  # Drop columns entirely NaN

# Accepted (min, max) price range per ticker, in the order the tickers are filtered.
# NOTE(review): these look like the buffered min/max values written to
# Crypto_Ranges.csv — confirm they are regenerated whenever the data is re-downloaded.
crypto_price_ranges = {
    ticker: (low, high)
    for ticker, low, high in [
        ('BTC-USD', 4473.71, 116754.66),
        ('ETH-USD', 99.55, 5293.30),
        ('XRP-USD', 0.13, 2.99),
        ('LTC-USD', 27.84, 425.10),
        ('AAVE-USD', 0.46, 695.49),
        ('ALGO-USD', 0.08, 2.62),
        ('ATOM-USD', 1.48, 49.00),
        ('AVAX-USD', 2.62, 147.98),
        ('BCH-USD', 80.42, 1696.67),
        ('BNB-USD', 8.45, 825.30),
        ('CHR-USD', 0.01, 1.55),
        ('DASH-USD', 19.31, 484.98),
        ('DOGE-USD', 0.0014, 0.75),
        ('DOT-USD', 2.59, 59.27),
        ('EOS-USD', 0.37, 15.80),
        ('ETC-USD', 3.57, 147.51),
        ('FIL-USD', 2.18, 210.49),
        ('FTT-USD', 0.73, 87.86),
        ('LINK-USD', 1.57, 57.42),
        ('MATIC-USD', 0.0073, 3.16),
        ('NEO-USD', 4.84, 134.95),
        ('SOL-USD', 0.46, 284.83),
        ('THETA-USD', 0.05, 15.71),
        ('TRX-USD', 0.0079, 0.47),
        ('UNI-USD', 0.00003, 0.66),
        ('VET-USD', 0.0020, 0.28),
        ('XLM-USD', 0.03, 0.80),
        ('XMR-USD', 29.71, 531.94),
        ('ZEC-USD', 16.46, 350.81),
        ('ADA-USD', 0.02, 3.27),
    ]
}

# Load the raw prices with the dates as the index, so the date labels survive
# the filtering step and end up as column headers after transposing.
file_path = "Crypto.csv"  # Replace with your file path
crypto_data = pd.read_csv(file_path, header=0, index_col=0)

# Apply the anomaly filtering: prices outside a ticker's range become NaN.
# Tickers without an entry in crypto_price_ranges are not part of the result.
crypto_data_cleaned = filter_anomalies_by_ticker(crypto_data, crypto_price_ranges)

# Restore the column order of the loaded file (the filter emits tickers in
# the order of the range dictionary).
crypto_data_cleaned = crypto_data_cleaned[
    [ticker for ticker in crypto_data.columns if ticker in crypto_data_cleaned.columns]
]

# Fill the gaps left by removed values (forward first, then backward) so the
# saved dataset contains no NaNs.
crypto_data_cleaned = crypto_data_cleaned.ffill().bfill()

# Transpose the data to make cryptocurrencies rows and dates columns.
# Previously the unfiltered frame was transposed and saved, so the result of
# the anomaly filter was silently discarded.
crypto_data_transposed = crypto_data_cleaned.transpose()

# Save the transposed data for clustering
crypto_data_transposed.to_csv("Crypto_For_Clustering.csv", index=True)

# Reload it with the first (unnamed) column, i.e. the tickers, as the index
crypto_data = pd.read_csv("Crypto_For_Clustering.csv", index_col=0)

# Save the cleaned data
crypto_data_transposed.to_csv('Cleaned_Crypto.csv', index=True)

2) CLUSTERING: 3 Clusters Based on Proximity to Centroids, 1 Cluster Based on Recent Growth

In [None]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt

# Step 2a: Load the updated 5-year dataset.
# Read from the working directory, which is where the cleaning step writes
# 'Cleaned_Crypto.csv' (a user-specific absolute path breaks on other machines).
file_path = 'Cleaned_Crypto.csv'
crypto_data = pd.read_csv(file_path)

# Step 2b: Make the tickers the index, whether the column arrived unnamed
# (index written without a name) or already called "Ticker".
if "Unnamed: 0" in crypto_data.columns:
    crypto_data.rename(columns={"Unnamed: 0": "Ticker"}, inplace=True)
if "Ticker" in crypto_data.columns:
    crypto_data.set_index("Ticker", inplace=True)

# Prepare the data for PCA (use numerical price data only).
# Taken as a copy *before* the "Cluster" column is added below.
price_data = crypto_data.copy()

# Standardize the data.
# NOTE(review): with tickers as rows, StandardScaler normalises each date
# column across tickers, not each ticker's own series — confirm this is intended.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(price_data)

# Apply PCA (retain 2 components for visualization and clustering)
pca = PCA(n_components=2)
pca_result = pca.fit_transform(standardized_data)

# Step: Apply K-Means clustering on the 2-D PCA projection
kmeans = KMeans(n_clusters=4, random_state=42)
clusters = kmeans.fit_predict(pca_result)

# Attach the cluster label of each ticker to the loaded frame
crypto_data["Cluster"] = clusters

plt.figure(figsize=(10, 6))
plt.scatter(pca_result[:, 0], pca_result[:, 1], c=clusters, cmap='viridis', s=50)
plt.title("Cryptocurrency Clusters (Standardized Data + PCA)")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.colorbar(label="Cluster")
plt.show()

# Select representatives based on proximity to centroids:
# for every cluster, pick the ticker whose PCA point is closest to the centroid.
cluster_centroids = kmeans.cluster_centers_
representatives = []
for i in range(kmeans.n_clusters):  # For each cluster
    cluster_indices = np.where(clusters == i)[0]
    distances = np.linalg.norm(pca_result[cluster_indices] - cluster_centroids[i], axis=1)
    closest_index = cluster_indices[np.argmin(distances)]
    representatives.append(price_data.index[closest_index])

# Refine representative for Cluster 0 based on recent growth
# Find cryptocurrency in Cluster 0 with the highest recent growth
cluster_0_indices = crypto_data[crypto_data["Cluster"] == 0].index
cluster_0_prices = price_data.loc[cluster_0_indices]

# Calculate growth over the last year (last 365 columns): relative change
# from the first to the last price in the window. The earlier version took
# the mean price of the window, which favours the most expensive coin
# rather than the fastest-growing one.
recent_window = cluster_0_prices.iloc[:, -365:]
recent_growth = recent_window.iloc[:, -1] / recent_window.iloc[:, 0] - 1
representative_high_growth = recent_growth.idxmax()

# Update the representative for Cluster 0
representatives[0] = representative_high_growth

# Print the final selected representatives
print("Final Selected Representatives for Each Cluster:")
for cluster_id, representative in enumerate(representatives):
    print(f"Cluster {cluster_id}: {representative}")

# Save the updated dataset with cluster information to the working directory,
# which is where the later modelling cells read "Clustering.csv" from.
crypto_data.to_csv('Clustering.csv', index=True)


3) CORRELATION

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# File path for your input data: the clustering output in the working
# directory (the same file the later modelling cells load).
file_path = "Clustering.csv"
data = pd.read_csv(file_path)

# Keep only the price columns, indexed by ticker
if "Cluster" in data.columns:
    data = data.drop(columns=["Cluster"])

if "Ticker" in data.columns:
    data = data.set_index("Ticker")

# Transpose the data
# Transpose so that rows become dates and columns become cryptocurrency tickers
price_data_t = data.transpose()

# Compute the correlation matrix
# Calculate pairwise correlations between cryptocurrencies (on price levels)
correlation_matrix = price_data_t.corr()

# Define specific representatives for correlation analysis
representatives = ["SOL-USD", "BTC-USD", "ETH-USD", "XMR-USD"]  # Focused tickers

# Step 3d: Verify representatives exist in the correlation matrix
valid_representatives = [rep for rep in representatives if rep in correlation_matrix.columns]
print("Valid Representatives:", valid_representatives)

# Step 3e: Extract top correlations for the specific representatives
correlation_results = []  # Long-format rows, one per (representative, peer)
top_correlations = {}  # For in-memory display

for rep in valid_representatives:
    # Correlations of the representative with every *other* ticker.
    # Dropping the self entry by label is safer than skipping the first
    # sorted row, which breaks when a peer ties at 1.0.
    correlations = correlation_matrix[rep].drop(labels=rep)

    # Sort and extract top 4 positive and top 4 negative correlations
    top_positive = correlations.sort_values(ascending=False).iloc[:4]
    top_negative = correlations.sort_values(ascending=True).iloc[:4]

    # Store top correlations in a dictionary for display
    top_correlations[rep] = {
        "Top Positive": top_positive,
        "Top Negative": top_negative
    }

    # Flatten both rankings into rows for the results table
    for label, ranking in (("Top Positive", top_positive), ("Top Negative", top_negative)):
        for ticker, value in ranking.items():
            correlation_results.append({
                "Representative": rep,
                "Type": label,
                "Ticker": ticker,
                "Correlation": value
            })

# Step 3f: Display results
for rep, corr in top_correlations.items():
    print(f"Representative: {rep}")
    print("Top Positive Correlations:")
    print(corr["Top Positive"])
    print("Top Negative Correlations:")
    print(corr["Top Negative"])
    print("\n")

# Step 3g: Collect the correlation results in a DataFrame
# (kept in memory only; call .to_csv(...) on it to export)
correlation_results_df = pd.DataFrame(correlation_results)

# Step 4: Visualize the full correlation matrix (all tickers) as a heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, cmap="coolwarm", annot=False)
plt.title("Cryptocurrency Correlation Heatmap")
plt.show()


4) EXPLORATORY DATA ANALYSIS

1. Plot Historical Price Trends


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates

# Parse the column labels (dates) into datetimes; unparseable labels become NaT
price_data.columns = pd.to_datetime(price_data.columns, errors='coerce')

# One line chart per representative ticker
for rep in representatives:  # Assuming 'representatives' is a list of tickers
    fig, ax = plt.subplots(figsize=(12, 6))

    # Price series of the current ticker against the date columns
    ax.plot(price_data.columns, price_data.loc[rep], linewidth=2, label=f"Price Trend for {rep}")

    # Title and axis labels
    ax.set_title(f"Historical Price Trend: {rep}", fontsize=16)
    ax.set_xlabel("Date", fontsize=14)
    ax.set_ylabel("Price", fontsize=14)

    # Date axis: a year-month tick every 6 months, rotated for readability
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
    plt.setp(ax.get_xticklabels(), rotation=45, fontsize=10)

    # Light dashed grid plus legend
    ax.grid(color='lightgray', linestyle='--', linewidth=0.5)
    ax.legend(loc="upper left", fontsize=12)

    # Avoid clipped labels, then render
    fig.tight_layout()
    plt.show()


2. Analyze Price Distributions


In [None]:
import seaborn as sns

# For every representative: a histogram with KDE, followed by a boxplot of its prices
for rep in representatives:
    rep_prices = price_data.loc[rep]

    # Histogram (50 bins) with a kernel density overlay
    hist_fig, hist_ax = plt.subplots(figsize=(12, 6))
    sns.histplot(rep_prices, bins=50, kde=True, label=f"Price Distribution for {rep}", ax=hist_ax)
    hist_ax.set_title(f"Price Distribution: {rep}")
    hist_ax.set_xlabel("Price")
    hist_ax.set_ylabel("Frequency")
    hist_ax.legend()
    plt.show()

    # Boxplot for price distribution
    box_fig, box_ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(x=rep_prices, ax=box_ax)
    box_ax.set_title(f"Boxplot of Prices: {rep}")
    box_ax.set_xlabel("Price")
    plt.show()


3. Analyze Returns (Daily)


In [None]:
# Calculate daily percentage changes (returns).
# price_data holds tickers as rows and dates as columns, so the change must
# be taken along the date axis. A plain pct_change() works down the rows and
# would compare one ticker with the ticker above it. Transposing, differencing
# and transposing back gives day-over-day returns per ticker.
price_returns = price_data.T.pct_change().T

for rep in representatives:
    plt.figure(figsize=(12, 6))
    plt.plot(price_returns.loc[rep], label=f"Daily Returns for {rep}")
    plt.title(f"Daily Percentage Returns: {rep}")
    plt.xlabel("Date")
    plt.ylabel("Daily Returns")
    plt.legend()
    plt.grid()
    plt.show()


4. Distribution of Returns

In [None]:
# Calculate daily returns (fractional day-over-day changes) per ticker.
# price_data has tickers as rows and dates as columns, so the change is
# computed along the dates (a plain pct_change() would run across tickers).
returns = price_data.T.pct_change().T

In [None]:
# Histogram of daily returns per representative.
# `returns` holds fractions (0.05 == 5%), while the axis is labelled in
# percent, so the values are scaled by 100 before plotting.
for rep in representatives:
    plt.figure(figsize=(12, 6))
    sns.histplot(returns.loc[rep].dropna() * 100, bins=50, kde=True, label=f"Daily Returns: {rep}")
    plt.title(f"Distribution of Daily Returns: {rep}")
    plt.xlabel("Daily Return (%)")
    plt.ylabel("Frequency")
    plt.legend()
    plt.grid()
    plt.show()

# Boxplot of daily returns per representative (same percent scaling)
for rep in representatives:
    plt.figure(figsize=(6, 4))
    sns.boxplot(x=returns.loc[rep].dropna() * 100)  # Drop NaN values
    plt.title(f"Boxplot of Daily Returns: {rep}")
    plt.xlabel("Daily Return (%)")
    plt.grid()
    plt.show()


MACHINE LEARNING MODELS

FACEBOOK PROPHET

In [None]:
!pip install prophet


In [None]:
from prophet import Prophet
import pandas as pd
import matplotlib.pyplot as plt

# Ensure representatives are defined
representatives = ['SOL-USD', 'BTC-USD', 'ETH-USD', 'XMR-USD']

# Remove non-date columns from price_data
price_data_dates = price_data.drop(columns=['Cluster'], errors='ignore')

# Fit one Prophet model per representative and forecast 30 days ahead
for rep in representatives:
    if rep not in price_data_dates.index:  # Ensure the representative exists in price_data
        print(f"Representative {rep} not found in the data.")
        continue

    # Create a new DataFrame with required format
    data = price_data_dates.loc[rep].reset_index()
    data.columns = ['ds', 'y']  # Prophet requires 'ds' (date) and 'y' (value)

    # Ensure 'ds' is in datetime format
    data['ds'] = pd.to_datetime(data['ds'], errors='coerce')  # Convert to datetime
    data = data.dropna(subset=['ds'])  # Drop rows with invalid dates

    # Initialize the Prophet model with yearly seasonality on and weekly off
    model = Prophet(weekly_seasonality=False, yearly_seasonality=True)
    # Fit the model
    model.fit(data)

    # Create a future DataFrame for the forecast
    future = model.make_future_dataframe(periods=30)  # Predict for the next 30 days
    forecast = model.predict(future)

    # Plot the forecast. Prophet's plot() creates its own figure when no
    # axes are passed, so the size is given here instead of opening a
    # separate plt.figure() that would be shown empty.
    fig = model.plot(forecast, figsize=(12, 6))
    ax = fig.gca()
    ax.set_title(f"Facebook Prophet Forecast for {rep}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.grid(True)  # explicit: a bare grid() call toggles the current state
    plt.show()

    # Display the forecasted data
    print(f"Forecasted Prices for {rep}:\n", forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10))



In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
from prophet import Prophet

# Define a function for evaluation
def evaluate_prophet_model(rep, price_data):
    """
    Back-tests a default Prophet model on the last 30 observations of one ticker.
    - rep: index label of the ticker row in price_data.
    - price_data: DataFrame with tickers as rows and dates as columns
      (a 'Cluster' column, if present, is ignored).
    Prints MAE, RMSE and MAPE and returns them as a dict with the keys
    "MAE", "RMSE" and "MAPE".
    Raises ValueError when no forecast date matches a test date.
    NOTE(review): this evaluates Prophet() with default settings, not the
    seasonality configuration used in the forecasting cell — confirm intent.
    """
    # Prepare the data for Prophet
    # Drop non-date columns like 'Cluster' and reset index
    price_data_clean = price_data.drop(columns=['Cluster'], errors='ignore')
    data = price_data_clean.loc[rep].reset_index()
    data.columns = ['ds', 'y']  # Prophet requires 'ds' (date) and 'y' (value)

    # Ensure 'ds' is in datetime format and in chronological order
    data['ds'] = pd.to_datetime(data['ds'], errors='coerce')
    data = data.dropna(subset=['ds']).sort_values('ds')  # Drop rows with invalid dates

    # Train-test split (last 30 days as test set)
    train_data = data.iloc[:-30]
    test_data = data.iloc[-30:]

    # Train the Prophet model
    model = Prophet()
    model.fit(train_data)

    # Generate predictions for the test period
    future = model.make_future_dataframe(periods=30)
    forecast = model.predict(future)

    # Pair every actual with the prediction for the same date. Joining on
    # 'ds' keeps the pairs aligned even when a test date has no forecast row.
    paired = test_data.merge(forecast[['ds', 'yhat']], on='ds', how='inner')
    if paired.empty:
        raise ValueError(f"No forecast dates overlap the test period for {rep}.")
    predictions = paired['yhat'].to_numpy(dtype=float)
    actuals = paired['y'].to_numpy(dtype=float)

    # Calculate evaluation metrics (MAPE skips zero actuals to avoid dividing by zero)
    mae = mean_absolute_error(actuals, predictions)
    rmse = np.sqrt(mean_squared_error(actuals, predictions))
    non_zero = actuals != 0
    mape = np.mean(np.abs((actuals[non_zero] - predictions[non_zero]) / actuals[non_zero])) * 100

    print(f"Evaluation Metrics for {rep}:")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    print("\n")

    return {"MAE": mae, "RMSE": rmse, "MAPE": mape}

# Evaluate for each representative
for rep in representatives:
    evaluate_prophet_model(rep, price_data)

ARIMA

In [None]:
!pip install pmdarima


In [None]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_absolute_error, mean_squared_error
from pmdarima import auto_arima
import numpy as np
import matplotlib.pyplot as plt

# Loop through representatives: fit an ARIMA model per ticker, forecast the
# held-out last 30 observations and report the error metrics.
for rep in representatives:
    data = price_data.loc[rep].copy()

    # Drop the 'Cluster' entry if it exists
    if "Cluster" in data.index:
        data = data.drop(index="Cluster", errors="ignore")

    # Convert index to datetime, then drop the entries whose label did not
    # parse. (Calling dropna() on the converted index before assigning it
    # raises a length mismatch as soon as one label is invalid.)
    data.index = pd.to_datetime(data.index, format='%Y-%m-%d', errors='coerce')
    data = data[data.index.notna()].astype(float)

    # Split into training and testing sets
    train_data = data[:-30]
    test_data = data[-30:]

    # Stationarity check using ADF Test
    adf_test = adfuller(train_data)
    is_stationary = adf_test[1] < 0.05
    print(f"ADF Test for {rep}:")
    print(f"ADF Statistic: {adf_test[0]:.4f}, p-value: {adf_test[1]:.4f}")
    print("Stationary" if is_stationary else "Non-Stationary")

    # If non-stationary, apply differencing
    if is_stationary:
        train_data_diff = train_data
    else:
        train_data_diff = train_data.diff().dropna()

    # Let auto_arima pick the (p, d, q) order on the (possibly differenced) series
    auto_arima_model = auto_arima(train_data_diff, seasonal=False, trace=True)
    print(auto_arima_model.summary())

    # Use the optimal order from Auto ARIMA
    order = auto_arima_model.order
    print(f"Selected ARIMA Order: {order}")

    # Fit ARIMA model using Auto ARIMA's suggested order
    arima_model = ARIMA(train_data_diff, order=order)
    arima_fit = arima_model.fit()

    # Forecast the next 30 days
    forecast_diff = arima_fit.forecast(steps=30)

    # Reverse differencing (if applied): accumulate the forecast changes on
    # top of the last observed training price
    if is_stationary:
        forecast = forecast_diff
    else:
        forecast = forecast_diff.cumsum() + train_data.iloc[-1]
    forecast.index = test_data.index

    # Plot Training Data, Actual Test Data, and Forecast
    plt.figure(figsize=(12, 6))
    plt.plot(train_data.index, train_data, label='Training Data', color='blue')
    plt.plot(test_data.index, test_data, label='Actual Test Data', color='orange')
    plt.plot(forecast.index, forecast, label='ARIMA Forecast', linestyle='--', color='green')
    plt.title(f"ARIMA Forecast for {rep}")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.legend()
    plt.grid()
    plt.show()

    # Evaluation Metrics
    mae = mean_absolute_error(test_data, forecast)
    rmse = np.sqrt(mean_squared_error(test_data, forecast))
    mape = np.mean(np.abs((test_data - forecast) / test_data)) * 100

    print(f"Evaluation Metrics for {rep}:")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    print("\n")


Exponential Smoothing

In [None]:
!pip install statsmodels


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Step 1: Load and prepare data
crypto_data = pd.read_csv("Clustering.csv")  # Replace with your CSV file name
crypto_data = crypto_data.set_index("Ticker")  # Set Ticker as index for filtering individual cryptocurrencies

# Specify the cryptocurrency for analysis
rep = "SOL-USD"  # Replace with your desired cryptocurrency

# Extract data for the chosen cryptocurrency
# (note: this rebinds `price_data` to a single-ticker Series)
price_data = crypto_data.loc[rep].copy()

# Ensure index is in datetime format and clean the data
price_data_clean = price_data.drop(labels=["Cluster"], errors="ignore")  # Drop 'Cluster' if present
price_data_clean.index = pd.to_datetime(price_data_clean.index, errors='coerce', format='%Y-%m-%d')
# Drop entries whose label failed date parsing (NaT); a plain dropna() only
# removes missing prices, not invalid dates
price_data_clean = price_data_clean[price_data_clean.index.notna()]
price_data_clean = price_data_clean.astype(float).dropna()  # Numeric dtype, no missing prices
price_data_clean = price_data_clean.sort_index()  # Sort by date

# Step 2: Train-test split
train_data = price_data_clean[:-30]  # Use all but the last 30 points for training
test_data = price_data_clean[-30:]   # Use the last 30 points for testing

# Step 3: Fit the Holt-Winters model
model = ExponentialSmoothing(
    train_data,
    trend="mul",  # Multiplicative trend
    seasonal=None,  # No seasonality
    initialization_method="estimated"  # Automatically initialize
)
hw_fit = model.fit()

# Step 4: Forecast the next 30 days
forecast = hw_fit.forecast(steps=30)
forecast.index = test_data.index  # Align forecast index with test data

# Step 5: Plot the results
plt.figure(figsize=(12, 6))
plt.plot(train_data.index, train_data, label="Training Data", color="blue")
plt.plot(test_data.index, test_data, label="Actual Test Data", color="orange")
plt.plot(forecast.index, forecast, label="Holt-Winters Forecast", linestyle="--", color="green")
plt.title(f"Holt-Winters Exponential Smoothing Forecast for {rep}")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()

# Step 6: Evaluate the model
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Evaluation metrics. RMSE is taken as the square root of the MSE because
# the `squared=False` argument is deprecated in scikit-learn and rejected by
# newer releases.
mae = mean_absolute_error(test_data, forecast)
rmse = mean_squared_error(test_data, forecast) ** 0.5
mape = (abs((test_data - forecast) / test_data).mean()) * 100

print(f"Evaluation Metrics for {rep}:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Step 1: Load and prepare data
crypto_data = pd.read_csv("Clustering.csv")  # Replace with your CSV file name
crypto_data = crypto_data.set_index("Ticker")  # Set Ticker as index for filtering individual cryptocurrencies

# Specify the cryptocurrency for analysis
rep = "BTC-USD"  # Replace with your desired cryptocurrency

# Extract data for the chosen cryptocurrency
# (note: this rebinds `price_data` to a single-ticker Series)
price_data = crypto_data.loc[rep].copy()

# Ensure index is in datetime format and clean the data
price_data_clean = price_data.drop(labels=["Cluster"], errors="ignore")  # Drop 'Cluster' if present
price_data_clean.index = pd.to_datetime(price_data_clean.index, errors='coerce', format='%Y-%m-%d')
# Drop entries whose label failed date parsing (NaT); a plain dropna() only
# removes missing prices, not invalid dates
price_data_clean = price_data_clean[price_data_clean.index.notna()]
price_data_clean = price_data_clean.astype(float).dropna()  # Numeric dtype, no missing prices
price_data_clean = price_data_clean.sort_index()  # Sort by date

# Step 2: Train-test split
train_data = price_data_clean[:-30]  # Use all but the last 30 points for training
test_data = price_data_clean[-30:]   # Use the last 30 points for testing

# Step 3: Fit the Holt-Winters model
model = ExponentialSmoothing(
    train_data,
    trend="mul",  # Multiplicative trend
    seasonal=None,  # No seasonality
    initialization_method="estimated"  # Automatically initialize
)
hw_fit = model.fit()

# Step 4: Forecast the next 30 days
forecast = hw_fit.forecast(steps=30)
forecast.index = test_data.index  # Align forecast index with test data

# Step 5: Plot the results
plt.figure(figsize=(12, 6))
plt.plot(train_data.index, train_data, label="Training Data", color="blue")
plt.plot(test_data.index, test_data, label="Actual Test Data", color="orange")
plt.plot(forecast.index, forecast, label="Holt-Winters Forecast", linestyle="--", color="green")
plt.title(f"Holt-Winters Exponential Smoothing Forecast for {rep}")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()

# Step 6: Evaluate the model
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Evaluation metrics. RMSE is taken as the square root of the MSE because
# the `squared=False` argument is deprecated in scikit-learn and rejected by
# newer releases.
mae = mean_absolute_error(test_data, forecast)
rmse = mean_squared_error(test_data, forecast) ** 0.5
mape = (abs((test_data - forecast) / test_data).mean()) * 100

print(f"Evaluation Metrics for {rep}:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Step 1: Load and prepare data
crypto_data = pd.read_csv("Clustering.csv")  # Replace with your CSV file name
crypto_data = crypto_data.set_index("Ticker")  # Set Ticker as index for filtering individual cryptocurrencies

# Specify the cryptocurrency for analysis
rep = "ETH-USD"  # Replace with your desired cryptocurrency

# Extract data for the chosen cryptocurrency
# (note: this rebinds `price_data` to a single-ticker Series)
price_data = crypto_data.loc[rep].copy()

# Ensure index is in datetime format and clean the data
price_data_clean = price_data.drop(labels=["Cluster"], errors="ignore")  # Drop 'Cluster' if present
price_data_clean.index = pd.to_datetime(price_data_clean.index, errors='coerce', format='%Y-%m-%d')
# Drop entries whose label failed date parsing (NaT); a plain dropna() only
# removes missing prices, not invalid dates
price_data_clean = price_data_clean[price_data_clean.index.notna()]
price_data_clean = price_data_clean.astype(float).dropna()  # Numeric dtype, no missing prices
price_data_clean = price_data_clean.sort_index()  # Sort by date

# Step 2: Train-test split
train_data = price_data_clean[:-30]  # Use all but the last 30 points for training
test_data = price_data_clean[-30:]   # Use the last 30 points for testing

# Step 3: Fit the Holt-Winters model
model = ExponentialSmoothing(
    train_data,
    trend="mul",  # Multiplicative trend
    seasonal=None,  # No seasonality
    initialization_method="estimated"  # Automatically initialize
)
hw_fit = model.fit()

# Step 4: Forecast the next 30 days
forecast = hw_fit.forecast(steps=30)
forecast.index = test_data.index  # Align forecast index with test data

# Step 5: Plot the results
plt.figure(figsize=(12, 6))
plt.plot(train_data.index, train_data, label="Training Data", color="blue")
plt.plot(test_data.index, test_data, label="Actual Test Data", color="orange")
plt.plot(forecast.index, forecast, label="Holt-Winters Forecast", linestyle="--", color="green")
plt.title(f"Holt-Winters Exponential Smoothing Forecast for {rep}")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()

# Step 6: Evaluate the model
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Evaluation metrics. RMSE is taken as the square root of the MSE because
# the `squared=False` argument is deprecated in scikit-learn and rejected by
# newer releases.
mae = mean_absolute_error(test_data, forecast)
rmse = mean_squared_error(test_data, forecast) ** 0.5
mape = (abs((test_data - forecast) / test_data).mean()) * 100

print(f"Evaluation Metrics for {rep}:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Step 1: Load and prepare data
crypto_data = pd.read_csv("Clustering.csv")  # Replace with your CSV file name
crypto_data = crypto_data.set_index("Ticker")  # Set Ticker as index for filtering individual cryptocurrencies

# Specify the cryptocurrency for analysis
rep = "XMR-USD"  # Replace with your desired cryptocurrency

# Extract data for the chosen cryptocurrency
# (note: this rebinds `price_data` to a single-ticker Series)
price_data = crypto_data.loc[rep].copy()

# Ensure index is in datetime format and clean the data
price_data_clean = price_data.drop(labels=["Cluster"], errors="ignore")  # Drop 'Cluster' if present
price_data_clean.index = pd.to_datetime(price_data_clean.index, errors='coerce', format='%Y-%m-%d')
# Drop entries whose label failed date parsing (NaT); a plain dropna() only
# removes missing prices, not invalid dates
price_data_clean = price_data_clean[price_data_clean.index.notna()]
price_data_clean = price_data_clean.astype(float).dropna()  # Numeric dtype, no missing prices
price_data_clean = price_data_clean.sort_index()  # Sort by date

# Step 2: Train-test split
train_data = price_data_clean[:-30]  # Use all but the last 30 points for training
test_data = price_data_clean[-30:]   # Use the last 30 points for testing

# Step 3: Fit the Holt-Winters model
model = ExponentialSmoothing(
    train_data,
    trend="mul",  # Multiplicative trend
    seasonal=None,  # No seasonality
    initialization_method="estimated"  # Automatically initialize
)
hw_fit = model.fit()

# Step 4: Forecast the next 30 days
forecast = hw_fit.forecast(steps=30)
forecast.index = test_data.index  # Align forecast index with test data

# Step 5: Plot the results
plt.figure(figsize=(12, 6))
plt.plot(train_data.index, train_data, label="Training Data", color="blue")
plt.plot(test_data.index, test_data, label="Actual Test Data", color="orange")
plt.plot(forecast.index, forecast, label="Holt-Winters Forecast", linestyle="--", color="green")
plt.title(f"Holt-Winters Exponential Smoothing Forecast for {rep}")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()

# Step 6: Evaluate the model
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Evaluation metrics. RMSE is taken as the square root of the MSE because
# the `squared=False` argument is deprecated in scikit-learn and rejected by
# newer releases.
mae = mean_absolute_error(test_data, forecast)
rmse = mean_squared_error(test_data, forecast) ** 0.5
mape = (abs((test_data - forecast) / test_data).mean()) * 100

print(f"Evaluation Metrics for {rep}:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")


XGBOOST

In [None]:
!pip install xgboost


In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load Data
crypto_data = pd.read_csv("Clustering.csv")  # Update to the correct file name

# Select the price row of the cryptocurrency of interest (e.g., SOL-USD).
# Columns are picked by name: 'Ticker' becomes the index and 'Cluster' is
# dropped, so the result does not depend on where those columns sit in the file.
price_data = (
    crypto_data.set_index('Ticker')
    .drop(columns=['Cluster'], errors='ignore')
    .loc['SOL-USD']
    .astype(float)
)  # Series: one price per date label
price_data.index = pd.to_datetime(price_data.index, errors='coerce')  # Ensure the index is datetime
price_data = price_data[price_data.index.notna()]  # Drop labels that are not valid dates
price_data = price_data.dropna()  # Drop missing prices

# Prepare Sliding Window Data
def create_features(data, window_size=5):
    """
    Builds sliding-window samples from a 1-D series.
    - data: 1-D sequence of values in chronological order.
    - window_size: number of consecutive values used as features.
    Returns (X, y): X[i] holds data[i:i + window_size] and y[i] is the value
    that follows that window. When the series has no complete window plus
    target, X has shape (0, window_size) and y has shape (0,), so X keeps
    two dimensions in every case.
    """
    values = np.asarray(data)
    if len(values) <= window_size:
        return np.empty((0, window_size), dtype=values.dtype), np.empty((0,), dtype=values.dtype)
    # Every window except the last one has a following value to predict
    windows = np.lib.stride_tricks.sliding_window_view(values, window_size)[:-1]
    # Copy so callers receive ordinary writable arrays rather than read-only views
    return windows.copy(), values[window_size:].copy()

# Turn the price series into (window of 5 prices -> next price) samples
window_size = 5
X, y = create_features(price_data.values, window_size)

# Chronological split: first 80% of the samples for training, the rest for testing
train_ratio = 0.8
train_size = int(len(X) * train_ratio)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Fit the XGBoost regressor on the training windows
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
xgb_model.fit(X_train, y_train)

# Predict the held-out targets
y_pred = xgb_model.predict(X_test)

# Error metrics on the test set
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE is computed only where the actual value is non-zero to avoid dividing by zero
non_zero_indices = y_test != 0
mape = np.mean(np.abs((y_test[non_zero_indices] - y_pred[non_zero_indices]) / y_test[non_zero_indices])) * 100

print("\n".join([
    "Evaluation Metrics for XGBoost SOL-USD:",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%",
]))

# Plot actual vs. predicted prices over the dates of the test samples
# (the test targets are the last len(y_test) points of the series)
test_dates = price_data.index[-len(y_test):]
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(test_dates, y_test, label="Actual Prices", color="blue")
ax.plot(test_dates, y_pred, label="Predicted Prices", color="orange")
ax.set_title("XGBoost Model SOL Predictions")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
plt.show()



In [None]:
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load Data
crypto_data = pd.read_csv("Clustering.csv")  # Update to the correct file name
# Price row of the desired cryptocurrency as a flat float array. Columns are
# selected by name ('Ticker' as index, 'Cluster' dropped) instead of by
# position, so the result does not depend on the column layout of the file.
price_data = (
    crypto_data.set_index('Ticker')
    .drop(columns=['Cluster'], errors='ignore')
    .loc['BTC-USD']  # Replace with the desired cryptocurrency
    .to_numpy(dtype=float)
)

# Prepare Sliding Window Data
def create_features(data, window_size=5):
    """Slice ``data`` into overlapping windows paired with the next value.

    Sample ``k`` consists of ``data[k:k + window_size]`` and its target is
    ``data[k + window_size]``.

    Returns:
        ``(X, y)`` as NumPy arrays; both are empty when ``data`` is not
        longer than ``window_size``.
    """
    n_samples = len(data) - window_size
    windows, targets = [], []
    start = 0
    while start < n_samples:
        stop = start + window_size
        windows.append(data[start:stop])
        targets.append(data[stop])
        start += 1
    return np.array(windows), np.array(targets)

# Build lag windows: each sample holds `window_size` consecutive prices and
# the target is the price that follows them.
window_size = 5
X, y = create_features(price_data, window_size)

# Walk-Forward Validation: the training set grows by one fold per iteration
# and the model is tested on the fold that immediately follows it.
n_splits = 10  # Number of loop iterations
fold_size = len(X) // n_splits

results = []      # per-fold metric dicts
y_test_all = []   # actual values of every evaluated fold, in order
y_pred_all = []   # matching predictions

for i in range(n_splits):
    train_end = (i + 1) * fold_size
    test_end = min(len(X), train_end + fold_size)

    # Train-test split for the current fold
    X_train, y_train = X[:train_end], y[:train_end]
    X_test, y_test = X[train_end:test_end], y[train_end:test_end]

    # NOTE: on the last iteration only the len(X) % n_splits leftover samples
    # remain for testing, so that fold is skipped when the remainder is zero.
    if len(y_test) == 0 or len(X_train) == 0:  # Skip invalid splits
        continue

    # Train XGBoost model from scratch on everything before the test fold
    xgb_model = XGBRegressor(n_estimators=100, max_depth=3, learning_rate=0.1, random_state=42)
    xgb_model.fit(X_train, y_train)

    # Predict and collect metrics
    y_pred = xgb_model.predict(X_test)
    y_test_all.extend(y_test)
    y_pred_all.extend(y_pred)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    # MAPE only uses positions whose actual value is non-zero; dividing by a
    # zero actual would otherwise turn the fold's MAPE into inf/nan.
    non_zero_indices = y_test != 0
    mape = np.mean(
        np.abs(
            (y_test[non_zero_indices] - y_pred[non_zero_indices])
            / y_test[non_zero_indices]
        )
    ) * 100

    results.append({'MAE': mae, 'RMSE': rmse, 'MAPE': mape})

    print(f"Fold {i + 1} Results:")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    print("-" * 50)

# Every fold is skipped when there are fewer samples than n_splits; fail with
# a clear message instead of an IndexError on results[0].
if not results:
    raise ValueError(
        "Walk-forward validation produced no folds; "
        "not enough samples for the requested n_splits."
    )

# Calculate Average Results (unweighted mean over the evaluated folds)
average_results = {metric: np.mean([r[metric] for r in results]) for metric in results[0]}
print("\nAverage Walk-Forward Validation Results:")
for metric, value in average_results.items():
    print(f"{metric}: {value:.2f}")

# Plot Final Predictions: all evaluated folds concatenated in order
plt.figure(figsize=(12, 6))
plt.plot(range(len(y_test_all)), y_test_all, label="Actual Prices", color="blue")
plt.plot(range(len(y_pred_all)), y_pred_all, label="Predicted Prices", color="orange", linestyle="--")
plt.title("Final XGBoost Model BTC Predictions with Walk-Forward Validation")
plt.xlabel("Time Steps")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()


In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load Data
crypto_data = pd.read_csv("Clustering.csv")  # Update to the correct file name

# Filter for the cryptocurrency of interest (ETH-USD here) and drop the first
# and last columns (noted in the original as the Ticker and Cluster columns —
# confirm against the CSV layout).
price_data = crypto_data[crypto_data['Ticker'] == 'ETH-USD'].iloc[:, 1:-1]
price_data = price_data.T  # Transpose so the former column labels become the index
price_data.index = pd.to_datetime(price_data.index, errors='coerce')  # Ensure the index is datetime
# errors='coerce' turns unparseable labels into NaT.  dropna() only inspects
# the values, so rows with an invalid date must be removed via the index.
price_data = price_data[price_data.index.notna()]
price_data = price_data.dropna()  # Drop rows whose price is missing
price_data = price_data.squeeze()  # Convert to a series for easier manipulation

# Prepare Sliding Window Data
def create_features(data, window_size=5):
    """Pair every run of ``window_size`` consecutive values with its successor.

    Args:
        data: Indexable, sliceable sequence of observations.
        window_size: Length of each input window.

    Returns:
        ``(X, y)`` as NumPy arrays, where ``X[k]`` is
        ``data[k:k + window_size]`` and ``y[k]`` is ``data[k + window_size]``.
    """
    samples = [
        (data[first:first + window_size], data[first + window_size])
        for first in range(len(data) - window_size)
    ]
    windows = [window for window, _ in samples]
    targets = [target for _, target in samples]
    return np.array(windows), np.array(targets)

# Build lag windows: each sample holds `window_size` consecutive prices and
# the target is the price that follows them.
window_size = 5
X, y = create_features(price_data.values, window_size)

# Chronological hold-out: the first 80% of the samples train the model and
# the remainder is kept for evaluation (plain slicing, no shuffling).
train_ratio = 0.8
train_size = int(len(X) * train_ratio)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Fit the XGBoost regressor on the training windows.
xgb_model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)
xgb_model.fit(X_train, y_train)

# Predict the held-out targets.
y_pred = xgb_model.predict(X_test)

# Error metrics on the hold-out set.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE only uses positions whose actual value is non-zero, so the division
# below is always defined.
non_zero_indices = y_test != 0
mape = np.mean(
    np.abs(
        (y_test[non_zero_indices] - y_pred[non_zero_indices])
        / y_test[non_zero_indices]
    )
) * 100

print(f"Evaluation Metrics for XGBoost ETH-USD:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Plot actual vs. predicted prices against the last len(y_test) dates of
# the series.
plt.figure(figsize=(10, 6))
plt.plot(
    price_data.index[-len(y_test):],
    y_test,
    label="Actual Prices",
    color="blue",
)
plt.plot(
    price_data.index[-len(y_test):],
    y_pred,
    label="Predicted Prices",
    color="orange",
)
plt.title("XGBoost Model ETH Predictions")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()



In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load Data
crypto_data = pd.read_csv("Clustering.csv")  # Update to the correct file name

# Filter for the cryptocurrency of interest (XMR-USD here) and drop the first
# and last columns (noted in the original as the Ticker and Cluster columns —
# confirm against the CSV layout).
price_data = crypto_data[crypto_data['Ticker'] == 'XMR-USD'].iloc[:, 1:-1]
price_data = price_data.T  # Transpose so the former column labels become the index
price_data.index = pd.to_datetime(price_data.index, errors='coerce')  # Ensure the index is datetime
# errors='coerce' turns unparseable labels into NaT.  dropna() only inspects
# the values, so rows with an invalid date must be removed via the index.
price_data = price_data[price_data.index.notna()]
price_data = price_data.dropna()  # Drop rows whose price is missing
price_data = price_data.squeeze()  # Convert to a series for easier manipulation

# Prepare Sliding Window Data
def create_features(data, window_size=5):
    """Turn a sequence into sliding-window samples for one-step-ahead prediction.

    Args:
        data: Indexable, sliceable sequence of observations.
        window_size: Number of consecutive observations in each sample.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[k]`` is
        ``data[k:k + window_size]`` and ``y[k]`` is ``data[k + window_size]``.
        Both are empty when ``data`` has no more than ``window_size`` entries.
    """
    starts = range(len(data) - window_size)
    windows = [data[s:s + window_size] for s in starts]
    targets = [data[s + window_size] for s in starts]
    return np.array(windows), np.array(targets)

# Build lag windows: each sample holds `window_size` consecutive prices and
# the target is the price that follows them.
window_size = 5
X, y = create_features(price_data.values, window_size)

# Split Data into Training and Testing Sets (chronological, no shuffling)
train_ratio = 0.8
train_size = int(len(X) * train_ratio)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Train XGBoost Model
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
xgb_model.fit(X_train, y_train)

# Make Predictions
y_pred = xgb_model.predict(X_test)

# Evaluation Metrics
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE only uses positions whose actual value is non-zero, so the division
# below is always defined.
non_zero_indices = y_test != 0
mape = np.mean(np.abs((y_test[non_zero_indices] - y_pred[non_zero_indices]) / y_test[non_zero_indices])) * 100

# Header names XMR-USD to match the plot title below (it previously said
# SOL-USD, a copy-paste leftover).
print("Evaluation Metrics for XGBoost XMR-USD:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Visualize Predictions against the last len(y_test) dates of the series
plt.figure(figsize=(10, 6))
plt.plot(price_data.index[-len(y_test):], y_test, label="Actual Prices", color="blue")
plt.plot(price_data.index[-len(y_test):], y_pred, label="Predicted Prices", color="orange")
plt.title("XGBoost Model XMR Predictions")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()


SVR

In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load Data
crypto_data = pd.read_csv("Clustering.csv")  # Replace with your file name

# Every header other than 'Ticker' is treated as a date label.
date_columns = [col for col in crypto_data.columns if col != 'Ticker']

# Parse those headers to datetime (unparseable ones become NaT) and leave
# 'Ticker' untouched.  Comparing against 'Ticker' directly is equivalent to a
# membership test on date_columns without scanning that list per column.
crypto_data.columns = [
    col if col == 'Ticker' else pd.to_datetime(col, errors='coerce')
    for col in crypto_data.columns
]

# Ensure the 'Ticker' column remains intact
if 'Ticker' not in crypto_data.columns:
    raise ValueError("The 'Ticker' column is missing in the CSV file.")

# Filter the SOL-USD row(s), drop the first and last columns, and transpose
# so the column labels become the index.
price_data = crypto_data[crypto_data['Ticker'] == 'SOL-USD'].iloc[:, 1:-1].T
price_data.index = pd.to_datetime(price_data.index, errors='coerce')  # Ensure the index is datetime
# dropna() only inspects the values, so rows whose date failed to parse (NaT)
# must be removed via the index.
price_data = price_data[price_data.index.notna()]
price_data = price_data.dropna()  # Drop rows whose price is missing

# Extract dates and values
dates = price_data.index
prices = price_data.values.flatten()

# Prepare Sliding Window Data
def create_features(data, window_size=5):
    """Slice ``data`` into overlapping windows paired with the next value.

    Sample ``k`` consists of ``data[k:k + window_size]`` and its target is
    ``data[k + window_size]``.

    Returns:
        ``(X, y)`` as NumPy arrays; both are empty when ``data`` is not
        longer than ``window_size``.
    """
    n_samples = len(data) - window_size
    windows, targets = [], []
    start = 0
    while start < n_samples:
        stop = start + window_size
        windows.append(data[start:stop])
        targets.append(data[stop])
        start += 1
    return np.array(windows), np.array(targets)

# Build lag windows: each sample holds `window_size` consecutive prices and
# the target is the price that follows them.
window_size = 5
X, y = create_features(prices, window_size)

# Split Data into Training and Testing Sets (chronological, no shuffling)
train_ratio = 0.8
train_size = int(len(X) * train_ratio)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
# Target k is prices[k + window_size], so the test targets start at position
# train_size + window_size of the date index.
dates_test = dates[train_size + window_size:]

# Scale Features: the scaler is fitted on the training windows only and then
# applied to the test windows.
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVR Model
svr_model = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_model.fit(X_train, y_train)

# Make Predictions
y_pred = svr_model.predict(X_test)

# Evaluation Metrics
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE only uses positions whose actual value is non-zero; dividing by a zero
# actual would otherwise turn the metric into inf/nan.
non_zero_indices = y_test != 0
mape = np.mean(np.abs((y_test[non_zero_indices] - y_pred[non_zero_indices]) / y_test[non_zero_indices])) * 100

print(f"Evaluation Metrics for SVR SOL-USD:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Visualize Predictions
plt.figure(figsize=(12, 6))
plt.plot(dates_test, y_test, label="Actual Prices", color="blue")
plt.plot(dates_test, y_pred, label="Predicted Prices", color="orange", linestyle="--")
plt.title("SVR Model SOL Predictions")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()


In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load Data
crypto_data = pd.read_csv("Clustering.csv")  # Replace with your file name

# Every header other than 'Ticker' is treated as a date label.
date_columns = [col for col in crypto_data.columns if col != 'Ticker']

# Parse those headers to datetime (unparseable ones become NaT) and leave
# 'Ticker' untouched.  Comparing against 'Ticker' directly is equivalent to a
# membership test on date_columns without scanning that list per column.
crypto_data.columns = [
    col if col == 'Ticker' else pd.to_datetime(col, errors='coerce')
    for col in crypto_data.columns
]

# Ensure the 'Ticker' column remains intact
if 'Ticker' not in crypto_data.columns:
    raise ValueError("The 'Ticker' column is missing in the CSV file.")

# Filter the BTC-USD row(s), drop the first and last columns, and transpose
# so the column labels become the index.
price_data = crypto_data[crypto_data['Ticker'] == 'BTC-USD'].iloc[:, 1:-1].T
price_data.index = pd.to_datetime(price_data.index, errors='coerce')  # Ensure the index is datetime
# dropna() only inspects the values, so rows whose date failed to parse (NaT)
# must be removed via the index.
price_data = price_data[price_data.index.notna()]
price_data = price_data.dropna()  # Drop rows whose price is missing

# Extract dates and values
dates = price_data.index
prices = price_data.values.flatten()

# Prepare Sliding Window Data
def create_features(data, window_size=5):
    """Pair every run of ``window_size`` consecutive values with its successor.

    Args:
        data: Indexable, sliceable sequence of observations.
        window_size: Length of each input window.

    Returns:
        ``(X, y)`` as NumPy arrays, where ``X[k]`` is
        ``data[k:k + window_size]`` and ``y[k]`` is ``data[k + window_size]``.
    """
    samples = [
        (data[first:first + window_size], data[first + window_size])
        for first in range(len(data) - window_size)
    ]
    windows = [window for window, _ in samples]
    targets = [target for _, target in samples]
    return np.array(windows), np.array(targets)

# Build lag windows: each sample holds `window_size` consecutive prices and
# the target is the price that follows them.
window_size = 5
X, y = create_features(prices, window_size)

# Split Data into Training and Testing Sets (chronological, no shuffling)
train_ratio = 0.8
train_size = int(len(X) * train_ratio)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
# Target k is prices[k + window_size], so the test targets start at position
# train_size + window_size of the date index.
dates_test = dates[train_size + window_size:]  # Align test dates with predictions

# Scale Features: the scaler is fitted on the training windows only and then
# applied to the test windows.
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVR Model
svr_model = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_model.fit(X_train, y_train)

# Make Predictions
y_pred = svr_model.predict(X_test)

# Evaluation Metrics
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE only uses positions whose actual value is non-zero; dividing by a zero
# actual would otherwise turn the metric into inf/nan.
non_zero_indices = y_test != 0
mape = np.mean(np.abs((y_test[non_zero_indices] - y_pred[non_zero_indices]) / y_test[non_zero_indices])) * 100

print(f"Evaluation Metrics for SVR BTC-USD:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Visualize Predictions
plt.figure(figsize=(12, 6))
plt.plot(dates_test, y_test, label="Actual Prices", color="blue")
plt.plot(dates_test, y_pred, label="Predicted Prices", color="orange", linestyle="--")
plt.title("SVR Model BTC Predictions")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()


In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load Data
crypto_data = pd.read_csv("Clustering.csv")  # Replace with your file name

# Every header other than 'Ticker' is treated as a date label.
date_columns = [col for col in crypto_data.columns if col != 'Ticker']

# Parse those headers to datetime (unparseable ones become NaT) and leave
# 'Ticker' untouched.  Comparing against 'Ticker' directly is equivalent to a
# membership test on date_columns without scanning that list per column.
crypto_data.columns = [
    col if col == 'Ticker' else pd.to_datetime(col, errors='coerce')
    for col in crypto_data.columns
]

# Ensure the 'Ticker' column remains intact
if 'Ticker' not in crypto_data.columns:
    raise ValueError("The 'Ticker' column is missing in the CSV file.")

# Filter the ETH-USD row(s), drop the first and last columns, and transpose
# so the column labels become the index.
price_data = crypto_data[crypto_data['Ticker'] == 'ETH-USD'].iloc[:, 1:-1].T
price_data.index = pd.to_datetime(price_data.index, errors='coerce')  # Ensure the index is datetime
# dropna() only inspects the values, so rows whose date failed to parse (NaT)
# must be removed via the index.
price_data = price_data[price_data.index.notna()]
price_data = price_data.dropna()  # Drop rows whose price is missing

# Extract dates and values
dates = price_data.index
prices = price_data.values.flatten()

# Prepare Sliding Window Data
def create_features(data, window_size=5):
    """Turn a sequence into sliding-window samples for one-step-ahead prediction.

    Args:
        data: Indexable, sliceable sequence of observations.
        window_size: Number of consecutive observations in each sample.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[k]`` is
        ``data[k:k + window_size]`` and ``y[k]`` is ``data[k + window_size]``.
        Both are empty when ``data`` has no more than ``window_size`` entries.
    """
    starts = range(len(data) - window_size)
    windows = [data[s:s + window_size] for s in starts]
    targets = [data[s + window_size] for s in starts]
    return np.array(windows), np.array(targets)

# Build lag windows: each sample holds `window_size` consecutive prices and
# the target is the price that follows them.
window_size = 5
X, y = create_features(prices, window_size)

# Split Data into Training and Testing Sets (chronological, no shuffling)
train_ratio = 0.8
train_size = int(len(X) * train_ratio)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
# Target k is prices[k + window_size], so the test targets start at position
# train_size + window_size of the date index.
dates_test = dates[train_size + window_size:]  # Align test dates with predictions

# Scale Features: the scaler is fitted on the training windows only and then
# applied to the test windows.
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVR Model
svr_model = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_model.fit(X_train, y_train)

# Make Predictions
y_pred = svr_model.predict(X_test)

# Evaluation Metrics
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE only uses positions whose actual value is non-zero; dividing by a zero
# actual would otherwise turn the metric into inf/nan.
non_zero_indices = y_test != 0
mape = np.mean(np.abs((y_test[non_zero_indices] - y_pred[non_zero_indices]) / y_test[non_zero_indices])) * 100

print(f"Evaluation Metrics for SVR ETH-USD:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Visualize Predictions
plt.figure(figsize=(12, 6))
plt.plot(dates_test, y_test, label="Actual Prices", color="blue")
plt.plot(dates_test, y_pred, label="Predicted Prices", color="orange", linestyle="--")
plt.title("SVR Model ETH Predictions")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()


In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load Data
crypto_data = pd.read_csv("Clustering.csv")  # Replace with your file name

# Every header other than 'Ticker' is treated as a date label.
date_columns = [col for col in crypto_data.columns if col != 'Ticker']

# Parse those headers to datetime (unparseable ones become NaT) and leave
# 'Ticker' untouched.  Comparing against 'Ticker' directly is equivalent to a
# membership test on date_columns without scanning that list per column.
crypto_data.columns = [
    col if col == 'Ticker' else pd.to_datetime(col, errors='coerce')
    for col in crypto_data.columns
]

# Ensure the 'Ticker' column remains intact
if 'Ticker' not in crypto_data.columns:
    raise ValueError("The 'Ticker' column is missing in the CSV file.")

# Filter the XMR-USD row(s), drop the first and last columns, and transpose
# so the column labels become the index.
price_data = crypto_data[crypto_data['Ticker'] == 'XMR-USD'].iloc[:, 1:-1].T
price_data.index = pd.to_datetime(price_data.index, errors='coerce')  # Ensure the index is datetime
# dropna() only inspects the values, so rows whose date failed to parse (NaT)
# must be removed via the index.
price_data = price_data[price_data.index.notna()]
price_data = price_data.dropna()  # Drop rows whose price is missing

# Extract dates and values
dates = price_data.index
prices = price_data.values.flatten()

# Prepare Sliding Window Data
def create_features(data, window_size=5):
    """Slice ``data`` into overlapping windows paired with the next value.

    Sample ``k`` consists of ``data[k:k + window_size]`` and its target is
    ``data[k + window_size]``.

    Returns:
        ``(X, y)`` as NumPy arrays; both are empty when ``data`` is not
        longer than ``window_size``.
    """
    n_samples = len(data) - window_size
    windows, targets = [], []
    start = 0
    while start < n_samples:
        stop = start + window_size
        windows.append(data[start:stop])
        targets.append(data[stop])
        start += 1
    return np.array(windows), np.array(targets)

# Build lag windows: each sample holds `window_size` consecutive prices and
# the target is the price that follows them.
window_size = 5
X, y = create_features(prices, window_size)

# Split Data into Training and Testing Sets (chronological, no shuffling)
train_ratio = 0.8
train_size = int(len(X) * train_ratio)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
# Target k is prices[k + window_size], so the test targets start at position
# train_size + window_size of the date index.
dates_test = dates[train_size + window_size:]  # Align test dates with predictions

# Scale Features: the scaler is fitted on the training windows only and then
# applied to the test windows.
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVR Model
svr_model = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_model.fit(X_train, y_train)

# Make Predictions
y_pred = svr_model.predict(X_test)

# Evaluation Metrics
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE only uses positions whose actual value is non-zero; dividing by a zero
# actual would otherwise turn the metric into inf/nan.
non_zero_indices = y_test != 0
mape = np.mean(np.abs((y_test[non_zero_indices] - y_pred[non_zero_indices]) / y_test[non_zero_indices])) * 100

print(f"Evaluation Metrics for SVR XMR-USD:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Visualize Predictions
plt.figure(figsize=(12, 6))
plt.plot(dates_test, y_test, label="Actual Prices", color="blue")
plt.plot(dates_test, y_pred, label="Predicted Prices", color="orange", linestyle="--")
plt.title("SVR Model XMR Predictions")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid()
plt.show()


STEP 6